diff options
Diffstat (limited to 'package/xbmc')
-rw-r--r-- | package/xbmc/Makefile | 4 | ||||
-rw-r--r-- | package/xbmc/patches/xbmc-gotham_rbp_backports.patch | 20665 |
2 files changed, 20666 insertions, 3 deletions
diff --git a/package/xbmc/Makefile b/package/xbmc/Makefile index 3c792e3ad..dd0c50c46 100644 --- a/package/xbmc/Makefile +++ b/package/xbmc/Makefile @@ -5,7 +5,7 @@ include $(ADK_TOPDIR)/rules.mk PKG_NAME:= xbmc PKG_VERSION:= 13.1 -PKG_RELEASE:= 1 +PKG_RELEASE:= 2 PKG_MD5SUM:= 9ce6b6ac89b6aa0b111a1acdf3606e06 PKG_DESCR:= software media player PKG_SECTION:= mm/video @@ -64,7 +64,6 @@ AUTOTOOL_STYLE:= autoreconf CONFIGURE_ENV+= DESTDIR='${WRKINST}' \ TEXTUREPACKER_NATIVE_ROOT='$(STAGING_HOST_DIR)/usr' CONFIGURE_ARGS+= --disable-optical-drive \ - --disable-optmizations \ --disable-mysql \ --disable-avahi \ --disable-rsxs \ @@ -81,7 +80,6 @@ CONFIGURE_ARGS+= --disable-optical-drive \ --disable-wayland \ --disable-pulse \ --disable-mid \ - --with-ffmpeg \ --enable-alsa \ --enable-libmp3lame \ --enable-libvorbisenc \ diff --git a/package/xbmc/patches/xbmc-gotham_rbp_backports.patch b/package/xbmc/patches/xbmc-gotham_rbp_backports.patch new file mode 100644 index 000000000..9a4772437 --- /dev/null +++ b/package/xbmc/patches/xbmc-gotham_rbp_backports.patch @@ -0,0 +1,20665 @@ +From 1353d8feca19f2f84019797942d70864054db1b0 Mon Sep 17 00:00:00 2001 +From: Ben Avison <bavison@riscosopen.org> +Date: Mon, 5 Aug 2013 13:12:46 +0100 +Subject: [PATCH 01/94] h264_parser: Initialize the h264dsp context in the + parser as well +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Each AVStream struct for an H.264 elementary stream actually has two +copies of the H264DSPContext struct (and in fact all the other members +of H264Context as well): + +((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp +((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp + +but only the first of these was actually being initialised. This +prevented the addition of platform-specific implementations of +parser-related functions. + +Signed-off-by: Martin Storsjö <martin@martin.st> +--- + lib/ffmpeg/libavcodec/h264_parser.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c +index aff9ba1..a732f79 100644 +--- a/lib/ffmpeg/libavcodec/h264_parser.c ++++ b/lib/ffmpeg/libavcodec/h264_parser.c +@@ -386,6 +386,7 @@ static int init(AVCodecParserContext *s) + H264Context *h = s->priv_data; + h->thread_context[0] = h; + h->slice_context_count = 1; ++ ff_h264dsp_init(&h->h264dsp, 8, 1); + return 0; + } + +-- +1.9.3 + + +From 7ea2cb68f6fb1149fce70854e36ed6357a267238 Mon Sep 17 00:00:00 2001 +From: Ben Avison <bavison@riscosopen.org> +Date: Mon, 5 Aug 2013 13:12:47 +0100 +Subject: [PATCH 02/94] h264dsp: Factorize code into a new function, + h264_find_start_code_candidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This performs the start code search which was previously part of +h264_find_frame_end() - the most CPU intensive part of the function. + +By itself, this results in a performance regression: + Before After + Mean StdDev Mean StdDev Change +Overall time 2925.6 26.2 3068.5 31.7 -4.7% + +but this can more than be made up for by platform-optimised +implementations of the function. + +Signed-off-by: Martin Storsjö <martin@martin.st> +--- + lib/ffmpeg/libavcodec/h264_parser.c | 20 +++----------------- + lib/ffmpeg/libavcodec/h264dsp.c | 29 +++++++++++++++++++++++++++++ + lib/ffmpeg/libavcodec/h264dsp.h | 9 +++++++++ + 3 files changed, 41 insertions(+), 17 deletions(-) + +diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c +index a732f79..972aace 100644 +--- a/lib/ffmpeg/libavcodec/h264_parser.c ++++ b/lib/ffmpeg/libavcodec/h264_parser.c +@@ -62,23 +62,9 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si + } + + if(state==7){ +-#if HAVE_FAST_UNALIGNED +- /* we check i<buf_size instead of i+3/7 because its simpler +- * and there should be FF_INPUT_BUFFER_PADDING_SIZE bytes at the end +- */ +-# if HAVE_FAST_64BIT +- while(i<next_avc && !((~*(const uint64_t*)(buf+i) & (*(const uint64_t*)(buf+i) - 0x0101010101010101ULL)) & 0x8080808080808080ULL)) +- i+=8; +-# else +- while(i<next_avc && !((~*(const uint32_t*)(buf+i) & (*(const uint32_t*)(buf+i) - 0x01010101U)) & 0x80808080U)) +- i+=4; +-# endif +-#endif +- for(; i<next_avc; i++){ +- if(!buf[i]){ +- state=2; +- break; +- } ++ i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); ++ if (i < buf_size) ++ state = 2; + } + }else if(state<=2){ + if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5 +diff --git a/lib/ffmpeg/libavcodec/h264dsp.c b/lib/ffmpeg/libavcodec/h264dsp.c +index da9e417..b7d61cd 100644 +--- a/lib/ffmpeg/libavcodec/h264dsp.c ++++ b/lib/ffmpeg/libavcodec/h264dsp.c +@@ -60,6 +60,34 @@ + #include "h264addpx_template.c" + #undef BIT_DEPTH + ++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size) ++{ ++ int i = 0; ++#if HAVE_FAST_UNALIGNED ++ /* we check i < size instead of i + 3 / 7 because it is ++ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE ++ * bytes at the end. ++ */ ++#if HAVE_FAST_64BIT ++ while (i < size && ++ !((~*(const uint64_t *)(buf + i) & ++ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & ++ 0x8080808080808080ULL)) ++ i += 8; ++#else ++ while (i < size && ++ !((~*(const uint32_t *)(buf + i) & ++ (*(const uint32_t *)(buf + i) - 0x01010101U)) & ++ 0x80808080U)) ++ i += 4; ++#endif ++#endif ++ for (; i < size; i++) ++ if (!buf[i]) ++ break; ++ return i; ++} ++ + void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) + { + #undef FUNC +@@ -146,6 +174,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo + H264_DSP(8); + break; + } ++ c->h264_find_start_code_candidate = h264_find_start_code_candidate_c; + + if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); + if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); +diff --git a/lib/ffmpeg/libavcodec/h264dsp.h b/lib/ffmpeg/libavcodec/h264dsp.h +index 98ea15c..1be4804 100644 +--- a/lib/ffmpeg/libavcodec/h264dsp.h ++++ b/lib/ffmpeg/libavcodec/h264dsp.h +@@ -105,6 +105,15 @@ typedef struct H264DSPContext { + /* bypass-transform */ + void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride); + void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride); ++ ++ /** ++ * Search buf from the start for up to size bytes. Return the index ++ * of a zero byte, or >= size if not found. Ideally, use lookahead ++ * to filter out any zero bytes that are known to not be followed by ++ * one or more further zero bytes and a one byte. Better still, filter ++ * out any bytes that form the trailing_zero_8bits syntax element too. ++ */ ++ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size); + } H264DSPContext; + + void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, +-- +1.9.3 + + +From 458ff4b6c1855c529f563dbbd15e35aaab50adae Mon Sep 17 00:00:00 2001 +From: Ben Avison <bavison@riscosopen.org> +Date: Mon, 5 Aug 2013 13:12:48 +0100 +Subject: [PATCH 03/94] arm: Add assembly version of + h264_find_start_code_candidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 508.8 23.4 185.4 9.0 +174.4% +Overall 3068.5 31.7 2752.1 29.4 +11.5% + +In combination with the preceding patch: + Before After + Mean StdDev Mean StdDev Change +Overall 2925.6 26.2 2752.1 29.4 +6.3% + +Signed-off-by: Martin Storsjö <martin@martin.st> +--- + lib/ffmpeg/libavcodec/arm/Makefile | 1 + + lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S | 253 +++++++++++++++++++++++++++ + lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c | 4 + + lib/ffmpeg/libavcodec/h264_parser.c | 1 - + 4 files changed, 258 insertions(+), 1 deletion(-) + create mode 100644 lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S + +diff --git a/lib/ffmpeg/libavcodec/arm/Makefile b/lib/ffmpeg/libavcodec/arm/Makefile +index 7390a8b..480000b71 100644 +--- a/lib/ffmpeg/libavcodec/arm/Makefile ++++ b/lib/ffmpeg/libavcodec/arm/Makefile +@@ -9,6 +9,7 @@ OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_init_arm.o \ + OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ + + ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o ++ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o + + OBJS-$(CONFIG_FLAC_DECODER) += arm/flacdsp_init_arm.o \ + arm/flacdsp_arm.o \ +diff --git a/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S b/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S +new file mode 100644 +index 0000000..c4f12a6 +--- /dev/null ++++ b/lib/ffmpeg/libavcodec/arm/h264dsp_armv6.S +@@ -0,0 +1,253 @@ ++/* ++ * Copyright (c) 2013 RISC OS Open Ltd ++ * Author: Ben Avison <bavison@riscosopen.org> ++ * ++ * This file is part of Libav. ++ * ++ * Libav is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * Libav is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with Libav; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/arm/asm.S" ++ ++RESULT .req a1 ++BUF .req a1 ++SIZE .req a2 ++PATTERN .req a3 ++PTR .req a4 ++DAT0 .req v1 ++DAT1 .req v2 ++DAT2 .req v3 ++DAT3 .req v4 ++TMP0 .req v5 ++TMP1 .req v6 ++TMP2 .req ip ++TMP3 .req lr ++ ++#define PRELOAD_DISTANCE 4 ++ ++.macro innerloop4 ++ ldr DAT0, [PTR], #4 ++ subs SIZE, SIZE, #4 @ C flag survives rest of macro ++ sub TMP0, DAT0, PATTERN, lsr #14 ++ bic TMP0, TMP0, DAT0 ++ ands TMP0, TMP0, PATTERN ++.endm ++ ++.macro innerloop16 decrement, do_preload ++ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3} ++ .ifnc "\do_preload","" ++ pld [PTR, #PRELOAD_DISTANCE*32] ++ .endif ++ .ifnc "\decrement","" ++ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro ++ .endif ++ sub TMP0, DAT0, PATTERN, lsr #14 ++ sub TMP1, DAT1, PATTERN, lsr #14 ++ bic TMP0, TMP0, DAT0 ++ bic TMP1, TMP1, DAT1 ++ sub TMP2, DAT2, PATTERN, lsr #14 ++ sub TMP3, DAT3, PATTERN, lsr #14 ++ ands TMP0, TMP0, PATTERN ++ bic TMP2, TMP2, DAT2 ++ it eq ++ andseq TMP1, TMP1, PATTERN ++ bic TMP3, TMP3, DAT3 ++ itt eq ++ andseq TMP2, TMP2, PATTERN ++ andseq TMP3, TMP3, PATTERN ++.endm ++ ++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */ ++function ff_h264_find_start_code_candidate_armv6, export=1 ++ push {v1-v6,lr} ++ mov PTR, BUF ++ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go ++ @ before using code that does preloads ++ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1 ++ blo 60f ++ ++ @ Get to word-alignment, 1 byte at a time ++ tst PTR, #3 ++ beq 2f ++1: ldrb DAT0, [PTR], #1 ++ sub SIZE, SIZE, #1 ++ teq DAT0, #0 ++ beq 90f ++ tst PTR, #3 ++ bne 1b ++2: @ Get to 4-word alignment, 1 word at a time ++ ldr PATTERN, =0x80008000 ++ setend be ++ tst PTR, #12 ++ beq 4f ++3: innerloop4 ++ bne 91f ++ tst PTR, #12 ++ bne 3b ++4: @ Get to cacheline (8-word) alignment ++ tst PTR, #16 ++ beq 5f ++ innerloop16 16 ++ bne 93f ++5: @ Check complete cachelines, with preloading ++ @ We need to stop when there are still (PRELOAD_DISTANCE+1) ++ @ complete cachelines to go ++ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 ++6: innerloop16 , do_preload ++ bne 93f ++ innerloop16 32 ++ bne 93f ++ bcs 6b ++ @ Preload trailing part-cacheline, if any ++ tst SIZE, #31 ++ beq 7f ++ pld [PTR, #(PRELOAD_DISTANCE+1)*32] ++ @ Check remaining data without doing any more preloads. First ++ @ do in chunks of 4 words: ++7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16 ++ bmi 9f ++8: innerloop16 16 ++ bne 93f ++ bcs 8b ++ @ Then in words: ++9: adds SIZE, SIZE, #16 - 4 ++ bmi 11f ++10: innerloop4 ++ bne 91f ++ bcs 10b ++11: setend le ++ @ Check second byte of final halfword ++ ldrb DAT0, [PTR, #-1] ++ teq DAT0, #0 ++ beq 90f ++ @ Check any remaining bytes ++ tst SIZE, #3 ++ beq 13f ++12: ldrb DAT0, [PTR], #1 ++ sub SIZE, SIZE, #1 ++ teq DAT0, #0 ++ beq 90f ++ tst SIZE, #3 ++ bne 12b ++ @ No candidate found ++13: sub RESULT, PTR, BUF ++ b 99f ++ ++60: @ Small buffer - simply check by looping over bytes ++ subs SIZE, SIZE, #1 ++ bcc 99f ++61: ldrb DAT0, [PTR], #1 ++ subs SIZE, SIZE, #1 ++ teq DAT0, #0 ++ beq 90f ++ bcs 61b ++ @ No candidate found ++ sub RESULT, PTR, BUF ++ b 99f ++ ++90: @ Found a candidate at the preceding byte ++ sub RESULT, PTR, BUF ++ sub RESULT, RESULT, #1 ++ b 99f ++ ++91: @ Found a candidate somewhere in the preceding 4 bytes ++ sub RESULT, PTR, BUF ++ sub RESULT, RESULT, #4 ++ sub TMP0, DAT0, #0x20000 ++ bics TMP0, TMP0, DAT0 ++ itt pl ++ ldrbpl DAT0, [PTR, #-3] ++ addpl RESULT, RESULT, #2 ++ bpl 92f ++ teq RESULT, #0 ++ beq 98f @ don't look back a byte if found at first byte in buffer ++ ldrb DAT0, [PTR, #-5] ++92: teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++ ++93: @ Found a candidate somewhere in the preceding 16 bytes ++ sub RESULT, PTR, BUF ++ sub RESULT, RESULT, #16 ++ teq TMP0, #0 ++ beq 95f @ not in first 4 bytes ++ sub TMP0, DAT0, #0x20000 ++ bics TMP0, TMP0, DAT0 ++ itt pl ++ ldrbpl DAT0, [PTR, #-15] ++ addpl RESULT, RESULT, #2 ++ bpl 94f ++ teq RESULT, #0 ++ beq 98f @ don't look back a byte if found at first byte in buffer ++ ldrb DAT0, [PTR, #-17] ++94: teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++95: add RESULT, RESULT, #4 ++ teq TMP1, #0 ++ beq 96f @ not in next 4 bytes ++ sub TMP1, DAT1, #0x20000 ++ bics TMP1, TMP1, DAT1 ++ itee mi ++ ldrbmi DAT0, [PTR, #-13] ++ ldrbpl DAT0, [PTR, #-11] ++ addpl RESULT, RESULT, #2 ++ teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++96: add RESULT, RESULT, #4 ++ teq TMP2, #0 ++ beq 97f @ not in next 4 bytes ++ sub TMP2, DAT2, #0x20000 ++ bics TMP2, TMP2, DAT2 ++ itee mi ++ ldrbmi DAT0, [PTR, #-9] ++ ldrbpl DAT0, [PTR, #-7] ++ addpl RESULT, RESULT, #2 ++ teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++97: add RESULT, RESULT, #4 ++ sub TMP3, DAT3, #0x20000 ++ bics TMP3, TMP3, DAT3 ++ itee mi ++ ldrbmi DAT0, [PTR, #-5] ++ ldrbpl DAT0, [PTR, #-3] ++ addpl RESULT, RESULT, #2 ++ teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ @ drop through to 98f ++98: setend le ++99: pop {v1-v6,pc} ++.endfunc ++ ++ .unreq RESULT ++ .unreq BUF ++ .unreq SIZE ++ .unreq PATTERN ++ .unreq PTR ++ .unreq DAT0 ++ .unreq DAT1 ++ .unreq DAT2 ++ .unreq DAT3 ++ .unreq TMP0 ++ .unreq TMP1 ++ .unreq TMP2 ++ .unreq TMP3 +diff --git a/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c b/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c +index 785b604..2804e56 100644 +--- a/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c ++++ b/lib/ffmpeg/libavcodec/arm/h264dsp_init_arm.c +@@ -24,6 +24,8 @@ + #include "libavutil/arm/cpu.h" + #include "libavcodec/h264dsp.h" + ++int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size); ++ + void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); + void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, +@@ -106,6 +108,8 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, + { + int cpu_flags = av_get_cpu_flags(); + ++ if (have_armv6(cpu_flags)) ++ c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6; + if (have_neon(cpu_flags)) + ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); + } +diff --git a/lib/ffmpeg/libavcodec/h264_parser.c b/lib/ffmpeg/libavcodec/h264_parser.c +index 972aace..363843c 100644 +--- a/lib/ffmpeg/libavcodec/h264_parser.c ++++ b/lib/ffmpeg/libavcodec/h264_parser.c +@@ -65,7 +65,6 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si + i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); + if (i < buf_size) + state = 2; +- } + }else if(state<=2){ + if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5 + else if(buf[i]) state = 7; +-- +1.9.3 + + +From 5841d5b69f0df2f286c0a8e419deb16d927e864e Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 19 Aug 2013 22:48:05 +0100 +Subject: [PATCH 04/94] [ffmpeg] Backport of h264_find_start_code_candidate + optimisation + +--- + ...-Initialize-the-h264dsp-context-in-the-pa.patch | 39 +++ + ...torize-code-into-a-new-function-h264_find.patch | 134 +++++++++ + ...embly-version-of-h264_find_start_code_can.patch | 322 +++++++++++++++++++++ + 3 files changed, 495 insertions(+) + create mode 100644 lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch + create mode 100644 lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch + create mode 100644 lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch + +diff --git a/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch b/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch +new file mode 100644 +index 0000000..263578d +--- /dev/null ++++ b/lib/ffmpeg/patches/0056-h264_parser-Initialize-the-h264dsp-context-in-the-pa.patch +@@ -0,0 +1,39 @@ ++From 7a82022ee2f9b1fad991ace0936901e7419444be Mon Sep 17 00:00:00 2001 ++From: Ben Avison <bavison@riscosopen.org> ++Date: Mon, 5 Aug 2013 13:12:46 +0100 ++Subject: [PATCH 1/3] h264_parser: Initialize the h264dsp context in the ++ parser as well ++MIME-Version: 1.0 ++Content-Type: text/plain; charset=UTF-8 ++Content-Transfer-Encoding: 8bit ++ ++Each AVStream struct for an H.264 elementary stream actually has two ++copies of the H264DSPContext struct (and in fact all the other members ++of H264Context as well): ++ ++((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp ++((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp ++ ++but only the first of these was actually being initialised. This ++prevented the addition of platform-specific implementations of ++parser-related functions. ++ ++Signed-off-by: Martin Storsjö <martin@martin.st> ++--- ++ libavcodec/h264_parser.c | 1 + ++ 1 file changed, 1 insertion(+) ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index 2ed155c..da2a5f9 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -417,6 +417,7 @@ static av_cold int init(AVCodecParserContext *s) ++ H264Context *h = s->priv_data; ++ h->thread_context[0] = h; ++ h->slice_context_count = 1; +++ ff_h264dsp_init(&h->h264dsp, 8, 1); ++ return 0; ++ } ++ ++-- ++1.7.9.5 +diff --git a/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch b/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch +new file mode 100644 +index 0000000..0151d85 +--- /dev/null ++++ b/lib/ffmpeg/patches/0057-h264dsp-Factorize-code-into-a-new-function-h264_find.patch +@@ -0,0 +1,134 @@ ++From 218d6844b37d339ffbf2044ad07d8be7767e2734 Mon Sep 17 00:00:00 2001 ++From: Ben Avison <bavison@riscosopen.org> ++Date: Mon, 5 Aug 2013 13:12:47 +0100 ++Subject: [PATCH 2/3] h264dsp: Factorize code into a new function, ++ h264_find_start_code_candidate ++MIME-Version: 1.0 ++Content-Type: text/plain; charset=UTF-8 ++Content-Transfer-Encoding: 8bit ++ ++This performs the start code search which was previously part of ++h264_find_frame_end() - the most CPU intensive part of the function. ++ ++By itself, this results in a performance regression: ++ Before After ++ Mean StdDev Mean StdDev Change ++Overall time 2925.6 26.2 3068.5 31.7 -4.7% ++ ++but this can more than be made up for by platform-optimised ++implementations of the function. ++ ++Signed-off-by: Martin Storsjö <martin@martin.st> ++--- ++ libavcodec/h264_parser.c | 27 +++------------------------ ++ libavcodec/h264dsp.c | 29 +++++++++++++++++++++++++++++ ++ libavcodec/h264dsp.h | 9 +++++++++ ++ 3 files changed, 41 insertions(+), 24 deletions(-) ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index da2a5f9..ef5da98 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf, ++ ++ for (i = 0; i < buf_size; i++) { ++ if (state == 7) { ++-#if HAVE_FAST_UNALIGNED ++- /* we check i < buf_size instead of i + 3 / 7 because it is ++- * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE ++- * bytes at the end. ++- */ ++-#if HAVE_FAST_64BIT ++- while (i < buf_size && ++- !((~*(const uint64_t *)(buf + i) & ++- (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & ++- 0x8080808080808080ULL)) ++- i += 8; ++-#else ++- while (i < buf_size && ++- !((~*(const uint32_t *)(buf + i) & ++- (*(const uint32_t *)(buf + i) - 0x01010101U)) & ++- 0x80808080U)) ++- i += 4; ++-#endif ++-#endif ++- for (; i < buf_size; i++) ++- if (!buf[i]) { ++- state = 2; ++- break; ++- } +++ i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); +++ if (i < buf_size) +++ state = 2; ++ } else if (state <= 2) { ++ if (buf[i] == 1) ++ state ^= 5; // 2->7, 1->4, 0->5 ++diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c ++index 3ca6abe..a901dbb 100644 ++--- a/libavcodec/h264dsp.c +++++ b/libavcodec/h264dsp.c ++@@ -53,6 +53,34 @@ ++ #include "h264addpx_template.c" ++ #undef BIT_DEPTH ++ +++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size) +++{ +++ int i = 0; +++#if HAVE_FAST_UNALIGNED +++ /* we check i < size instead of i + 3 / 7 because it is +++ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE +++ * bytes at the end. +++ */ +++#if HAVE_FAST_64BIT +++ while (i < size && +++ !((~*(const uint64_t *)(buf + i) & +++ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & +++ 0x8080808080808080ULL)) +++ i += 8; +++#else +++ while (i < size && +++ !((~*(const uint32_t *)(buf + i) & +++ (*(const uint32_t *)(buf + i) - 0x01010101U)) & +++ 0x80808080U)) +++ i += 4; +++#endif +++#endif +++ for (; i < size; i++) +++ if (!buf[i]) +++ break; +++ return i; +++} +++ ++ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, ++ const int chroma_format_idc) ++ { ++@@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, ++ H264_DSP(8); ++ break; ++ } +++ c->h264_find_start_code_candidate = h264_find_start_code_candidate_c; ++ ++ if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); ++ if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); ++diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h ++index 1f9f8fe..6249ba7 100644 ++--- a/libavcodec/h264dsp.h +++++ b/libavcodec/h264dsp.h ++@@ -105,6 +105,15 @@ typedef struct H264DSPContext { ++ /* bypass-transform */ ++ void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride); ++ void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride); +++ +++ /** +++ * Search buf from the start for up to size bytes. Return the index +++ * of a zero byte, or >= size if not found. Ideally, use lookahead +++ * to filter out any zero bytes that are known to not be followed by +++ * one or more further zero bytes and a one byte. Better still, filter +++ * out any bytes that form the trailing_zero_8bits syntax element too. +++ */ +++ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size); ++ } H264DSPContext; ++ ++ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, ++-- ++1.7.9.5 +diff --git a/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch b/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch +new file mode 100644 +index 0000000..cdc2d1e +--- /dev/null ++++ b/lib/ffmpeg/patches/0058-arm-Add-assembly-version-of-h264_find_start_code_can.patch +@@ -0,0 +1,322 @@ ++From 45e10e5c8d3df09c80a4d80483bff2712367f3fa Mon Sep 17 00:00:00 2001 ++From: Ben Avison <bavison@riscosopen.org> ++Date: Mon, 5 Aug 2013 13:12:48 +0100 ++Subject: [PATCH 3/3] arm: Add assembly version of ++ h264_find_start_code_candidate ++MIME-Version: 1.0 ++Content-Type: text/plain; charset=UTF-8 ++Content-Transfer-Encoding: 8bit ++ ++ Before After ++ Mean StdDev Mean StdDev Change ++This function 508.8 23.4 185.4 9.0 +174.4% ++Overall 3068.5 31.7 2752.1 29.4 +11.5% ++ ++In combination with the preceding patch: ++ Before After ++ Mean StdDev Mean StdDev Change ++Overall 2925.6 26.2 2752.1 29.4 +6.3% ++ ++Signed-off-by: Martin Storsjö <martin@martin.st> ++--- ++ libavcodec/arm/Makefile | 1 + ++ libavcodec/arm/h264dsp_armv6.S | 253 +++++++++++++++++++++++++++++++++++++ ++ libavcodec/arm/h264dsp_init_arm.c | 4 + ++ 3 files changed, 258 insertions(+) ++ create mode 100644 libavcodec/arm/h264dsp_armv6.S ++ ++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile ++index e941aaa..9c64b36 100644 ++--- a/libavcodec/arm/Makefile +++++ b/libavcodec/arm/Makefile ++@@ -45,6 +45,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ ++ arm/simple_idct_armv6.o \ ++ ++ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o +++ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o ++ ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ ++ arm/hpeldsp_armv6.o ++ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o ++diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S ++new file mode 100644 ++index 0000000..c4f12a6 ++--- /dev/null +++++ b/libavcodec/arm/h264dsp_armv6.S ++@@ -0,0 +1,253 @@ +++/* +++ * Copyright (c) 2013 RISC OS Open Ltd +++ * Author: Ben Avison <bavison@riscosopen.org> +++ * +++ * This file is part of Libav. +++ * +++ * Libav is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * Libav is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with Libav; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#include "libavutil/arm/asm.S" +++ +++RESULT .req a1 +++BUF .req a1 +++SIZE .req a2 +++PATTERN .req a3 +++PTR .req a4 +++DAT0 .req v1 +++DAT1 .req v2 +++DAT2 .req v3 +++DAT3 .req v4 +++TMP0 .req v5 +++TMP1 .req v6 +++TMP2 .req ip +++TMP3 .req lr +++ +++#define PRELOAD_DISTANCE 4 +++ +++.macro innerloop4 +++ ldr DAT0, [PTR], #4 +++ subs SIZE, SIZE, #4 @ C flag survives rest of macro +++ sub TMP0, DAT0, PATTERN, lsr #14 +++ bic TMP0, TMP0, DAT0 +++ ands TMP0, TMP0, PATTERN +++.endm +++ +++.macro innerloop16 decrement, do_preload +++ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3} +++ .ifnc "\do_preload","" +++ pld [PTR, #PRELOAD_DISTANCE*32] +++ .endif +++ .ifnc "\decrement","" +++ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro +++ .endif +++ sub TMP0, DAT0, PATTERN, lsr #14 +++ sub TMP1, DAT1, PATTERN, lsr #14 +++ bic TMP0, TMP0, DAT0 +++ bic TMP1, TMP1, DAT1 +++ sub TMP2, DAT2, PATTERN, lsr #14 +++ sub TMP3, DAT3, PATTERN, lsr #14 +++ ands TMP0, TMP0, PATTERN +++ bic TMP2, TMP2, DAT2 +++ it eq +++ andseq TMP1, TMP1, PATTERN +++ bic TMP3, TMP3, DAT3 +++ itt eq +++ andseq TMP2, TMP2, PATTERN +++ andseq TMP3, TMP3, PATTERN +++.endm +++ +++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */ +++function ff_h264_find_start_code_candidate_armv6, export=1 +++ push {v1-v6,lr} +++ mov PTR, BUF +++ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go +++ @ before using code that does preloads +++ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1 +++ blo 60f +++ +++ @ Get to word-alignment, 1 byte at a time +++ tst PTR, #3 +++ beq 2f +++1: ldrb DAT0, [PTR], #1 +++ sub SIZE, SIZE, #1 +++ teq DAT0, #0 +++ beq 90f +++ tst PTR, #3 +++ bne 1b +++2: @ Get to 4-word alignment, 1 word at a time +++ ldr PATTERN, =0x80008000 +++ setend be +++ tst PTR, #12 +++ beq 4f +++3: innerloop4 +++ bne 91f +++ tst PTR, #12 +++ bne 3b +++4: @ Get to cacheline (8-word) alignment +++ tst PTR, #16 +++ beq 5f +++ innerloop16 16 +++ bne 93f +++5: @ Check complete cachelines, with preloading +++ @ We need to stop when there are still (PRELOAD_DISTANCE+1) +++ @ complete cachelines to go +++ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 +++6: innerloop16 , do_preload +++ bne 93f +++ innerloop16 32 +++ bne 93f +++ bcs 6b +++ @ Preload trailing part-cacheline, if any +++ tst SIZE, #31 +++ beq 7f +++ pld [PTR, #(PRELOAD_DISTANCE+1)*32] +++ @ Check remaining data without doing any more preloads. First +++ @ do in chunks of 4 words: +++7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16 +++ bmi 9f +++8: innerloop16 16 +++ bne 93f +++ bcs 8b +++ @ Then in words: +++9: adds SIZE, SIZE, #16 - 4 +++ bmi 11f +++10: innerloop4 +++ bne 91f +++ bcs 10b +++11: setend le +++ @ Check second byte of final halfword +++ ldrb DAT0, [PTR, #-1] +++ teq DAT0, #0 +++ beq 90f +++ @ Check any remaining bytes +++ tst SIZE, #3 +++ beq 13f +++12: ldrb DAT0, [PTR], #1 +++ sub SIZE, SIZE, #1 +++ teq DAT0, #0 +++ beq 90f +++ tst SIZE, #3 +++ bne 12b +++ @ No candidate found +++13: sub RESULT, PTR, BUF +++ b 99f +++ +++60: @ Small buffer - simply check by looping over bytes +++ subs SIZE, SIZE, #1 +++ bcc 99f +++61: ldrb DAT0, [PTR], #1 +++ subs SIZE, SIZE, #1 +++ teq DAT0, #0 +++ beq 90f +++ bcs 61b +++ @ No candidate found +++ sub RESULT, PTR, BUF +++ b 99f +++ +++90: @ Found a candidate at the preceding byte +++ sub RESULT, PTR, BUF +++ sub RESULT, RESULT, #1 +++ b 99f +++ +++91: @ Found a candidate somewhere in the preceding 4 bytes +++ sub RESULT, PTR, BUF +++ sub RESULT, RESULT, #4 +++ sub TMP0, DAT0, #0x20000 +++ bics TMP0, TMP0, DAT0 +++ itt pl +++ ldrbpl DAT0, [PTR, #-3] +++ addpl RESULT, RESULT, #2 +++ bpl 92f +++ teq RESULT, #0 +++ beq 98f @ don't look back a byte if found at first byte in buffer +++ ldrb DAT0, [PTR, #-5] +++92: teq DAT0, #0 +++ it eq +++ subeq RESULT, RESULT, #1 +++ b 98f +++ +++93: @ Found a candidate somewhere in the preceding 16 bytes +++ sub RESULT, PTR, BUF +++ sub RESULT, RESULT, #16 +++ teq TMP0, #0 +++ beq 95f @ not in first 4 bytes +++ sub TMP0, DAT0, #0x20000 +++ bics TMP0, TMP0, DAT0 +++ itt pl +++ ldrbpl DAT0, [PTR, #-15] +++ addpl RESULT, RESULT, #2 +++ bpl 94f +++ teq RESULT, #0 +++ beq 98f @ don't look back a byte if found at first byte in buffer +++ ldrb DAT0, [PTR, #-17] +++94: teq DAT0, #0 +++ it eq +++ subeq RESULT, RESULT, #1 +++ b 98f +++95: add RESULT, RESULT, #4 +++ teq TMP1, #0 +++ beq 96f @ not in next 4 bytes +++ sub TMP1, DAT1, #0x20000 +++ bics TMP1, TMP1, DAT1 +++ itee mi +++ ldrbmi DAT0, [PTR, #-13] +++ ldrbpl DAT0, [PTR, #-11] +++ addpl RESULT, RESULT, #2 +++ teq DAT0, #0 +++ it eq +++ subeq RESULT, RESULT, #1 +++ b 98f +++96: add RESULT, RESULT, #4 +++ teq TMP2, #0 +++ beq 97f @ not in next 4 bytes +++ sub TMP2, DAT2, #0x20000 +++ bics TMP2, TMP2, DAT2 +++ itee mi +++ ldrbmi DAT0, [PTR, #-9] +++ ldrbpl DAT0, [PTR, #-7] +++ addpl RESULT, RESULT, #2 +++ teq DAT0, #0 +++ it eq +++ s |