
Commit

Merge remote-tracking branch 'upstream/master'
Sibras committed Oct 1, 2021
2 parents 4ca702d + 66a5bc1 commit 77bee0c
Showing 31 changed files with 118 additions and 81 deletions.
2 changes: 1 addition & 1 deletion AUTHORS
@@ -58,7 +58,7 @@ S: Sweden
N: Laurent Aimar
E: fenrir AT videolan DOT org
C: fenrir
D: Intial import, former maintainer
D: Initial import, former maintainer
D: x86 asm (mmx/mmx2)
S: France

4 changes: 2 additions & 2 deletions common/aarch64/mc-c.c
@@ -210,7 +210,7 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if( (mvy&3) == 3 ) // explict if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;

if( qpel_idx & 5 ) /* qpel interpolation needed */
@@ -236,7 +236,7 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if( (mvy&3) == 3 ) // explict if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;

if( qpel_idx & 5 ) /* qpel interpolation needed */
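
The mc_luma_neon/get_ref_neon hunks above (and the identical ARM versions below) only correct a comment, but the surrounding logic is easy to check by hand: the quarter-pel motion vector splits into a fractional index qpel_idx and an integer-pel offset, and the explicit if() is there, per the comment, to get a conditional add. A standalone worked example with hypothetical mvx/mvy and stride values:

    #include <stdio.h>

    int main(void)
    {
        int mvx = 5, mvy = 7;     /* hypothetical quarter-pel motion vector */
        long i_src_stride = 64;   /* hypothetical luma stride */
        int qpel_idx = ((mvy&3)<<2) + (mvx&3);           /* fractional part, 0..15 */
        long offset = (mvy>>2)*i_src_stride + (mvx>>2);  /* integer-pel offset */
        printf( "qpel_idx=%d offset=%ld need_qpel=%d add_stride=%d\n",
                qpel_idx, offset, !!(qpel_idx & 5), (mvy&3) == 3 );
        return 0;
    }
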
4 changes: 2 additions & 2 deletions common/arm/mc-c.c
@@ -218,7 +218,7 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if( (mvy&3) == 3 ) // explict if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;

if( qpel_idx & 5 ) /* qpel interpolation needed */
@@ -244,7 +244,7 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if( (mvy&3) == 3 ) // explict if() to force conditional add
if( (mvy&3) == 3 ) // explicit if() to force conditional add
src1 += i_src_stride;

if( qpel_idx & 5 ) /* qpel interpolation needed */
8 changes: 4 additions & 4 deletions common/bitstream.c
@@ -92,10 +92,10 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
/* Size doesn't include the size of the header we're writing now. */
int chunk_size = size - 4;
orig_dst[0] = chunk_size >> 24;
orig_dst[1] = chunk_size >> 16;
orig_dst[2] = chunk_size >> 8;
orig_dst[3] = chunk_size >> 0;
orig_dst[0] = (uint8_t)(chunk_size >> 24);
orig_dst[1] = (uint8_t)(chunk_size >> 16);
orig_dst[2] = (uint8_t)(chunk_size >> 8);
orig_dst[3] = (uint8_t)(chunk_size >> 0);
}

nal->i_payload = size;
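
For context on the new casts: x264_nal_encode is storing the chunk size as a 4-byte big-endian prefix, and the (uint8_t) casts only make the narrowing of each shifted value explicit. A minimal standalone sketch of the same byte-order write (the helper name is hypothetical, not x264's API):

    #include <stdint.h>
    #include <stdio.h>

    /* Store a 32-bit chunk size as four big-endian bytes, mirroring the hunk above. */
    static void write_be32( uint8_t *dst, int chunk_size )
    {
        dst[0] = (uint8_t)(chunk_size >> 24);
        dst[1] = (uint8_t)(chunk_size >> 16);
        dst[2] = (uint8_t)(chunk_size >> 8);
        dst[3] = (uint8_t)(chunk_size >> 0);
    }

    int main(void)
    {
        uint8_t prefix[4];
        write_be32( prefix, 123456 );
        printf( "%02X %02X %02X %02X\n", prefix[0], prefix[1], prefix[2], prefix[3] );
        return 0;
    }
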
4 changes: 2 additions & 2 deletions common/cabac.c
@@ -89,10 +89,10 @@ static inline void cabac_putbyte( x264_cabac_t *cb )
cb->p[-1] += carry;
while( bytes_outstanding > 0 )
{
*(cb->p++) = carry-1;
*(cb->p++) = (uint8_t)(carry-1);
bytes_outstanding--;
}
*(cb->p++) = out;
*(cb->p++) = (uint8_t)out;
cb->i_bytes_outstanding = 0;
}
}
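
The two casts above sit in cabac_putbyte's carry handling: when a carry ripples out, the previously written byte is incremented and every byte held back as "outstanding" flips from 0xFF to 0x00, which is exactly what (uint8_t)(carry-1) produces. A tiny standalone check, illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* carry == 0 keeps outstanding bytes at 0xFF; carry == 1 turns them into 0x00 */
        for( int carry = 0; carry <= 1; carry++ )
            printf( "carry=%d -> outstanding byte 0x%02X\n", carry, (uint8_t)(carry-1) );
        return 0;
    }
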
4 changes: 2 additions & 2 deletions common/deblock.c
@@ -655,11 +655,11 @@ void x264_macroblock_deblock( x264_t *h )
} while( 0 )

if( !transform_8x8 ) FILTER( 0, 1 );
FILTER( 0, 2 );
FILTER( 0, 2 );
if( !transform_8x8 ) FILTER( 0, 3 );

if( !transform_8x8 ) FILTER( 1, 1 );
FILTER( 1, 2 );
FILTER( 1, 2 );
if( !transform_8x8 ) FILTER( 1, 3 );

#undef FILTER
2 changes: 1 addition & 1 deletion common/macroblock.h
@@ -361,7 +361,7 @@ int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed );
* uses all neighbors, even those that didn't end up using this ref.
* h->mb. need only valid values from other blocks */
#define x264_mb_predict_mv_ref16x16 x264_template(mb_predict_mv_ref16x16)
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc );
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t (*mvc)[2], int *i_mvc );

#define x264_mb_mc x264_template(mb_mc)
void x264_mb_mc( x264_t *h );
4 changes: 2 additions & 2 deletions common/mc.h
@@ -61,8 +61,8 @@ static void mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t
if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
continue;\
\
unsigned mbx = current[0];\
unsigned mby = current[1];\
unsigned mbx = (unsigned)current[0];\
unsigned mby = (unsigned)current[1];\
unsigned idx0 = mbx + mby * stride;\
unsigned idx2 = idx0 + stride;\
\
8 changes: 4 additions & 4 deletions common/mvpred.c
@@ -171,8 +171,8 @@ void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
int16_t *mv_b = h->mb.cache.mv[0][X264_SCAN8_0 - 8];

if( i_refa == -2 || i_refb == -2 ||
!( i_refa | M32( mv_a ) ) ||
!( i_refb | M32( mv_b ) ) )
!( (uint32_t)i_refa | M32( mv_a ) ) ||
!( (uint32_t)i_refb | M32( mv_b ) ) )
{
M32( mv ) = 0;
}
@@ -304,7 +304,7 @@ static ALWAYS_INLINE int mb_predict_mv_direct16x16_spatial( x264_t *h, int b_int
mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
}

int i_ref = X264_MIN3( (unsigned)i_refa, (unsigned)i_refb, (unsigned)i_refc );
int i_ref = (int)X264_MIN3( (unsigned)i_refa, (unsigned)i_refb, (unsigned)i_refc );
if( i_ref < 0 )
{
i_ref = -1;
@@ -516,7 +516,7 @@ int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
}

/* This just improves encoder performance, it's not part of the spec */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[9][2], int *i_mvc )
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t (*mvc)[2], int *i_mvc )
{
int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
int i = 0;
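
The new (uint32_t) casts in the P-Skip hunk above do not change the test; they likely just remove a signed/unsigned mismatch against M32()'s unsigned load. The underlying idiom, OR-ing the reference index with the packed 32-bit motion vector so the result is zero only when ref == 0 and mv == (0,0), can be sketched standalone like this (simplified names, not x264's internals):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Simplified stand-in for x264's M32(): load both int16_t MV components as one word. */
    static uint32_t m32( const int16_t mv[2] )
    {
        uint32_t v;
        memcpy( &v, mv, sizeof(v) );
        return v;
    }

    int main(void)
    {
        int i_refa = 0;
        int16_t mv_a[2] = { 0, 0 };
        if( !( (uint32_t)i_refa | m32( mv_a ) ) )
            printf( "neighbour has ref 0 and zero MV -> P-Skip predictor forced to (0,0)\n" );
        return 0;
    }
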
2 changes: 1 addition & 1 deletion common/osdep.h
@@ -289,7 +289,7 @@ static inline int x264_is_regular_file( FILE *filehandle )
#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )

// ARM compiliers don't reliably align stack variables
// ARM compilers don't reliably align stack variables
// - EABI requires only 8 byte stack alignment to be maintained
// - gcc can't align stack variables to more even if the stack were to be correctly aligned outside the function
// - armcc can't either, but is nice enough to actually tell you so
2 changes: 1 addition & 1 deletion common/ppc/pixel.c
@@ -1262,7 +1262,7 @@ static int pixel_sa8d_8x8_core_altivec( uint8_t *pix1, intptr_t i_pix1,
SA8D_1D_ALTIVEC(sa8d0v, sa8d1v, sa8d2v, sa8d3v,
sa8d4v, sa8d5v, sa8d6v, sa8d7v );

/* accumulation of the absolute value of all elements of the resulting bloc */
/* accumulation of the absolute value of all elements of the resulting block */
vec_s16_t abs0v = VEC_ABS(sa8d0v);
vec_s16_t abs1v = VEC_ABS(sa8d1v);
vec_s16_t sum01v = vec_add(abs0v, abs1v);
10 changes: 5 additions & 5 deletions common/rectangle.h
@@ -28,8 +28,8 @@
static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, int s, uint32_t v )
{
uint8_t *d = dst;
uint16_t v2 = s == 2 ? v : v * 0x101;
uint32_t v4 = s == 4 ? v : s == 2 ? v * 0x10001 : v * 0x1010101;
uint16_t v2 = s >= 2 ? v : v * 0x101;
uint32_t v4 = s >= 4 ? v : s >= 2 ? v * 0x10001 : v * 0x1010101;
uint64_t v8 = v4 + ((uint64_t)v4 << 32);
s *= 8;

@@ -142,13 +142,13 @@ static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, in
else
x264_macroblock_cache_rect( mvd_cache, width*2, height, 2, mvd );
}
static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, int8_t ref )
{
void *ref_cache = &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y];
if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
x264_cache_ref_func_table[width + (height<<1)-3]( ref_cache, ref );
x264_cache_ref_func_table[width + (height<<1)-3]( ref_cache, (uint8_t)ref );
else
x264_macroblock_cache_rect( ref_cache, width, height, 1, ref );
x264_macroblock_cache_rect( ref_cache, width, height, 1, (uint8_t)ref );
}
static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
{
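
The rectangle.h hunk relaxes the s == 2 / s == 4 tests to >= and passes the reference index through as int8_t with explicit casts back to uint8_t, but the core trick is unchanged: a small value is replicated across a wider word by multiplying with repeated 0x01 bytes so one store can fill several cache entries. A standalone illustration with an arbitrary value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t  v  = 0x2a;                       /* arbitrary 1-byte value */
        uint16_t v2 = v * 0x101;                  /* 0x2a2a */
        uint32_t v4 = v * 0x1010101u;             /* 0x2a2a2a2a */
        uint64_t v8 = v4 + ((uint64_t)v4 << 32);  /* 0x2a2a2a2a2a2a2a2a */
        printf( "%04x %08x %016llx\n", v2, v4, (unsigned long long)v8 );
        return 0;
    }
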
2 changes: 1 addition & 1 deletion common/vlc.c
@@ -37,7 +37,7 @@ void x264_cavlc_init( x264_t *h )
{
int mask = level >> 15;
int abs_level = (level^mask)-mask;
int i_level_code = abs_level*2-mask-2;
int i_level_code = abs_level ? abs_level*2-mask-2 : 0;
int i_next = i_suffix;
vlc_large_t *vlc = &x264_level_token[i_suffix][level+LEVEL_TABLE_SIZE/2];

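
The vlc.c change guards the level-code formula so level == 0 now maps to 0 instead of falling through to a negative value; the neighbouring lines use the usual branchless absolute value, where mask is all-ones for negative levels and zero otherwise. A small standalone sketch of the same arithmetic (illustrative, 16-bit levels assumed as in the table):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        for( int16_t level = -3; level <= 3; level++ )
        {
            int mask = level >> 15;                 /* -1 if level < 0, else 0 */
            int abs_level = (level ^ mask) - mask;  /* branchless abs() */
            int i_level_code = abs_level ? abs_level*2 - mask - 2 : 0;
            printf( "level %3d -> abs %d, code %d\n", level, abs_level, i_level_code );
        }
        return 0;
    }
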
2 changes: 1 addition & 1 deletion common/x86/bitstream-a.asm
@@ -34,7 +34,7 @@ SECTION .text
;-----------------------------------------------------------------------------
%macro NAL_LOOP 2
%%escape:
; Detect false positive to avoid unneccessary escape loop
; Detect false positive to avoid unnecessary escape loop
xor r3d, r3d
cmp byte [r0+r1-1], 0
setnz r3b
2 changes: 1 addition & 1 deletion common/x86/trellis-64.asm
@@ -158,7 +158,7 @@ cglobal %1, 4,15,9
%define cost_siglast stack+80
%define level_tree stack+96

; trellis_node_t is layed out differently than C.
; trellis_node_t is laid out differently than C.
; struct-of-arrays rather than array-of-structs, for simd.
%define nodes_curq r7
%define nodes_prevq r8
2 changes: 1 addition & 1 deletion common/x86/util.h
@@ -121,7 +121,7 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmx2(uint8_t *mvdleft, uint8_t
"m"(pb_2),"m"(pb_32),"m"(pb_33)
:"mm0", "mm1", "mm2"
);
return amvd;
return (uint16_t)amvd;
}

#define x264_predictor_clip x264_predictor_clip_mmx2
53 changes: 41 additions & 12 deletions common/x86/x86inc.asm
@@ -86,6 +86,11 @@
%define mangle(x) x
%endif

; Use VEX-encoding even in non-AVX functions
%ifndef FORCE_VEX_ENCODING
%define FORCE_VEX_ENCODING 0
%endif

%macro SECTION_RODATA 0-1 16
%ifidn __OUTPUT_FORMAT__,win32
SECTION .rdata align=%1
@@ -1014,7 +1019,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%endmacro

%macro INIT_XMM 0-1+
%assign avx_enabled 0
%assign avx_enabled FORCE_VEX_ENCODING
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
%define mova movdqa
@@ -1339,26 +1344,50 @@ INIT_XMM
%elif %0 >= 9
__instr %6, %7, %8, %9
%elif %0 == 8
%if avx_enabled && %5
%if avx_enabled && __sizeofreg >= 16 && %4 == 0
%xdefine __src1 %7
%xdefine __src2 %8
%ifnum regnumof%7
%ifnum regnumof%8
%if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
; Most VEX-encoded instructions require an additional byte to encode when
; src2 is a high register (e.g. m8..15). If the instruction is commutative
; we can swap src1 and src2 when doing so reduces the instruction length.
%xdefine __src1 %8
%xdefine __src2 %7
%if %5
%ifnum regnumof%7
%ifnum regnumof%8
%if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
; Most VEX-encoded instructions require an additional byte to encode when
; src2 is a high register (e.g. m8..15). If the instruction is commutative
; we can swap src1 and src2 when doing so reduces the instruction length.
%xdefine __src1 %8
%xdefine __src2 %7
%endif
%endif
%elifnum regnumof%8 ; put memory operands in src2 when possible
%xdefine __src1 %8
%xdefine __src2 %7
%else
%assign __emulate_avx 1
%endif
%elifnnum regnumof%7
; EVEX allows imm8 shift instructions to be used with memory operands,
; but VEX does not. This handles those special cases.
%ifnnum %8
%assign __emulate_avx 1
%elif notcpuflag(avx512)
%assign __emulate_avx 1
%endif
%endif
__instr %6, __src1, __src2
%if __emulate_avx ; a separate load is required
%if %3
vmovaps %6, %7
%else
vmovdqa %6, %7
%endif
__instr %6, %8
%else
__instr %6, __src1, __src2
%endif
%else
__instr %6, %7, %8
%endif
%elif %0 == 7
%if avx_enabled && %5
%if avx_enabled && __sizeofreg >= 16 && %5
%xdefine __src1 %6
%xdefine __src2 %7
%ifnum regnumof%6
2 changes: 1 addition & 1 deletion config.guess
@@ -1083,7 +1083,7 @@ EOF
# uname -m prints for DJGPP always 'pc', but it prints nothing about
# the processor, so we play safe by assuming i586.
# Note: whatever this is, it MUST be the same as what config.sub
# prints for the "djgpp" host, or else GDB configury will decide that
# prints for the "djgpp" host, or else GDB configure will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
exit ;;
2 changes: 1 addition & 1 deletion doc/threads.txt
@@ -33,7 +33,7 @@ I enabled the various components of slicing one at a time, and measured the port
* 16% reset cabac contexts
* 6% deblocking between slices (you don't strictly have to turn this off just for standard compliance, but you do if you want to use slices for decoder multithreading)
* 2% cabac neighbors (cbp, skip, etc)
The proportional cost of redundant headers should certainly depend on bitrate (since the header size is constant and everything else depends on bitrate). Deblocking should too (due to varing deblock strength).
The proportional cost of redundant headers should certainly depend on bitrate (since the header size is constant and everything else depends on bitrate). Deblocking should too (due to varying deblock strength).
But none of the proportions should depend strongly on the number of slices: some are triggered per slice while some are triggered per macroblock-that's-on-the-edge-of-a-slice, but as long as there's no more than 1 slice per row, the relative frequency of those two conditions is determined solely by the image width.


4 changes: 2 additions & 2 deletions encoder/analyse.c
@@ -1346,7 +1346,7 @@ static void mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
i_maxref = ref;\
}

/* early termination: if 16x16 chose ref 0, then evalute no refs older
/* early termination: if 16x16 chose ref 0, then evaluate no refs older
* than those used by the neighbors */
if( a->b_early_terminate && (i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0) )
@@ -2173,7 +2173,7 @@ static void mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
ALIGNED_ARRAY_16( pixel, pix,[2],[8*8] );
int i_maxref[2] = {h->mb.pic.i_fref[0]-1, h->mb.pic.i_fref[1]-1};

/* early termination: if 16x16 chose ref 0, then evalute no refs older
/* early termination: if 16x16 chose ref 0, then evaluate no refs older
* than those used by the neighbors */
#define CHECK_NEIGHBOUR(i)\
{\
7 changes: 6 additions & 1 deletion encoder/lookahead.c
@@ -89,9 +89,14 @@ static void lookahead_slicetype_decide( x264_t *h )

REALIGN_STACK static void *lookahead_thread( x264_t *h )
{
while( !h->lookahead->b_exit_thread )
while( 1 )
{
x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
if( h->lookahead->b_exit_thread )
{
x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
break;
}
x264_pthread_mutex_lock( &h->lookahead->next.mutex );
int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
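
The lookahead change above moves the exit-flag test under ifbuf.mutex, so the thread no longer reads the flag unsynchronized before taking the lock and then risks blocking after shutdown has already been signalled. The same check-under-lock pattern reduced to a self-contained pthreads sketch (struct and names are hypothetical, not x264's):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct {
        pthread_mutex_t mutex;
        bool b_exit_thread;
        /* ... queued frames would live here ... */
    } lookahead_ctx_t;

    static void *worker( void *arg )
    {
        lookahead_ctx_t *ctx = arg;
        while( 1 )
        {
            pthread_mutex_lock( &ctx->mutex );
            if( ctx->b_exit_thread )   /* flag is read under the same lock that protects it */
            {
                pthread_mutex_unlock( &ctx->mutex );
                break;
            }
            /* ... consume queued input here, or wait on a condition variable ... */
            pthread_mutex_unlock( &ctx->mutex );
        }
        return NULL;
    }
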
4 changes: 2 additions & 2 deletions encoder/ratecontrol.c
@@ -608,7 +608,7 @@ int x264_reference_build_list_optimal( x264_t *h )
COPY2_IF_GT( max, refcount[i], bestref, i );

/* FIXME: If there are duplicates from frames other than ref0 then it is possible
* that the optimal ordering doesnt place every duplicate. */
* that the optimal ordering doesn't place every duplicate. */

refcount[bestref] = -1;
h->fref[0][ref] = frames[bestref];
@@ -1661,7 +1661,7 @@ int x264_ratecontrol_mb( x264_t *h, int bits )
float b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
float trust_coeff = x264_clip3f( bits_so_far / slice_size_planned, 0.0, 1.0 );

/* Don't increase the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
/* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
/* area at the top of the frame was measured inaccurately. */
if( trust_coeff < 0.05f )
qp_max = qp_absolute_max = prev_row_qp;
