Comment 1 for bug 1513985

Revision history for this message
Andreas Cadhalpun (andreas-cadhalpun) wrote : Re: ffmpeg test idct8x8 (NEON) fails on ARM32 when built with binutils from the trunk

Debugging the failing command 'libavcodec/dct-test -i' with gdb shows that this is clearly a binutils bug.

* Working with binutils 2.25.1-6ubuntu1:
Breakpoint 1, __end_a_evaluation () at libavcodec/arm/simple_idct_arm.S:239
239 ldr r10, =MASK_MSHW @ R10=0xFFFF0000
(gdb) disas
Dump of assembler code for function __end_a_evaluation:
   0x00012c08 <+0>: add.w r8, r6, r0
   0x00012c0c <+4>: add.w r9, r2, r1
=> 0x00012c10 <+8>: ldr.w r10, [pc, #540] ; 0x12e30 <__end_bef_a_evaluation+44>
   0x00012c14 <+12>: and.w r9, r10, r9, lsl #5
   0x00012c18 <+16>: mvn.w r11, r10
   0x00012c1c <+20>: and.w r8, r11, r8, asr #11
   0x00012c20 <+24>: orr.w r8, r8, r9
   0x00012c24 <+28>: str.w r8, [lr]
   0x00012c28 <+32>: add.w r8, r3, r5
   0x00012c2c <+36>: add.w r9, r4, r7
   0x00012c30 <+40>: and.w r9, r10, r9, lsl #5
   0x00012c34 <+44>: and.w r8, r11, r8, asr #11
   0x00012c38 <+48>: orr.w r8, r8, r9
   0x00012c3c <+52>: str.w r8, [lr, #4]
   0x00012c40 <+56>: sub.w r8, r4, r7
   0x00012c44 <+60>: sub.w r9, r3, r5
   0x00012c48 <+64>: and.w r9, r10, r9, lsl #5
   0x00012c4c <+68>: and.w r8, r11, r8, asr #11
   0x00012c50 <+72>: orr.w r8, r8, r9
   0x00012c54 <+76>: str.w r8, [lr, #8]
   0x00012c58 <+80>: sub.w r8, r2, r1
   0x00012c5c <+84>: sub.w r9, r6, r0
   0x00012c60 <+88>: and.w r9, r10, r9, lsl #5
   0x00012c64 <+92>: and.w r8, r11, r8, asr #11
   0x00012c68 <+96>: orr.w r8, r8, r9
   0x00012c6c <+100>: str.w r8, [lr, #12]
   0x00012c70 <+104>: b.n 0x12c92 <__end_row_loop>
End of assembler dump.
(gdb) info registers r10
r10 0x22a3 8867
(gdb) n
240 and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a1+b1)<<5)
(gdb) info registers r10
r10 0xffff0000 -65536

This correctly sets the r10 register to 0xFFFF0000.

* Broken with binutils 2.25.51.20151028-0ubuntu1:
Breakpoint 1, __end_a_evaluation () at libavcodec/arm/simple_idct_arm.S:239
239 ldr r10, =MASK_MSHW @ R10=0xFFFF0000
(gdb) disas
Dump of assembler code for function __end_a_evaluation:
   0x00012c08 <+0>: add.w r8, r6, r0
   0x00012c0c <+4>: add.w r9, r2, r1
=> 0x00012c10 <+8>: movt r10, #65535 ; 0xffff
   0x00012c14 <+12>: and.w r9, r10, r9, lsl #5
   0x00012c18 <+16>: mvn.w r11, r10
   0x00012c1c <+20>: and.w r8, r11, r8, asr #11
   0x00012c20 <+24>: orr.w r8, r8, r9
   0x00012c24 <+28>: str.w r8, [lr]
   0x00012c28 <+32>: add.w r8, r3, r5
   0x00012c2c <+36>: add.w r9, r4, r7
   0x00012c30 <+40>: and.w r9, r10, r9, lsl #5
   0x00012c34 <+44>: and.w r8, r11, r8, asr #11
   0x00012c38 <+48>: orr.w r8, r8, r9
   0x00012c3c <+52>: str.w r8, [lr, #4]
   0x00012c40 <+56>: sub.w r8, r4, r7
   0x00012c44 <+60>: sub.w r9, r3, r5
   0x00012c48 <+64>: and.w r9, r10, r9, lsl #5
   0x00012c4c <+68>: and.w r8, r11, r8, asr #11
   0x00012c50 <+72>: orr.w r8, r8, r9
   0x00012c54 <+76>: str.w r8, [lr, #8]
   0x00012c58 <+80>: sub.w r8, r2, r1
   0x00012c5c <+84>: sub.w r9, r6, r0
   0x00012c60 <+88>: and.w r9, r10, r9, lsl #5
   0x00012c64 <+92>: and.w r8, r11, r8, asr #11
   0x00012c68 <+96>: orr.w r8, r8, r9
   0x00012c6c <+100>: str.w r8, [lr, #12]
   0x00012c70 <+104>: b.n 0x12c92 <__end_row_loop>
End of assembler dump.
(gdb) info registers r10
r10 0x22a3 8867
(gdb) n
240 and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a1+b1)<<5)
(gdb) info registers r10
r10 0xffff22a3 -56669

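This only sets the upper half of r10, leaving the lower half untouched: the assembler now emits a lone 'movt r10, #0xffff' for 'ldr r10, =MASK_MSHW' instead of the literal-pool load, and movt writes only the upper 16 bits, so the stale lower half 0x22a3 remains and r10 ends up as 0xffff22a3 instead of 0xFFFF0000.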