Skip to content

Commit 498d9bc

Browse files
chandlercdcommander
authored andcommitted
Fix x86-64 ABI conformance issue in SIMD code
(descriptions cribbed by DRC from discussion in flutter#20) In the x86-64 ABI, the high (unused) DWORD of a 32-bit argument's register is undefined, so it was incorrect to use a 64-bit mov instruction to transfer a JDIMENSION argument in the 64-bit SSE2 SIMD functions. The code worked thus far only because the existing compiler optimizers weren't smart enough to do anything else with the register in question, so the upper 32 bits happened to be all zeroes-- for the past 6 years, on every x86-64 compiler previously known to mankind. The bleeding-edge Clang/LLVM compiler has a smarter optimizer, and under certain circumstances, it will attempt to load-combine adjacent 32-bit integers from one of the libjpeg structures into a single 64-bit integer and pass that 64-bit integer as a 32-bit argument to one of the SIMD functions (which is allowed by the ABI, since the upper 32 bits of the 32-bit argument's register are undefined.) This caused the libjpeg-turbo regression tests to crash. Also enhance the documentation of JDIMENSION to explain that its size is significant to the implementation of the SIMD code. Closes flutter#20. Refer also to http://crbug.com/532214.
1 parent 465a9fe commit 498d9bc

14 files changed

+40
-28
lines changed

ChangeLog.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@ incorrectly encode certain JPEG images when quality=100 and the fast integer
1111
forward DCT were used. This was known to cause 'make test' to fail when the
1212
library was built with '-march=haswell' on x86 systems.
1313

14+
[3] Fixed an issue whereby libjpeg-turbo would crash when built with the latest
15+
& greatest development version of the Clang/LLVM compiler. This was caused by
16+
an x86-64 ABI conformance issue in some of libjpeg-turbo's 64-bit SSE2 SIMD
17+
routines. Those routines were incorrectly using a 64-bit mov instruction to
18+
transfer a 32-bit JDIMENSION argument, whereas the x86-64 ABI allows the upper
19+
(unused) 32 bits of a 32-bit argument's register to be undefined. The new
20+
Clang/LLVM optimizer uses load combining to transfer multiple adjacent 32-bit
21+
structure members into a single 64-bit register, and this exposed the ABI
22+
conformance issue.
23+
1424

1525
1.4.1
1626
=====

jmorecfg.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,9 @@ typedef long INT32;
162162
* images up to 64K*64K due to 16-bit fields in SOF markers. Therefore
163163
* "unsigned int" is sufficient on all machines. However, if you need to
164164
* handle larger images and you don't mind deviating from the spec, you
165-
* can change this datatype.
165+
* can change this datatype. (Note that changing this datatype will
166+
* potentially require modifying the SIMD code. The x86-64 SIMD extensions,
167+
* in particular, assume a 32-bit JDIMENSION.)
166168
*/
167169

168170
typedef unsigned int JDIMENSION;

simd/jccolext-sse2-64.asm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
5050
collect_args
5151
push rbx
5252

53-
mov rcx, r10
53+
mov ecx, r10d
5454
test rcx,rcx
5555
jz near .return
5656

5757
push rcx
5858

5959
mov rsi, r12
60-
mov rcx, r13
60+
mov ecx, r13d
6161
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
6262
mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
6363
mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]

simd/jcgryext-sse2-64.asm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ EXTN(jsimd_rgb_gray_convert_sse2):
5050
collect_args
5151
push rbx
5252

53-
mov rcx, r10
53+
mov ecx, r10d
5454
test rcx,rcx
5555
jz near .return
5656

5757
push rcx
5858

5959
mov rsi, r12
60-
mov rcx, r13
60+
mov ecx, r13d
6161
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
6262
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
6363

simd/jcsample-sse2-64.asm

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@ EXTN(jsimd_h2v1_downsample_sse2):
4949
mov rbp,rsp
5050
collect_args
5151

52-
mov rcx, r13
52+
mov ecx, r13d
5353
shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols)
5454
jz near .return
5555

56-
mov rdx, r10
56+
mov edx, r10d
5757

5858
; -- expand_right_edge
5959

@@ -90,7 +90,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
9090

9191
; -- h2v1_downsample
9292

93-
mov rax, r12 ; rowctr
93+
mov eax, r12d ; rowctr
9494
test eax,eax
9595
jle near .return
9696

@@ -193,11 +193,11 @@ EXTN(jsimd_h2v2_downsample_sse2):
193193
mov rbp,rsp
194194
collect_args
195195

196-
mov rcx, r13
196+
mov ecx, r13d
197197
shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols)
198198
jz near .return
199199

200-
mov rdx, r10
200+
mov edx, r10d
201201

202202
; -- expand_right_edge
203203

@@ -234,7 +234,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
234234

235235
; -- h2v2_downsample
236236

237-
mov rax, r12 ; rowctr
237+
mov eax, r12d ; rowctr
238238
test rax,rax
239239
jle near .return
240240

simd/jdcolext-sse2-64.asm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
5252
collect_args
5353
push rbx
5454

55-
mov rcx, r10 ; num_cols
55+
mov ecx, r10d ; num_cols
5656
test rcx,rcx
5757
jz near .return
5858

5959
push rcx
6060

6161
mov rdi, r11
62-
mov rcx, r12
62+
mov ecx, r12d
6363
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
6464
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
6565
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]

simd/jdmrgext-sse2-64.asm

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
5252
collect_args
5353
push rbx
5454

55-
mov rcx, r10 ; col
55+
mov ecx, r10d ; col
5656
test rcx,rcx
5757
jz near .return
5858

5959
push rcx
6060

6161
mov rdi, r11
62-
mov rcx, r12
62+
mov ecx, r12d
6363
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
6464
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
6565
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
@@ -455,10 +455,10 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
455455
collect_args
456456
push rbx
457457

458-
mov rax, r10
458+
mov eax, r10d
459459

460460
mov rdi, r11
461-
mov rcx, r12
461+
mov ecx, r12d
462462
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
463463
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
464464
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]

simd/jdsample-sse2-64.asm

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
6767
mov rbp,rsp
6868
collect_args
6969

70-
mov rax, r11 ; colctr
70+
mov eax, r11d ; colctr
7171
test rax,rax
7272
jz near .return
7373

@@ -214,7 +214,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
214214
collect_args
215215
push rbx
216216

217-
mov rax, r11 ; colctr
217+
mov eax, r11d ; colctr
218218
test rax,rax
219219
jz near .return
220220

@@ -506,7 +506,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
506506
mov rbp,rsp
507507
collect_args
508508

509-
mov rdx, r11
509+
mov edx, r11d
510510
add rdx, byte (2*SIZEOF_XMMWORD)-1
511511
and rdx, byte -(2*SIZEOF_XMMWORD)
512512
jz near .return
@@ -596,7 +596,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
596596
collect_args
597597
push rbx
598598

599-
mov rdx, r11
599+
mov edx, r11d
600600
add rdx, byte (2*SIZEOF_XMMWORD)-1
601601
and rdx, byte -(2*SIZEOF_XMMWORD)
602602
jz near .return

simd/jidctflt-sse2-64.asm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ EXTN(jsimd_idct_float_sse2):
326326
mov rax, [original_rbp]
327327
lea rsi, [workspace] ; FAST_FLOAT * wsptr
328328
mov rdi, r12 ; (JSAMPROW *)
329-
mov rax, r13
329+
mov eax, r13d
330330
mov rcx, DCTSIZE/4 ; ctr
331331
.rowloop:
332332

simd/jidctfst-sse2-64.asm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ EXTN(jsimd_idct_ifast_sse2):
323323

324324
mov rax, [original_rbp]
325325
mov rdi, r12 ; (JSAMPROW *)
326-
mov rax, r13
326+
mov eax, r13d
327327

328328
; -- Even part
329329

0 commit comments

Comments
 (0)