Skip to content

Commit 9e6ab7f

Browse files
committed
zstd: Shave some instructions off the amd64 asm
These are all in return sequences, so no performance gain is expected, but the generated code is smaller.
1 parent 272fbc7 commit 9e6ab7f

File tree

2 files changed

+22
-41
lines changed

2 files changed

+22
-41
lines changed

zstd/_generate/gen.go

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -502,20 +502,22 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
502502
o.returnWithCode(errorNotEnoughLiterals)
503503
}
504504

505-
Comment("Return with not enough output space error")
506-
{
507-
Label("error_not_enough_space")
508-
if !o.useSeqs {
509-
ctx := Dereference(Param("ctx"))
510-
tmp := GP64()
511-
MOVQ(llP, tmp)
512-
Store(tmp, ctx.Field("ll"))
513-
MOVQ(mlP, tmp)
514-
Store(tmp, ctx.Field("ml"))
515-
Store(ec.outPosition, ctx.Field("outPosition"))
516-
}
505+
if !o.useSeqs {
506+
Comment("Return with not enough output space error")
507+
{
508+
Label("error_not_enough_space")
509+
if !o.useSeqs {
510+
ctx := Dereference(Param("ctx"))
511+
tmp := GP64()
512+
MOVQ(llP, tmp)
513+
Store(tmp, ctx.Field("ll"))
514+
MOVQ(mlP, tmp)
515+
Store(tmp, ctx.Field("ml"))
516+
Store(ec.outPosition, ctx.Field("outPosition"))
517+
}
517518

518-
o.returnWithCode(errorNotEnoughSpace)
519+
o.returnWithCode(errorNotEnoughSpace)
520+
}
519521
}
520522
}
521523

@@ -1062,10 +1064,9 @@ func (e executeSimple) generateProcedure(name string) {
10621064
Store(seqIndex, ctx.Field("seqIndex"))
10631065
Store(outPosition, ctx.Field("outPosition"))
10641066

1065-
// compute litPosition
1066-
tmp := GP64()
1067-
Load(ctx.Field("literals").Base(), tmp)
1068-
SUBQ(tmp, literals) // litPosition := current - initial literals pointer
1067+
// litPosition := current - initial literals pointer
1068+
litField, _ := ctx.Field("literals").Base().Resolve()
1069+
SUBQ(litField.Addr, literals)
10691070
Store(literals, ctx.Field("litPosition"))
10701071
}
10711072
Label("loop_finished")

zstd/seqdec_amd64.s

Lines changed: 4 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -320,10 +320,6 @@ error_not_enough_literals:
320320
MOVQ $0x00000004, ret+24(FP)
321321
RET
322322

323-
// Return with not enough output space error
324-
MOVQ $0x00000005, ret+24(FP)
325-
RET
326-
327323
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
328324
// Requires: CMOV
329325
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
@@ -617,10 +613,6 @@ error_not_enough_literals:
617613
MOVQ $0x00000004, ret+24(FP)
618614
RET
619615

620-
// Return with not enough output space error
621-
MOVQ $0x00000005, ret+24(FP)
622-
RET
623-
624616
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
625617
// Requires: BMI, BMI2, CMOV
626618
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
@@ -897,10 +889,6 @@ error_not_enough_literals:
897889
MOVQ $0x00000004, ret+24(FP)
898890
RET
899891

900-
// Return with not enough output space error
901-
MOVQ $0x00000005, ret+24(FP)
902-
RET
903-
904892
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
905893
// Requires: BMI, BMI2, CMOV
906894
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
@@ -1152,10 +1140,6 @@ error_not_enough_literals:
11521140
MOVQ $0x00000004, ret+24(FP)
11531141
RET
11541142

1155-
// Return with not enough output space error
1156-
MOVQ $0x00000005, ret+24(FP)
1157-
RET
1158-
11591143
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
11601144
// Requires: SSE
11611145
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
@@ -1389,8 +1373,7 @@ loop_finished:
13891373
MOVQ ctx+0(FP), AX
13901374
MOVQ DX, 24(AX)
13911375
MOVQ DI, 104(AX)
1392-
MOVQ 80(AX), CX
1393-
SUBQ CX, SI
1376+
SUBQ 80(AX), SI
13941377
MOVQ SI, 112(AX)
13951378
RET
13961379

@@ -1402,8 +1385,7 @@ error_match_off_too_big:
14021385
MOVQ ctx+0(FP), AX
14031386
MOVQ DX, 24(AX)
14041387
MOVQ DI, 104(AX)
1405-
MOVQ 80(AX), CX
1406-
SUBQ CX, SI
1388+
SUBQ 80(AX), SI
14071389
MOVQ SI, 112(AX)
14081390
RET
14091391

@@ -1747,8 +1729,7 @@ loop_finished:
17471729
MOVQ ctx+0(FP), AX
17481730
MOVQ DX, 24(AX)
17491731
MOVQ DI, 104(AX)
1750-
MOVQ 80(AX), CX
1751-
SUBQ CX, SI
1732+
SUBQ 80(AX), SI
17521733
MOVQ SI, 112(AX)
17531734
RET
17541735

@@ -1760,8 +1741,7 @@ error_match_off_too_big:
17601741
MOVQ ctx+0(FP), AX
17611742
MOVQ DX, 24(AX)
17621743
MOVQ DI, 104(AX)
1763-
MOVQ 80(AX), CX
1764-
SUBQ CX, SI
1744+
SUBQ 80(AX), SI
17651745
MOVQ SI, 112(AX)
17661746
RET
17671747

0 commit comments

Comments
 (0)