x86 assembler and internal/cpu: add AMX tile registers, opcodes and CPUID bit.

Review changes relative to the submitted patch:
  * aenum.go, anames.go: the new AMX opcodes and names are inserted before
    the ALAST / "LAST" terminator instead of after it.
  * asm6.go: Ytm is declared before the Ymax terminator instead of after it.
  * cpu_x86.go: the unrelated blank-line removal in doinit is dropped, and
    the HasAMX assignment carries a review note.
The "index" lines are carried over unchanged; the post-image hashes for the
four files listed above are stale and should be regenerated.

diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go
index b121f6df7b2f8e..2d7efa958b1075 100644
--- a/src/cmd/internal/obj/x86/a.out.go
+++ b/src/cmd/internal/obj/x86/a.out.go
@@ -202,6 +202,15 @@ const (
 	REG_Z30
 	REG_Z31
 
+	REG_TM0
+	REG_TM1
+	REG_TM2
+	REG_TM3
+	REG_TM4
+	REG_TM5
+	REG_TM6
+	REG_TM7
+
 	REG_CS
 	REG_SS
 	REG_DS
diff --git a/src/cmd/internal/obj/x86/aenum.go b/src/cmd/internal/obj/x86/aenum.go
index 79cdd241a236a3..fee2ab735bbf0b 100644
--- a/src/cmd/internal/obj/x86/aenum.go
+++ b/src/cmd/internal/obj/x86/aenum.go
@@ -1607,4 +1607,16 @@ const (
 	AXSETBV
 	AXTEST
+	ALDTILECFG
+	ASTTILECFG
+	ATDPBF16PS
+	ATDPBSSD
+	ATDPBSUD
+	ATDPBUSD
+	ATDPBUUD
+	ATILELOADD
+	ATILELOADDT1
+	ATILESTORED
+	ATILEZERO
+	ATILERELEASE
 	ALAST
 )
diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go
index 3966381e50d221..907af13a614200 100644
--- a/src/cmd/internal/obj/x86/anames.go
+++ b/src/cmd/internal/obj/x86/anames.go
@@ -1605,4 +1605,16 @@ var Anames = []string{
 	"XSETBV",
 	"XTEST",
+	"LDTILECFG",
+	"STTILECFG",
+	"TDPBF16PS",
+	"TDPBSSD",
+	"TDPBSUD",
+	"TDPBUSD",
+	"TDPBUUD",
+	"TILELOADD",
+	"TILELOADDT1",
+	"TILESTORED",
+	"TILEZERO",
+	"TILERELEASE",
 	"LAST",
 }
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 718da6a8a2caf6..fd6bb9faab1c50 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -192,6 +192,7 @@ const (
 	Ytextsize
 	Yindir
+	Ytm
 	Ymax
 )
 
 const (
@@ -315,10 +316,6 @@ const (
 	// The P, L, and W fields are chosen to match
 	// their eventual locations in the VEX prefix bytes.
 
-	// Encoding for VEX prefix in tables.
-	// The P, L, and W fields are chosen to match
-	// their eventual locations in the VEX prefix bytes.
-
 	// Using spare bit to make leading [E]VEX encoding byte different from
 	// 0x0f even if all other VEX fields are 0.
 	avxEscape = 1 << 6
@@ -2471,6 +2468,10 @@ func instinit(ctxt *obj.Link) {
 			}
 		}
 
+		if i >= REG_TM0 && i <= REG_TM0+7 {
+			reg[i] = (i - REG_TM0) & 7
+		}
+
 		if i >= REG_CR+8 && i <= REG_CR+15 {
 			regrex[i] = Rxr
 		}
@@ -3081,6 +3082,16 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
 		}
 		return Yzr
 
+	case REG_TM0 + 0,
+		REG_TM0 + 1,
+		REG_TM0 + 2,
+		REG_TM0 + 3,
+		REG_TM0 + 4,
+		REG_TM0 + 5,
+		REG_TM0 + 6,
+		REG_TM0 + 7:
+		return Ytm
+
 	case REG_K0:
 		return Yk0
 
diff --git a/src/cmd/internal/obj/x86/avx_optabs.go b/src/cmd/internal/obj/x86/avx_optabs.go
index b8ff4699d1548c..487e34366e73fa 100644
--- a/src/cmd/internal/obj/x86/avx_optabs.go
+++ b/src/cmd/internal/obj/x86/avx_optabs.go
@@ -928,6 +928,30 @@ var _yvzeroall = []ytab{
 	{zcase: Zvex, zoffset: 2, args: argList{}},
 }
 
+var _yldtilecfg = []ytab{
+	{zcase: Zvex_rm_v_ro, zoffset: 3, args: argList{Ym}},
+}
+
+var _ytdpbf16ps = []ytab{
+	{zcase: Zvex_v_rm_r, zoffset: 2, args: argList{Ytm, Ytm, Ytm}},
+}
+
+var _ytileloadd = []ytab{
+	{zcase: Zvex_rm_v_r, zoffset: 2, args: argList{Ym, Ytm}},
+}
+
+var _ytilestored = []ytab{
+	{zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Ytm, Ym}},
+}
+
+var _ytilezero = []ytab{
+	{zcase: Zvex_r_v_rm, zoffset: 2, args: argList{Ytm}},
+}
+
+var _ytilerelease = []ytab{
+	{zcase: Zvex_rm_v_ro, zoffset: 3, args: argList{}},
+}
+
 var avxOptab = [...]Optab{
 	{as: AANDNL, ytab: _yandnl, prefix: Pavx, op: opBytes{
 		avxEscape | vex128 | vex0F38 | vexW0, 0xF2,
@@ -4625,4 +4649,40 @@ var avxOptab = [...]Optab{
 	{as: AVZEROUPPER, ytab: _yvzeroall, prefix: Pavx, op: opBytes{
 		avxEscape | vex128 | vex0F | vexW0, 0x77,
 	}},
+	{as: ALDTILECFG, ytab: _yldtilecfg, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vex0F38 | vexW0, 0x49, 00,
+	}},
+	{as: ASTTILECFG, ytab: _yldtilecfg, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x49, 00,
+	}},
+	{as: ATDPBF16PS, ytab: _ytdpbf16ps, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0x5C,
+	}},
+	{as: ATDPBSSD, ytab: _ytdpbf16ps, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0x5E,
+	}},
+	{as: ATDPBSUD, ytab: _ytdpbf16ps, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0x5E,
+	}},
+	{as: ATDPBUSD, ytab: _ytdpbf16ps, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x5E,
+	}},
+	{as: ATDPBUUD, ytab: _ytdpbf16ps, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vex0F38 | vexW0, 0x5E,
+	}},
+	{as: ATILELOADD, ytab: _ytileloadd, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0x4B,
+	}},
+	{as: ATILELOADDT1, ytab: _ytileloadd, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vex66 | vex0F38 | vexW0, 0x4B,
+	}},
+	{as: ATILESTORED, ytab: _ytilestored, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vexF3 | vex0F38 | vexW0, 0x4B,
+	}},
+	{as: ATILEZERO, ytab: _ytilezero, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vexF2 | vex0F38 | vexW0, 0x49,
+	}},
+	{as: ATILERELEASE, ytab: _ytilerelease, prefix: Pavx, op: opBytes{
+		avxEscape | vex128 | vex0F38 | vexW0, 0x49, 00,
+	}},
 }
diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go
index 1352810f42ddfa..85be3e01b2ad0f 100644
--- a/src/internal/cpu/cpu.go
+++ b/src/internal/cpu/cpu.go
@@ -29,6 +29,7 @@ var X86 struct {
 	HasADX       bool
 	HasAVX       bool
 	HasAVX2      bool
+	HasAMX       bool
 	HasBMI1      bool
 	HasBMI2      bool
 	HasERMS      bool
diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go
index 96b8ef92b560df..000981f6b0528e 100644
--- a/src/internal/cpu/cpu_x86.go
+++ b/src/internal/cpu/cpu_x86.go
@@ -41,6 +41,9 @@ const (
 	cpuid_ADX  = 1 << 19
 	cpuid_SHA  = 1 << 29
 
+	// edx bits
+	cpuid_AMX = 1 << 24
+
 	// edx bits for CPUID 0x80000001
 	cpuid_RDTSCP = 1 << 27
 )
@@ -73,6 +76,7 @@ func doinit() {
 		options = append(options,
 			option{Name: "avx", Feature: &X86.HasAVX},
 			option{Name: "avx2", Feature: &X86.HasAVX2},
+			option{Name: "amx", Feature: &X86.HasAMX},
 			option{Name: "bmi1", Feature: &X86.HasBMI1},
 			option{Name: "bmi2", Feature: &X86.HasBMI2},
 			option{Name: "fma", Feature: &X86.HasFMA})
@@ -121,14 +125,17 @@ func doinit() {
 		return
 	}
 
-	_, ebx7, _, _ := cpuid(7, 0)
+	_, ebx7, _, edx7 := cpuid(7, 0)
 	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
 	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
+	// NOTE(review): CPUID-only check; unlike HasAVX2 above this is not gated on
+	// OS support for the AMX tile state — confirm whether that is required.
+	X86.HasAMX = isSet(edx7, cpuid_AMX)
 	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
 	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
 	X86.HasADX = isSet(ebx7, cpuid_ADX)
 	X86.HasSHA = isSet(ebx7, cpuid_SHA)
 
 	var maxExtendedInformation uint32
 	maxExtendedInformation, _, _, _ = cpuid(0x80000000, 0)
 