|
| 1 | +; This test is created to check, if llvm-spirv can work with close-to-real life |
| 2 | +; LLVM IR (O0). |
| 3 | +; Compiled from: |
| 4 | +;typedef half vec2half __attribute__((ext_vector_type(2))); |
| 5 | +; |
| 6 | +;typedef _BitInt(4) vec2int4 __attribute__((ext_vector_type(2))); |
| 7 | +; |
| 8 | +;vec2half __builtin_upscale(vec2int4); |
| 9 | +; |
| 10 | +;kernel void quant_add(local char *in1_ptr, local char *in2_ptr, local vec2half *out_ptr) { |
| 11 | +; int idx = get_global_id(0); |
| 12 | +; |
| 13 | +; vec2int4 in1_4bit = (vec2int4)(in1_ptr[idx]); |
| 14 | +; vec2int4 in2_4bit = (vec2int4)(in2_ptr[idx]); |
| 15 | +; |
| 16 | +; vec2half in1_upscaled = __builtin_upscale(in1_4bit); |
| 17 | +; vec2half in2_upscaled = __builtin_upscale(in2_4bit); |
| 18 | +; |
| 19 | +; out_ptr[idx] = in1_upscaled + in2_upscaled; |
| 20 | +;} |
| 21 | +; |
| 22 | +; with __builtin_upscale function substituted with internal builtin |
| 23 | +; |
| 24 | +; compile command: |
| 25 | +; clang -cl-std=cl3.0 -target spir -emit-llvm -Xclang -finclude-default-header -g0 -O0 |
| 26 | + |
| 27 | +; RUN: llvm-as %s -o %t.bc |
| 28 | +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_float4,+SPV_INTEL_int4 |
| 29 | +; RUN: llvm-spirv %t.spv -o %t.spt --to-text |
| 30 | +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV |
| 31 | +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r --spirv-target-env=SPV-IR |
| 32 | +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll |
| 33 | +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM |
| 34 | + |
| 35 | +; CHECK-SPIRV-NOT: _Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv2_i |
| 36 | + |
| 37 | +; CHECK-SPIRV-DAG: Capability Float16Buffer |
| 38 | +; CHECK-SPIRV-DAG: Capability Int4TypeINTEL |
| 39 | +; CHECK-SPIRV-DAG: Capability Float4E2M1INTEL |
| 40 | + |
| 41 | +; CHECK-SPIRV-DAG: TypeInt [[#Int4Ty:]] 4 0 |
| 42 | +; CHECK-SPIRV-DAG: TypeVector [[#VecInt4Ty:]] [[#Int4Ty]] 2 |
| 43 | +; CHECK-SPIRV-DAG: TypePointer [[#PtrVecInt4Ty:]] 7 [[#VecInt4Ty]] |
| 44 | +; CHECK-SPIRV-DAG: TypeFloat [[#HalfTy:]] 16 |
| 45 | +; CHECK-SPIRV-DAG: TypeVector [[#VecHalfTy:]] [[#HalfTy]] 2 |
| 46 | +; CHECK-SPIRV-DAG: TypeFloat [[#FP4Ty:]] 4 6214 |
| 47 | +; CHECK-SPIRV-DAG: TypeVector [[#VecFP4Ty:]] [[#FP4Ty]] 2 |
| 48 | + |
| 49 | +; CHECK-SPIRV: Load [[#VecInt4Ty]] [[#VecInt4Val1:]] [[#]] 2 2 |
| 50 | +; CHECK-SPIRV: Bitcast [[#VecFP4Ty]] [[#Cast1:]] [[#VecInt4Val1]] |
| 51 | +; CHECK-SPIRV: FConvert [[#VecHalfTy]] [[#Conv1:]] [[#Cast1]] |
| 52 | +; CHECK-SPIRV: Store [[#]] [[#Conv1]] 2 4 |
| 53 | + |
| 54 | +; CHECK-SPIRV: Load [[#VecInt4Ty]] [[#VecInt4Val2:]] [[#]] 2 2 |
| 55 | +; CHECK-SPIRV: Bitcast [[#VecFP4Ty]] [[#Cast2:]] [[#VecInt4Val2]] |
| 56 | +; CHECK-SPIRV: FConvert [[#VecHalfTy]] [[#Conv2:]] [[#Cast2]] |
| 57 | +; CHECK-SPIRV: Store [[#]] [[#Conv2]] 2 4 |
| 58 | + |
| 59 | +; CHECK-LLVM: %[[#Conv1:]] = call <2 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv2_i(<2 x i4> %[[#]]) |
| 60 | +; CHECK-LLVM: store <2 x half> %[[#Conv1]], ptr %[[#]] |
| 61 | +; CHECK-LLVM: %[[#Conv2:]] = call <2 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv2_i(<2 x i4> %[[#]]) |
| 62 | +; CHECK-LLVM: store <2 x half> %[[#Conv2]], ptr %[[#]] |
| 63 | + |
| 64 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" |
| 65 | +target triple = "spir-unknown-unknown" |
| 66 | + |
| 67 | +define dso_local spir_kernel void @quant_add(ptr addrspace(3) noundef align 1 %0, ptr addrspace(3) noundef align 1 %1, ptr addrspace(3) noundef align 4 %2) #0 { |
| 68 | + %4 = alloca ptr addrspace(3), align 4 |
| 69 | + %5 = alloca ptr addrspace(3), align 4 |
| 70 | + %6 = alloca ptr addrspace(3), align 4 |
| 71 | + store ptr addrspace(3) %0, ptr %4, align 4 |
| 72 | + store ptr addrspace(3) %1, ptr %5, align 4 |
| 73 | + store ptr addrspace(3) %2, ptr %6, align 4 |
| 74 | + %7 = load ptr addrspace(3), ptr %4, align 4 |
| 75 | + %8 = load ptr addrspace(3), ptr %5, align 4 |
| 76 | + %9 = load ptr addrspace(3), ptr %6, align 4 |
| 77 | + call spir_func void @__clang_ocl_kern_imp_quant_add(ptr addrspace(3) noundef align 1 %7, ptr addrspace(3) noundef align 1 %8, ptr addrspace(3) noundef align 4 %9) #3 |
| 78 | + ret void |
| 79 | +} |
| 80 | + |
| 81 | +define dso_local spir_func void @__clang_ocl_kern_imp_quant_add(ptr addrspace(3) noundef align 1 %0, ptr addrspace(3) noundef align 1 %1, ptr addrspace(3) noundef align 4 %2) #0 { |
| 82 | + %4 = alloca ptr addrspace(3), align 4 |
| 83 | + %5 = alloca ptr addrspace(3), align 4 |
| 84 | + %6 = alloca ptr addrspace(3), align 4 |
| 85 | + %7 = alloca i32, align 4 |
| 86 | + %8 = alloca <2 x i4>, align 2 |
| 87 | + %9 = alloca <2 x i4>, align 2 |
| 88 | + %10 = alloca <2 x half>, align 4 |
| 89 | + %11 = alloca <2 x half>, align 4 |
| 90 | + store ptr addrspace(3) %0, ptr %4, align 4 |
| 91 | + store ptr addrspace(3) %1, ptr %5, align 4 |
| 92 | + store ptr addrspace(3) %2, ptr %6, align 4 |
| 93 | + %12 = call spir_func i32 @_Z13get_global_idj(i32 noundef 0) #4 |
| 94 | + store i32 %12, ptr %7, align 4 |
| 95 | + %13 = load ptr addrspace(3), ptr %4, align 4 |
| 96 | + %14 = load i32, ptr %7, align 4 |
| 97 | + %15 = getelementptr inbounds i8, ptr addrspace(3) %13, i32 %14 |
| 98 | + %16 = load i8, ptr addrspace(3) %15, align 1 |
| 99 | + %17 = trunc i8 %16 to i4 |
| 100 | + %18 = insertelement <2 x i4> poison, i4 %17, i64 0 |
| 101 | + %19 = shufflevector <2 x i4> %18, <2 x i4> poison, <2 x i32> zeroinitializer |
| 102 | + store <2 x i4> %19, ptr %8, align 2 |
| 103 | + %20 = load ptr addrspace(3), ptr %5, align 4 |
| 104 | + %21 = load i32, ptr %7, align 4 |
| 105 | + %22 = getelementptr inbounds i8, ptr addrspace(3) %20, i32 %21 |
| 106 | + %23 = load i8, ptr addrspace(3) %22, align 1 |
| 107 | + %24 = trunc i8 %23 to i4 |
| 108 | + %25 = insertelement <2 x i4> poison, i4 %24, i64 0 |
| 109 | + %26 = shufflevector <2 x i4> %25, <2 x i4> poison, <2 x i32> zeroinitializer |
| 110 | + store <2 x i4> %26, ptr %9, align 2 |
| 111 | + %27 = load <2 x i4>, ptr %8, align 2 |
| 112 | + %28 = call spir_func <2 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv2_i(<2 x i4> noundef %27) #5 |
| 113 | + store <2 x half> %28, ptr %10, align 4 |
| 114 | + %29 = load <2 x i4>, ptr %9, align 2 |
| 115 | + %30 = call spir_func <2 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv2_i(<2 x i4> noundef %29) #5 |
| 116 | + store <2 x half> %30, ptr %11, align 4 |
| 117 | + %31 = load <2 x half>, ptr %10, align 4 |
| 118 | + %32 = load <2 x half>, ptr %11, align 4 |
| 119 | + %33 = fadd <2 x half> %31, %32 |
| 120 | + %34 = load ptr addrspace(3), ptr %6, align 4 |
| 121 | + %35 = load i32, ptr %7, align 4 |
| 122 | + %36 = getelementptr inbounds <2 x half>, ptr addrspace(3) %34, i32 %35 |
| 123 | + store <2 x half> %33, ptr addrspace(3) %36, align 4 |
| 124 | + ret void |
| 125 | +} |
| 126 | + |
| 127 | +declare dso_local spir_func i32 @_Z13get_global_idj(i32 noundef) #1 |
| 128 | + |
| 129 | +declare dso_local spir_func <2 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv2_i(<2 x i4> noundef) #2 |
| 130 | + |
| 131 | +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } |
| 132 | +attributes #1 = { convergent nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } |
| 133 | +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } |
| 134 | +attributes #3 = { convergent nounwind "uniform-work-group-size"="false" } |
| 135 | +attributes #4 = { convergent nounwind willreturn memory(none) } |
| 136 | +attributes #5 = { convergent nounwind } |
0 commit comments