intel · jinge90 · Jul 28, 2025 · Jul 28, 2025 · Jul 29, 2025 · Jul 29, 2025
@@ -5903,13 +5903,6 @@ class OffloadingActionBuilder final {
           SmallString<128> LibName(LLCandidate);
           llvm::sys::path::append(LibName, DeviceLib);
           if (llvm::sys::fs::exists(LibName)) {
-            // NativeCPU currently only needs libsycl-nativecpu_utils and
-            // libclc, so temporarily skip other device libs in invocation.
-            // Todo: remove once NativeCPU tests the other libraries.
-            if (isNativeCPU &&
-                !LibName.str().contains("libsycl-nativecpu_utils"))
-              continue;
-
             ++NumOfDeviceLibLinked;
             Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
                                          Args.MakeArgString(LibName));
@@ -5934,7 +5927,7 @@ class OffloadingActionBuilder final {
         }
       }
 
-      if (!NumOfDeviceLibLinked)
+      if (!NumOfDeviceLibLinked && !TC->getTriple().isNVPTX())
         return false;
 
       // For NVPTX we need to also link libclc at the same stage that we link

@@ -434,7 +434,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
 
   // For NVPTX and AMDGCN we only use one single bitcode library and ignore
   // manually specified SYCL device libraries.
-  bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();
+  // For NativeCPU, only the libsycl-nativecpu_utils device library is used.
+  bool UseSingleLib = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN() ||
+                      TargetTriple.isNativeCPU();
+  bool IgnoreSingleLib = false;
 
   struct DeviceLibOptInfo {
     StringRef DeviceLibName;
@@ -474,8 +477,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
 
           // Make sure that internal libraries are still linked against
           // when -fno-sycl-device-lib contains "all" and single libraries
-          // should be ignored.
-          IgnoreSingleLibs = IgnoreSingleLibs && !ExcludeDeviceLibs;
+          // should be ignored. For NativeCPU, the native_cpu utils library
+          // is always linked without the '-only-needed' flag.
+          IgnoreSingleLib =
+              UseSingleLib && ExcludeDeviceLibs && !TargetTriple.isNativeCPU();
 
           for (const auto &K : DeviceLibLinkInfo.keys())
             DeviceLibLinkInfo[K] = (K == "internal") || !ExcludeDeviceLibs;
@@ -490,21 +495,24 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
               << A->getSpelling() << Val;
         }
         DeviceLibLinkInfo[Val] = !ExcludeDeviceLibs;
-        PrintUnusedExcludeWarning = IgnoreSingleLibs && ExcludeDeviceLibs;
+        PrintUnusedExcludeWarning = UseSingleLib && ExcludeDeviceLibs;
       }
       if (PrintUnusedExcludeWarning)
         C.getDriver().Diag(diag::warn_drv_unused_argument) << A->getSpelling();
     }
   }
 
-  if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
+  if (TargetTriple.isNVPTX() && !IgnoreSingleLib)
     LibraryList.push_back(
         Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));
 
-  if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
+  if (TargetTriple.isAMDGCN() && !IgnoreSingleLib)
     LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc"));
 
-  if (IgnoreSingleLibs)
+  if (TargetTriple.isNativeCPU() && !IgnoreSingleLib)
+    LibraryList.push_back(Args.MakeArgString("libsycl-nativecpu_utils.bc"));
+
+  if (UseSingleLib)
     return LibraryList;
 
   using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;

@@ -4,21 +4,6 @@
 
 // UNSUPPORTED: system-windows
 
-// Check if internal libraries are still linked against when linkage of all
-// device libs is manually excluded.
-// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
-// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 -fsycl-instrument-device-code %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
-
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
-// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)
-
 // Check that the -fsycl-device-lib flag has no effect when "all" is specified.
 // RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
 // RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \

@@ -4,21 +4,6 @@
 
 // UNSUPPORTED: system-windows
 
-// Check if internal libraries are still linked against when linkage of all
-// device libs is manually excluded.
-// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
-// RUN: -fsycl-targets=nvptx64-nvidia-cuda -fsycl-instrument-device-code %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
-
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
-// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50)
-
 // Check that the -fsycl-device-lib flag has no effect when "all" is specified.
 // RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
 // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \

@@ -37,7 +37,7 @@
 /// Check phases w/out specifying a compute capability.
 // RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
 // RUN: -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all \
-// RUN: -fsycl-instrument-device-code -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
 // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/share/clc/remangled-l32-signed_char.libspirv-nvptx64-nvidia-cuda.bc \
 // RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \
 // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
@@ -54,27 +54,24 @@
 // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
 // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
 // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 13: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50)
-// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object
-// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl)
+// CHK-PHASES-NO-CC: 10: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 11: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 12: linker, {9, 10, 11}, ir, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 13: sycl-post-link, {12}, ir, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 14: file-table-tform, {13}, ir, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 15: backend, {14}, assembler, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 16: assembler, {15}, object, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_50)
+// CHK-PHASES-NO-CC: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {20}, object
+// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)
 //
 /// Check phases specifying a compute capability.
 // RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
 // RUN: -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all \
-// RUN: -fsycl-instrument-device-code -fsycl-targets=nvptx64-nvidia-cuda \
+// RUN: -fsycl-targets=nvptx64-nvidia-cuda \
 // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/share/clc/remangled-l32-signed_char.libspirv-nvptx64-nvidia-cuda.bc \
 // RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \
 // RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \
@@ -92,22 +89,19 @@
 // CHK-PHASES: 7: backend, {6}, assembler, (host-sycl)
 // CHK-PHASES: 8: assembler, {7}, object, (host-sycl)
 // CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35)
-// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35)
-// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35)
-// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35)
-// CHK-PHASES: 13: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_35)
-// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
-// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35)
-// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35)
-// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35)
-// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35)
-// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35)
-// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35)
-// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35)
-// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35)
-// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35)
-// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object
-// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl)
+// CHK-PHASES: 10: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_35)
+// CHK-PHASES: 11: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
+// CHK-PHASES: 12: linker, {9, 10, 11}, ir, (device-sycl, sm_35)
+// CHK-PHASES: 13: sycl-post-link, {12}, ir, (device-sycl, sm_35)
+// CHK-PHASES: 14: file-table-tform, {13}, ir, (device-sycl, sm_35)
+// CHK-PHASES: 15: backend, {14}, assembler, (device-sycl, sm_35)
+// CHK-PHASES: 16: assembler, {15}, object, (device-sycl, sm_35)
+// CHK-PHASES: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_35)
+// CHK-PHASES: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_35)
+// CHK-PHASES: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_35)
+// CHK-PHASES: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_35)
+// CHK-PHASES: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {20}, object
+// CHK-PHASES: 22: linker, {8, 21}, image, (host-sycl)
 
 /// Check calling preprocessor only
 // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \