From 6aeb311f380532c6e0450a49d86bccdbd3ab60d4 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 1 Jul 2022 14:51:48 +0100 Subject: [PATCH 01/36] First commit --- libdevice/cmake/modules/SYCLLibdevice.cmake | 4 ++++ libdevice/device.h | 4 ++-- libdevice/fallback-cmath.cpp | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 6954133d1cb2d..4953aac3c5111 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -22,6 +22,10 @@ string(CONCAT sycl_targets_opt "spir64_fpga-unknown-unknown," "spir64-unknown-unknown") +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") +endif() + set(compile_opts # suppress an error about SYCL_EXTERNAL being used for # a function with a raw pointer parameter. diff --git a/libdevice/device.h b/libdevice/device.h index 2c56794da0b13..0770d7d82d29a 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -15,7 +15,7 @@ #define EXTERN_C #endif // __cplusplus -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #ifdef __SYCL_DEVICE_ONLY__ #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak)) #else // __SYCL_DEVICE_ONLY__ @@ -25,7 +25,7 @@ #define DEVICE_EXTERN_C DEVICE_EXTERNAL EXTERN_C #define DEVICE_EXTERN_C_INLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ #if defined(__SPIR__) || defined(__LIBDEVICE_HOST_IMPL__) #define __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 60f4208f981c0..e458aa1ec95a0 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,6 +8,11 @@ #include "device_math.h" +#ifdef __NVPTX__ +DEVICE_EXTERN_C_INLINE +int __devicelib_abs(int x) { return x < 0 ? -x : x; } +#endif + #ifdef __SPIR__ // To support fallback device libraries on-demand loading, please update the From 1c2436ef0eb06e6842e4d5be98556ba1888a1179 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 4 Jul 2022 18:16:14 +0100 Subject: [PATCH 02/36] Work in Progress --- clang/lib/Driver/Driver.cpp | 21 ++++++++++++--------- clang/lib/Driver/ToolChains/SYCL.cpp | 2 +- clang/lib/Driver/ToolChains/SYCL.h | 1 - libdevice/cmath_wrapper.cpp | 4 ++-- libdevice/fallback-cmath.cpp | 2 +- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 13397cc61bf78..8119087e06572 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4445,6 +4445,8 @@ class OffloadingActionBuilder final { /// targets. SmallVector, 8> GpuArchList; + SYCLInstallationDetector SYCLInstallation; + /// Build the last steps for CUDA after all BC files have been linked. JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( @@ -4478,7 +4480,8 @@ class OffloadingActionBuilder final { public: SYCLActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) - : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {} + : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL), + SYCLInstallation(C.getDriver()) {} void withBoundArchForToolChain(const ToolChain *TC, llvm::function_ref Op) { @@ -4826,10 +4829,8 @@ class OffloadingActionBuilder final { } } - const toolchains::SYCLToolChain *SYCLTC = - static_cast(TC); SmallVector, 4> LibLocCandidates; - SYCLTC->SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); + SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); StringRef LibSuffix = isMSVCEnv ? ".obj" : ".o"; using SYCLDeviceLibsList = SmallVector; @@ -4891,7 +4892,7 @@ class OffloadingActionBuilder final { } }; addInputs(sycl_device_wrapper_libs); - if (isSpirvAOT) + if (isSpirvAOT || TC->getTriple().isNVPTX()) addInputs(sycl_device_fallback_libs); if (Args.hasFlag(options::OPT_fsycl_instrument_device_code, options::OPT_fno_sycl_instrument_device_code, true)) @@ -5045,11 +5046,13 @@ class OffloadingActionBuilder final { // When spv online link is supported by all backends, the fallback // device libraries are only needed when current toolchain is using // AOT compilation. - if (isSPIR) { + if (isSPIR || isNVPTX) { bool UseJitLink = - Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, - options::OPT_fno_sycl_device_lib_jit_link, false); - bool UseAOTLink = isSpirvAOT || !UseJitLink; + isSPIR ? Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, + options::OPT_fno_sycl_device_lib_jit_link, + false) + : false; + bool UseAOTLink = isSPIR && (isSpirvAOT || !UseJitLink); SYCLDeviceLibLinked = addSYCLDeviceLibs( TC, FullLinkObjects, UseAOTLink, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment()); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 272462ff2260a..49f66ca311993 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -618,7 +618,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) - : ToolChain(D, Triple, Args), HostTC(HostTC), SYCLInstallation(D) { + : ToolChain(D, Triple, Args), HostTC(HostTC) { // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 8c9bcf56a20bd..7d8ad328ad083 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -174,7 +174,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { const ToolChain &HostTC; - const SYCLInstallationDetector SYCLInstallation; protected: Tool *buildBackendCompiler() const override; diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index fba86584ffca7..09dd84bc99b66 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -147,4 +147,4 @@ float asinhf(float x) { return __devicelib_asinhf(x); } DEVICE_EXTERN_C_INLINE float atanhf(float x) { return __devicelib_atanhf(x); } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index e458aa1ec95a0..1fcc7f9f7e8a2 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -10,7 +10,7 @@ #ifdef __NVPTX__ DEVICE_EXTERN_C_INLINE -int __devicelib_abs(int x) { return x < 0 ? -x : x; } +int __devicelib_abs(int x) { return -999; } // x < 0 ? -x : x; } #endif #ifdef __SPIR__ From 5aff4283f72d49fbbfd97d9f2e488ca58617659e Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 5 Jul 2022 10:27:25 +0100 Subject: [PATCH 03/36] Basic working --- libdevice/cmath_wrapper.cpp | 2 +- libdevice/device_math.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index 09dd84bc99b66..d183a3118ab11 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__NVPTX) +#if defined(__SPIR__) || defined(__NVPTX__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } diff --git a/libdevice/device_math.h b/libdevice/device_math.h index eb86baba931ae..ec7e192d6a4ca 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,7 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #include typedef struct { From 898a637f6d9e1bc1bb28078cbafe49ce3d046712 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 5 Jul 2022 13:05:23 +0100 Subject: [PATCH 04/36] Temporary move things --- libdevice/fallback-cmath.cpp | 160 +++++++++++++- libdevice/include/nv_libdevice.h | 360 +++++++++++++++++++++++++++++++ 2 files changed, 515 insertions(+), 5 deletions(-) create mode 100644 libdevice/include/nv_libdevice.h diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 1fcc7f9f7e8a2..f6410716cafe1 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,11 +8,6 @@ #include "device_math.h" -#ifdef __NVPTX__ -DEVICE_EXTERN_C_INLINE -int __devicelib_abs(int x) { return -999; } // x < 0 ? -x : x; } -#endif - #ifdef __SPIR__ // To support fallback device libraries on-demand loading, please update the @@ -172,3 +167,158 @@ DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } #endif // __SPIR__ + +#ifdef __NVPTX__ +#include "include/nv_libdevice.h" + +DEVICE_EXTERN_C_INLINE +int __devicelib_abs(int x) { return x < 0 ? -x : x; } + +DEVICE_EXTERN_C_INLINE +long int __devicelib_labs(long int x) { return x < 0 ? -x : x; } + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_llabs(long long int x) { return x < 0 ? -x : x; } + +DEVICE_EXTERN_C_INLINE +div_t __devicelib_div(int x, int y) { return {x / y, x % y}; } + +DEVICE_EXTERN_C_INLINE +ldiv_t __devicelib_ldiv(long x, long y) { return {x / y, x % y}; } + +DEVICE_EXTERN_C_INLINE +lldiv_t __devicelib_lldiv(long long x, long long y) { return {x / y, x % y}; } + +DEVICE_EXTERN_C_INLINE +float __devicelib_scalbnf(float x, int n) { return __nv_ldexpf(x, n); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_logf(float x) { return __nv_log(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_expf(float x) { return __nv_exp(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_frexpf(float x, int *exp) { + return __nv_frexpf(x, exp); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_ldexpf(float x, int exp) { return __nv_ldexp(x, exp); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_log10f(float x) { return __nv_log10f(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_modff(float x, float *intpart) { + return __nv_modff(x, intpart); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_exp2f(float x) { return __nv_exp2f(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_expm1f(float x) { return __nv_expm1f(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_ilogbf(float x) { return __nv_ilogbf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_log1pf(float x) { return __nv_log1pf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_log2f(float x) { return __nv_log2f(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_logbf(float x) { return __nv_logbf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_sqrtf(float x) { return __nv_sqrtf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_cbrtf(float x) { return __nv_cbrtf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_hypotf(float x, float y) { return __nv_hypotf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_erff(float x) { return __nv_erff(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_erfcf(float x) { return __nv_erfcf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_tgammaf(float x) { return __nv_tgammaf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_lgammaf(float x) { return __nv_lgammaf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_fmodf(float x, float y) { return __nv_fmodf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_remainderf(float x, float y) { + return __nv_remainderf(x, y); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_remquof(float x, float y, int *q) { + return __nv_remquof(x, y, q); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_nextafterf(float x, float y) { + return __nv_nextafterf(x, y); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_fdimf(float x, float y) { return __nv_fdimf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_fmaf(float x, float y, float z) { + return __nv_fmaf(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_sinf(float x) { return __nv_sinf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_cosf(float x) { return __nv_cosf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_tanf(float x) { return __nv_tanf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_powf(float x, float y) { return __nv_powf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_acosf(float x) { return __nv_acosf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_asinf(float x) { return __nv_asinf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_atanf(float x) { return __nv_atanf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_atan2f(float x, float y) { return __nv_atan2f(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_coshf(float x) { return __nv_coshf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_sinhf(float x) { return __nv_sinhf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_tanhf(float x) { return __nv_tanhf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_acoshf(float x) { return __nv_acoshf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_asinhf(float x) { return __nv_asinhf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_atanhf(float x) { return __nv_atanhf(x); } + +#endif // __NVPTX__ diff --git a/libdevice/include/nv_libdevice.h b/libdevice/include/nv_libdevice.h new file mode 100644 index 0000000000000..e80dc75334852 --- /dev/null +++ b/libdevice/include/nv_libdevice.h @@ -0,0 +1,360 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_NVIDIACL_LIBDEVICE_H +#define PTX_NVIDIACL_LIBDEVICE_H + +DEVICE_EXTERN_C_INLINE int __nv_abs(int); +DEVICE_EXTERN_C_INLINE double __nv_acos(double); +DEVICE_EXTERN_C_INLINE float __nv_acosf(float); +DEVICE_EXTERN_C_INLINE double __nv_acosh(double); +DEVICE_EXTERN_C_INLINE float __nv_acoshf(float); +DEVICE_EXTERN_C_INLINE double __nv_asin(double); +DEVICE_EXTERN_C_INLINE float __nv_asinf(float); +DEVICE_EXTERN_C_INLINE double __nv_asinh(double); +DEVICE_EXTERN_C_INLINE float __nv_asinhf(float); +DEVICE_EXTERN_C_INLINE double __nv_atan(double); +DEVICE_EXTERN_C_INLINE double __nv_atan2(double, double); +DEVICE_EXTERN_C_INLINE float __nv_atan2f(float, float); +DEVICE_EXTERN_C_INLINE float __nv_atanf(float); +DEVICE_EXTERN_C_INLINE double __nv_atanh(double); +DEVICE_EXTERN_C_INLINE float __nv_atanhf(float); +DEVICE_EXTERN_C_INLINE int __nv_brev(int); +DEVICE_EXTERN_C_INLINE long __nv_brevll(long); +DEVICE_EXTERN_C_INLINE int __nv_byte_perm(int, int, int); +DEVICE_EXTERN_C_INLINE double __nv_cbrt(double); +DEVICE_EXTERN_C_INLINE float __nv_cbrtf(float); +DEVICE_EXTERN_C_INLINE double __nv_ceil(double); +DEVICE_EXTERN_C_INLINE float __nv_ceilf(float); +DEVICE_EXTERN_C_INLINE int __nv_clz(int); +DEVICE_EXTERN_C_INLINE int __nv_clzll(long); +DEVICE_EXTERN_C_INLINE double __nv_copysign(double, double); +DEVICE_EXTERN_C_INLINE float __nv_copysignf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_cos(double); +DEVICE_EXTERN_C_INLINE float __nv_cosf(float); +DEVICE_EXTERN_C_INLINE double __nv_cosh(double); +DEVICE_EXTERN_C_INLINE float __nv_coshf(float); +DEVICE_EXTERN_C_INLINE double __nv_cospi(double); +DEVICE_EXTERN_C_INLINE float __nv_cospif(float); +DEVICE_EXTERN_C_INLINE double __nv_cyl_bessel_i0(double); +DEVICE_EXTERN_C_INLINE float __nv_cyl_bessel_i0f(float); +DEVICE_EXTERN_C_INLINE double __nv_cyl_bessel_i1(double); +DEVICE_EXTERN_C_INLINE float __nv_cyl_bessel_i1f(float); +DEVICE_EXTERN_C_INLINE double __nv_dadd_rd(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dadd_rn(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dadd_ru(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dadd_rz(double, double); +DEVICE_EXTERN_C_INLINE double __nv_ddiv_rd(double, double); +DEVICE_EXTERN_C_INLINE double __nv_ddiv_rn(double, double); +DEVICE_EXTERN_C_INLINE double __nv_ddiv_ru(double, double); +DEVICE_EXTERN_C_INLINE double __nv_ddiv_rz(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dmul_rd(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dmul_rn(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dmul_ru(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dmul_rz(double, double); +DEVICE_EXTERN_C_INLINE float __nv_double2float_rd(double); +DEVICE_EXTERN_C_INLINE float __nv_double2float_rn(double); +DEVICE_EXTERN_C_INLINE float __nv_double2float_ru(double); +DEVICE_EXTERN_C_INLINE float __nv_double2float_rz(double); +DEVICE_EXTERN_C_INLINE int __nv_double2hiint(double); +DEVICE_EXTERN_C_INLINE int __nv_double2int_rd(double); +DEVICE_EXTERN_C_INLINE int __nv_double2int_rn(double); +DEVICE_EXTERN_C_INLINE int __nv_double2int_ru(double); +DEVICE_EXTERN_C_INLINE int __nv_double2int_rz(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ll_rd(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ll_rn(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ll_ru(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ll_rz(double); +DEVICE_EXTERN_C_INLINE int __nv_double2loint(double); +DEVICE_EXTERN_C_INLINE int __nv_double2uint_rd(double); +DEVICE_EXTERN_C_INLINE int __nv_double2uint_rn(double); +DEVICE_EXTERN_C_INLINE int __nv_double2uint_ru(double); +DEVICE_EXTERN_C_INLINE int __nv_double2uint_rz(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ull_rd(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ull_rn(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ull_ru(double); +DEVICE_EXTERN_C_INLINE long __nv_double2ull_rz(double); +DEVICE_EXTERN_C_INLINE long __nv_double_as_longlong(double); +DEVICE_EXTERN_C_INLINE double __nv_drcp_rd(double); +DEVICE_EXTERN_C_INLINE double __nv_drcp_rn(double); +DEVICE_EXTERN_C_INLINE double __nv_drcp_ru(double); +DEVICE_EXTERN_C_INLINE double __nv_drcp_rz(double); +DEVICE_EXTERN_C_INLINE double __nv_dsqrt_rd(double); +DEVICE_EXTERN_C_INLINE double __nv_dsqrt_rn(double); +DEVICE_EXTERN_C_INLINE double __nv_dsqrt_ru(double); +DEVICE_EXTERN_C_INLINE double __nv_dsqrt_rz(double); +DEVICE_EXTERN_C_INLINE double __nv_dsub_rd(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dsub_rn(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dsub_ru(double, double); +DEVICE_EXTERN_C_INLINE double __nv_dsub_rz(double, double); +DEVICE_EXTERN_C_INLINE double __nv_erf(double); +DEVICE_EXTERN_C_INLINE double __nv_erfc(double); +DEVICE_EXTERN_C_INLINE float __nv_erfcf(float); +DEVICE_EXTERN_C_INLINE double __nv_erfcinv(double); +DEVICE_EXTERN_C_INLINE float __nv_erfcinvf(float); +DEVICE_EXTERN_C_INLINE double __nv_erfcx(double); +DEVICE_EXTERN_C_INLINE float __nv_erfcxf(float); +DEVICE_EXTERN_C_INLINE float __nv_erff(float); +DEVICE_EXTERN_C_INLINE double __nv_erfinv(double); +DEVICE_EXTERN_C_INLINE float __nv_erfinvf(float); +DEVICE_EXTERN_C_INLINE double __nv_exp(double); +DEVICE_EXTERN_C_INLINE double __nv_exp10(double); +DEVICE_EXTERN_C_INLINE float __nv_exp10f(float); +DEVICE_EXTERN_C_INLINE double __nv_exp2(double); +DEVICE_EXTERN_C_INLINE float __nv_exp2f(float); +DEVICE_EXTERN_C_INLINE float __nv_expf(float); +DEVICE_EXTERN_C_INLINE double __nv_expm1(double); +DEVICE_EXTERN_C_INLINE float __nv_expm1f(float); +DEVICE_EXTERN_C_INLINE double __nv_fabs(double); +DEVICE_EXTERN_C_INLINE float __nv_fabsf(float); +DEVICE_EXTERN_C_INLINE float __nv_fadd_rd(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fadd_rn(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fadd_ru(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fadd_rz(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fast_cosf(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_exp10f(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_expf(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_fdividef(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fast_log10f(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_log2f(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_logf(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_powf(float, float); +DEVICE_EXTERN_C_INLINE void __nv_fast_sincosf(float, float *, float *); +DEVICE_EXTERN_C_INLINE float __nv_fast_sinf(float); +DEVICE_EXTERN_C_INLINE float __nv_fast_tanf(float); +DEVICE_EXTERN_C_INLINE double __nv_fdim(double, double); +DEVICE_EXTERN_C_INLINE float __nv_fdimf(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fdiv_rd(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fdiv_rn(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fdiv_ru(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fdiv_rz(float, float); +DEVICE_EXTERN_C_INLINE int __nv_ffs(int); +DEVICE_EXTERN_C_INLINE int __nv_ffsll(long); +DEVICE_EXTERN_C_INLINE int __nv_finitef(float); +DEVICE_EXTERN_C_INLINE short __nv_float2half_rn(float); +DEVICE_EXTERN_C_INLINE int __nv_float2int_rd(float); +DEVICE_EXTERN_C_INLINE int __nv_float2int_rn(float); +DEVICE_EXTERN_C_INLINE int __nv_float2int_ru(float); +DEVICE_EXTERN_C_INLINE int __nv_float2int_rz(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ll_rd(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ll_rn(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ll_ru(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ll_rz(float); +DEVICE_EXTERN_C_INLINE int __nv_float2uint_rd(float); +DEVICE_EXTERN_C_INLINE int __nv_float2uint_rn(float); +DEVICE_EXTERN_C_INLINE int __nv_float2uint_ru(float); +DEVICE_EXTERN_C_INLINE int __nv_float2uint_rz(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ull_rd(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ull_rn(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ull_ru(float); +DEVICE_EXTERN_C_INLINE long __nv_float2ull_rz(float); +DEVICE_EXTERN_C_INLINE int __nv_float_as_int(float); +DEVICE_EXTERN_C_INLINE int __nv_float_as_uint(float); +DEVICE_EXTERN_C_INLINE double __nv_floor(double); +DEVICE_EXTERN_C_INLINE float __nv_floorf(float); +DEVICE_EXTERN_C_INLINE double __nv_fma(double, double, double); +DEVICE_EXTERN_C_INLINE double __nv_fma_rd(double, double, double); +DEVICE_EXTERN_C_INLINE double __nv_fma_rn(double, double, double); +DEVICE_EXTERN_C_INLINE double __nv_fma_ru(double, double, double); +DEVICE_EXTERN_C_INLINE double __nv_fma_rz(double, double, double); +DEVICE_EXTERN_C_INLINE float __nv_fmaf(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_rd(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_rn(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_ru(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_rz(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_rd(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_rn(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_ru(float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmaf_rz(float, float, float); +DEVICE_EXTERN_C_INLINE double __nv_fmax(double, double); +DEVICE_EXTERN_C_INLINE float __nv_fmaxf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_fmin(double, double); +DEVICE_EXTERN_C_INLINE float __nv_fminf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_fmod(double, double); +DEVICE_EXTERN_C_INLINE float __nv_fmodf(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmul_rd(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmul_rn(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmul_ru(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fmul_rz(float, float); +DEVICE_EXTERN_C_INLINE float __nv_frcp_rd(float); +DEVICE_EXTERN_C_INLINE float __nv_frcp_rn(float); +DEVICE_EXTERN_C_INLINE float __nv_frcp_ru(float); +DEVICE_EXTERN_C_INLINE float __nv_frcp_rz(float); +DEVICE_EXTERN_C_INLINE double __nv_frexp(double, int *); +DEVICE_EXTERN_C_INLINE float __nv_frexpf(float, int *); +DEVICE_EXTERN_C_INLINE float __nv_frsqrt_rn(float); +DEVICE_EXTERN_C_INLINE float __nv_fsqrt_rd(float); +DEVICE_EXTERN_C_INLINE float __nv_fsqrt_rn(float); +DEVICE_EXTERN_C_INLINE float __nv_fsqrt_ru(float); +DEVICE_EXTERN_C_INLINE float __nv_fsqrt_rz(float); +DEVICE_EXTERN_C_INLINE float __nv_fsub_rd(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fsub_rn(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fsub_ru(float, float); +DEVICE_EXTERN_C_INLINE float __nv_fsub_rz(float, float); +DEVICE_EXTERN_C_INLINE int __nv_hadd(int, int); +DEVICE_EXTERN_C_INLINE float __nv_half2float(short); +DEVICE_EXTERN_C_INLINE double __nv_hiloint2double(int, int); +DEVICE_EXTERN_C_INLINE double __nv_hypot(double, double); +DEVICE_EXTERN_C_INLINE float __nv_hypotf(float, float); +DEVICE_EXTERN_C_INLINE int __nv_ilogb(double); +DEVICE_EXTERN_C_INLINE int __nv_ilogbf(float); +DEVICE_EXTERN_C_INLINE double __nv_int2double_rn(int); +DEVICE_EXTERN_C_INLINE float __nv_int2float_rd(int); +DEVICE_EXTERN_C_INLINE float __nv_int2float_rn(int); +DEVICE_EXTERN_C_INLINE float __nv_int2float_ru(int); +DEVICE_EXTERN_C_INLINE float __nv_int2float_rz(int); +DEVICE_EXTERN_C_INLINE float __nv_int_as_float(int); +DEVICE_EXTERN_C_INLINE int __nv_isfinited(double); +DEVICE_EXTERN_C_INLINE int __nv_isinfd(double); +DEVICE_EXTERN_C_INLINE int __nv_isinff(float); +DEVICE_EXTERN_C_INLINE int __nv_isnand(double); +DEVICE_EXTERN_C_INLINE int __nv_isnanf(float); +DEVICE_EXTERN_C_INLINE double __nv_j0(double); +DEVICE_EXTERN_C_INLINE float __nv_j0f(float); +DEVICE_EXTERN_C_INLINE double __nv_j1(double); +DEVICE_EXTERN_C_INLINE float __nv_j1f(float); +DEVICE_EXTERN_C_INLINE double __nv_jn(int, double); +DEVICE_EXTERN_C_INLINE float __nv_jnf(int, float); +DEVICE_EXTERN_C_INLINE double __nv_ldexp(double, int); +DEVICE_EXTERN_C_INLINE float __nv_ldexpf(float, int); +DEVICE_EXTERN_C_INLINE double __nv_lgamma(double); +DEVICE_EXTERN_C_INLINE float __nv_lgammaf(float); +DEVICE_EXTERN_C_INLINE double __nv_ll2double_rd(long); +DEVICE_EXTERN_C_INLINE double __nv_ll2double_rn(long); +DEVICE_EXTERN_C_INLINE double __nv_ll2double_ru(long); +DEVICE_EXTERN_C_INLINE double __nv_ll2double_rz(long); +DEVICE_EXTERN_C_INLINE float __nv_ll2float_rd(long); +DEVICE_EXTERN_C_INLINE float __nv_ll2float_rn(long); +DEVICE_EXTERN_C_INLINE float __nv_ll2float_ru(long); +DEVICE_EXTERN_C_INLINE float __nv_ll2float_rz(long); +DEVICE_EXTERN_C_INLINE long __nv_llabs(long); +DEVICE_EXTERN_C_INLINE long __nv_llmax(long, long); +DEVICE_EXTERN_C_INLINE long __nv_llmin(long, long); +DEVICE_EXTERN_C_INLINE long __nv_llrint(double); +DEVICE_EXTERN_C_INLINE long __nv_llrintf(float); +DEVICE_EXTERN_C_INLINE long __nv_llround(double); +DEVICE_EXTERN_C_INLINE long __nv_llroundf(float); +DEVICE_EXTERN_C_INLINE double __nv_log(double); +DEVICE_EXTERN_C_INLINE double __nv_log10(double); +DEVICE_EXTERN_C_INLINE float __nv_log10f(float); +DEVICE_EXTERN_C_INLINE double __nv_log1p(double); +DEVICE_EXTERN_C_INLINE float __nv_log1pf(float); +DEVICE_EXTERN_C_INLINE double __nv_log2(double); +DEVICE_EXTERN_C_INLINE float __nv_log2f(float); +DEVICE_EXTERN_C_INLINE double __nv_logb(double); +DEVICE_EXTERN_C_INLINE float __nv_logbf(float); +DEVICE_EXTERN_C_INLINE float __nv_logf(float); +DEVICE_EXTERN_C_INLINE double __nv_longlong_as_double(long); +DEVICE_EXTERN_C_INLINE int __nv_max(int, int); +DEVICE_EXTERN_C_INLINE int __nv_min(int, int); +DEVICE_EXTERN_C_INLINE double __nv_modf(double, double *); +DEVICE_EXTERN_C_INLINE float __nv_modff(float, float *); +DEVICE_EXTERN_C_INLINE int __nv_mul24(int, int); +DEVICE_EXTERN_C_INLINE long __nv_mul64hi(long, long); +DEVICE_EXTERN_C_INLINE int __nv_mulhi(int, int); +DEVICE_EXTERN_C_INLINE double __nv_nan(char *); +DEVICE_EXTERN_C_INLINE float __nv_nanf(char *); +DEVICE_EXTERN_C_INLINE double __nv_nearbyint(double); +DEVICE_EXTERN_C_INLINE float __nv_nearbyintf(float); +DEVICE_EXTERN_C_INLINE double __nv_nextafter(double, double); +DEVICE_EXTERN_C_INLINE float __nv_nextafterf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_norm(int, double *); +DEVICE_EXTERN_C_INLINE double __nv_norm3d(double, double, double); +DEVICE_EXTERN_C_INLINE float __nv_norm3df(float, float, float); +DEVICE_EXTERN_C_INLINE double __nv_norm4d(double, double, double, double); +DEVICE_EXTERN_C_INLINE float __nv_norm4df(float, float, float, float); +DEVICE_EXTERN_C_INLINE double __nv_normcdf(double); +DEVICE_EXTERN_C_INLINE float __nv_normcdff(float); +DEVICE_EXTERN_C_INLINE double __nv_normcdfinv(double); +DEVICE_EXTERN_C_INLINE float __nv_normcdfinvf(float); +DEVICE_EXTERN_C_INLINE float __nv_normf(int, float *); +DEVICE_EXTERN_C_INLINE int __nv_popc(int); +DEVICE_EXTERN_C_INLINE int __nv_popcll(long); +DEVICE_EXTERN_C_INLINE double __nv_pow(double, double); +DEVICE_EXTERN_C_INLINE float __nv_powf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_powi(double, int); +DEVICE_EXTERN_C_INLINE float __nv_powif(float, int); +DEVICE_EXTERN_C_INLINE double __nv_rcbrt(double); +DEVICE_EXTERN_C_INLINE float __nv_rcbrtf(float); +DEVICE_EXTERN_C_INLINE double __nv_rcp64h(double); +DEVICE_EXTERN_C_INLINE double __nv_remainder(double, double); +DEVICE_EXTERN_C_INLINE float __nv_remainderf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_remquo(double, double, int *); +DEVICE_EXTERN_C_INLINE float __nv_remquof(float, float, int *); +DEVICE_EXTERN_C_INLINE int __nv_rhadd(int, int); +DEVICE_EXTERN_C_INLINE double __nv_rhypot(double, double); +DEVICE_EXTERN_C_INLINE float __nv_rhypotf(float, float); +DEVICE_EXTERN_C_INLINE double __nv_rint(double); +DEVICE_EXTERN_C_INLINE float __nv_rintf(float); +DEVICE_EXTERN_C_INLINE double __nv_rnorm(int, double *); +DEVICE_EXTERN_C_INLINE double __nv_rnorm3d(double, double, double); +DEVICE_EXTERN_C_INLINE float __nv_rnorm3df(float, float, float); +DEVICE_EXTERN_C_INLINE double __nv_rnorm4d(double, double, double, double); +DEVICE_EXTERN_C_INLINE float __nv_rnorm4df(float, float, float, float); +DEVICE_EXTERN_C_INLINE float __nv_rnormf(int, float *); +DEVICE_EXTERN_C_INLINE double __nv_round(double); +DEVICE_EXTERN_C_INLINE float __nv_roundf(float); +DEVICE_EXTERN_C_INLINE double __nv_rsqrt(double); +DEVICE_EXTERN_C_INLINE float __nv_rsqrtf(float); +DEVICE_EXTERN_C_INLINE int __nv_sad(int, int, int); +DEVICE_EXTERN_C_INLINE float __nv_saturatef(float); +DEVICE_EXTERN_C_INLINE double __nv_scalbn(double, int); +DEVICE_EXTERN_C_INLINE float __nv_scalbnf(float, int); +DEVICE_EXTERN_C_INLINE int __nv_signbitd(double); +DEVICE_EXTERN_C_INLINE int __nv_signbitf(float); +DEVICE_EXTERN_C_INLINE double __nv_sin(double); +DEVICE_EXTERN_C_INLINE void __nv_sincos(double, double *, double *); +DEVICE_EXTERN_C_INLINE void __nv_sincosf(float, float *, float *); +DEVICE_EXTERN_C_INLINE void __nv_sincospi(double, double *, double *); +DEVICE_EXTERN_C_INLINE void __nv_sincospif(float, float *, float *); +DEVICE_EXTERN_C_INLINE float __nv_sinf(float); +DEVICE_EXTERN_C_INLINE double __nv_sinh(double); +DEVICE_EXTERN_C_INLINE float __nv_sinhf(float); +DEVICE_EXTERN_C_INLINE double __nv_sinpi(double); +DEVICE_EXTERN_C_INLINE float __nv_sinpif(float); +DEVICE_EXTERN_C_INLINE double __nv_sqrt(double); +DEVICE_EXTERN_C_INLINE float __nv_sqrtf(float); +DEVICE_EXTERN_C_INLINE double __nv_tan(double); +DEVICE_EXTERN_C_INLINE float __nv_tanf(float); +DEVICE_EXTERN_C_INLINE double __nv_tanh(double); +DEVICE_EXTERN_C_INLINE float __nv_tanhf(float); +DEVICE_EXTERN_C_INLINE double __nv_tgamma(double); +DEVICE_EXTERN_C_INLINE float __nv_tgammaf(float); +DEVICE_EXTERN_C_INLINE double __nv_trunc(double); +DEVICE_EXTERN_C_INLINE float __nv_truncf(float); +DEVICE_EXTERN_C_INLINE int __nv_uhadd(int, int); +DEVICE_EXTERN_C_INLINE double __nv_uint2double_rn(int); +DEVICE_EXTERN_C_INLINE float __nv_uint2float_rd(int); +DEVICE_EXTERN_C_INLINE float __nv_uint2float_rn(int); +DEVICE_EXTERN_C_INLINE float __nv_uint2float_ru(int); +DEVICE_EXTERN_C_INLINE float __nv_uint2float_rz(int); +DEVICE_EXTERN_C_INLINE float __nv_uint_as_float(int); +DEVICE_EXTERN_C_INLINE double __nv_ull2double_rd(long); +DEVICE_EXTERN_C_INLINE double __nv_ull2double_rn(long); +DEVICE_EXTERN_C_INLINE double __nv_ull2double_ru(long); +DEVICE_EXTERN_C_INLINE double __nv_ull2double_rz(long); +DEVICE_EXTERN_C_INLINE float __nv_ull2float_rd(long); +DEVICE_EXTERN_C_INLINE float __nv_ull2float_rn(long); +DEVICE_EXTERN_C_INLINE float __nv_ull2float_ru(long); +DEVICE_EXTERN_C_INLINE float __nv_ull2float_rz(long); +DEVICE_EXTERN_C_INLINE long __nv_ullmax(long, long); +DEVICE_EXTERN_C_INLINE long __nv_ullmin(long, long); +DEVICE_EXTERN_C_INLINE int __nv_umax(int, int); +DEVICE_EXTERN_C_INLINE int __nv_umin(int, int); +DEVICE_EXTERN_C_INLINE int __nv_umul24(int, int); +DEVICE_EXTERN_C_INLINE long __nv_umul64hi(long, long); +DEVICE_EXTERN_C_INLINE int __nv_umulhi(int, int); +DEVICE_EXTERN_C_INLINE int __nv_urhadd(int, int); +DEVICE_EXTERN_C_INLINE int __nv_usad(int, int, int); +DEVICE_EXTERN_C_INLINE double __nv_y0(double); +DEVICE_EXTERN_C_INLINE float __nv_y0f(float); +DEVICE_EXTERN_C_INLINE double __nv_y1(double); +DEVICE_EXTERN_C_INLINE float __nv_y1f(float); +DEVICE_EXTERN_C_INLINE double __nv_yn(int, double); +DEVICE_EXTERN_C_INLINE float __nv_ynf(int, float); +#endif From 1b4afb77c1c28eed1910b9a32b5f74292d2589a6 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 6 Jul 2022 09:08:15 +0100 Subject: [PATCH 05/36] Update cmake --- libdevice/cmake/modules/SYCLLibdevice.cmake | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 4953aac3c5111..547b7deb370c9 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -22,9 +22,6 @@ string(CONCAT sycl_targets_opt "spir64_fpga-unknown-unknown," "spir64-unknown-unknown") -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") -endif() set(compile_opts # suppress an error about SYCL_EXTERNAL being used for @@ -36,6 +33,11 @@ set(compile_opts -sycl-std=2020 ) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") + list(APPEND compile_opts "-fno-sycl-libspirv") +endif() + if (WIN32) list(APPEND compile_opts -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) list(APPEND compile_opts -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) From 9fa9bbae3f25b13ae2073bfc08701bb6bbbafdb7 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 6 Jul 2022 15:20:01 +0100 Subject: [PATCH 06/36] Add compiler flag --- clang/include/clang/Basic/CodeGenOptions.def | 2 ++ clang/include/clang/Driver/Options.td | 7 +++++++ clang/lib/Driver/ToolChains/Clang.cpp | 3 ++- libdevice/cmake/modules/SYCLLibdevice.cmake | 4 +++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index d564b5737f67a..4585253657938 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -127,6 +127,8 @@ CODEGENOPT(XRayAlwaysEmitTypedEvents , 1, 0) ///< Set when -fxray-ignore-loops is enabled. CODEGENOPT(XRayIgnoreLoops , 1, 0) +CODEGENOPT(BundleNoOffloadArch, 1, 0) ///< Set when -fbundle-no-offload-arch is enabled. + ///< Set with -fno-xray-function-index to omit the index section. CODEGENOPT(XRayOmitFunctionIndex , 1, 0) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8dcc26f02eef7..f1e7c7aafddb4 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -952,6 +952,13 @@ def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOpt HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>, HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; +def fbundle_no_offload_arch : Flag<["-"], "fbundle-no-offload-arch">, Flags<[CC1Option]>, + HelpText<"Specify that the offload bundler should not identify a bundle with " + "any specific arch. When used the bundle for, for instance, " + "`nvptx64-nvidia-cuda-sm_80` becomes instead `nvptx64-nvidia-cuda`. " + "This allows .o files to contain .bc bundles that are unspecific " + "to a particular arch version.">, + MarshallingInfoFlag>; def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>, HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target ID in the form of a " "device architecture followed by target ID features delimited by a colon. Each target ID feature " diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 43a1ebef50f44..10822a4e911aa 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8673,7 +8673,8 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, Triples += CurTC->getTriple().normalize(); if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP || CurKind == Action::OFK_Cuda || CurKind == Action::OFK_SYCL) && - !StringRef(CurDep->getOffloadingArch()).empty()) { + !StringRef(CurDep->getOffloadingArch()).empty() && + !TCArgs.hasArg(options::OPT_fbundle_no_offload_arch)) { Triples += '-'; Triples += CurDep->getOffloadingArch(); } diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 547b7deb370c9..cd9e86d7dc660 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -35,7 +35,9 @@ set(compile_opts if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") - list(APPEND compile_opts "-fno-sycl-libspirv") + list(APPEND compile_opts + "-fno-sycl-libspirv" + "-fbundle-no-offload-arch") endif() if (WIN32) From 9daef09a4fa626b2d4c77cd39ec81a44b294ef4a Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 8 Jul 2022 10:04:37 +0100 Subject: [PATCH 07/36] WIP --- clang/lib/Driver/Driver.cpp | 17 ++++++++++++++--- clang/lib/Driver/ToolChains/Clang.cpp | 3 ++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8119087e06572..26cb695d2371e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1,3 +1,4 @@ +#include //===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -4776,7 +4777,7 @@ class OffloadingActionBuilder final { llvm::zip(SYCLDeviceActions, SYCLTargetInfoList)) { Action *A = std::get<0>(TargetActionInfo); DeviceTargetInfo &TargetInfo = std::get<1>(TargetActionInfo); - + OffloadAction::DeviceDependences Dep; Dep.add(*A, *TargetInfo.TC, TargetInfo.BoundArch, Action::OFK_SYCL); AL.push_back(C.MakeAction(Dep, A->getType())); @@ -4883,8 +4884,17 @@ class OffloadingActionBuilder final { auto *SYCLDeviceLibsUnbundleAction = C.MakeAction( SYCLDeviceLibsInputAction); - addDeviceDepences(SYCLDeviceLibsUnbundleAction); - DeviceLinkObjects.push_back(SYCLDeviceLibsUnbundleAction); + + OffloadAction::DeviceDependences Dep; + //Dep.add(*SYCLDeviceLibsInputAction, *TC, /*BoundArch=*/nullptr, + // Action::OFK_SYCL); + Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/nullptr, + Action::OFK_SYCL); + auto *SYCLDeviceLibsDependenciesAction = + C.MakeAction( + Dep, SYCLDeviceLibsUnbundleAction->getType()); + + DeviceLinkObjects.push_back(SYCLDeviceLibsDependenciesAction); if (!LibLocSelected) LibLocSelected = !LibLocSelected; } @@ -7778,6 +7788,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( // be returned for the current depending action. std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + std::cerr << "ActionTC.second " << ActionTC.second << std::endl; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); Result = CachedResults[ActionTC].front(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 10822a4e911aa..371eed013856f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8854,7 +8854,8 @@ void OffloadBundler::ConstructJobMultipleOutputs( Dep.DependentOffloadKind == Action::OFK_OpenMP || Dep.DependentOffloadKind == Action::OFK_Cuda || Dep.DependentOffloadKind == Action::OFK_SYCL) && - !Dep.DependentBoundArch.empty()) { + !Dep.DependentBoundArch.empty() && + !TCArgs.hasArg(options::OPT_fbundle_no_offload_arch)) { Triples += '-'; Triples += Dep.DependentBoundArch; } From 19a2aa721915f42bfc0e46c477c26445091eabd5 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 15 Jul 2022 10:19:51 +0100 Subject: [PATCH 08/36] Working unbundle --- clang/lib/Driver/Driver.cpp | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 26cb695d2371e..5328d10127d82 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3479,7 +3479,8 @@ class OffloadingActionBuilder final { /// Update the state to include the provided host action \a HostAction as a /// dependency of the current device action. By default it is inactive. - virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) { + virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction, + const bool UseArch=true) { return ABRT_Inactive; } @@ -3573,7 +3574,8 @@ class OffloadingActionBuilder final { Action::OffloadKind OFKind) : DeviceActionBuilder(C, Args, Inputs, OFKind) {} - ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + ActionBuilderReturnCode addDeviceDepences(Action *HostAction, + const bool UseArch) override { // While generating code for CUDA, we only depend on the host input action // to trigger the creation of all the CUDA device actions. @@ -4244,7 +4246,8 @@ class OffloadingActionBuilder final { return ABRT_Success; } - ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + ActionBuilderReturnCode + addDeviceDepences(Action *HostAction, const bool UseArch = true) override { // If this is an input action replicate it for each OpenMP toolchain. if (auto *IA = dyn_cast(HostAction)) { @@ -4680,7 +4683,7 @@ class OffloadingActionBuilder final { return ABRT_Success; } - ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + ActionBuilderReturnCode addDeviceDepences(Action *HostAction, const bool UseArch=true) override { // If this is an input action replicate it for each SYCL toolchain. if (auto *IA = dyn_cast(HostAction)) { @@ -4740,7 +4743,8 @@ class OffloadingActionBuilder final { // Create 1 device action per triple/bound arch for (auto &TargetInfo : SYCLTargetInfoList) { SYCLDeviceActions.push_back(UA); - UA->registerDependentActionInfo(TargetInfo.TC, TargetInfo.BoundArch, + const auto *Arch = UseArch ? TargetInfo.BoundArch : nullptr; + UA->registerDependentActionInfo(TargetInfo.TC, Arch, Action::OFK_SYCL); } return ABRT_Success; @@ -4884,17 +4888,8 @@ class OffloadingActionBuilder final { auto *SYCLDeviceLibsUnbundleAction = C.MakeAction( SYCLDeviceLibsInputAction); - - OffloadAction::DeviceDependences Dep; - //Dep.add(*SYCLDeviceLibsInputAction, *TC, /*BoundArch=*/nullptr, - // Action::OFK_SYCL); - Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/nullptr, - Action::OFK_SYCL); - auto *SYCLDeviceLibsDependenciesAction = - C.MakeAction( - Dep, SYCLDeviceLibsUnbundleAction->getType()); - - DeviceLinkObjects.push_back(SYCLDeviceLibsDependenciesAction); + addDeviceDepences(SYCLDeviceLibsUnbundleAction, false); + DeviceLinkObjects.push_back(SYCLDeviceLibsUnbundleAction); if (!LibLocSelected) LibLocSelected = !LibLocSelected; } @@ -7788,8 +7783,12 @@ InputInfoList Driver::BuildJobsForActionNoCache( // be returned for the current depending action. std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; - std::cerr << "ActionTC.second " << ActionTC.second << std::endl; - assert(CachedResults.find(ActionTC) != CachedResults.end() && + + if (CachedResults.find(ActionTC) == CachedResults.end()) + ActionTC = { + A, GetTriplePlusArchString(TC, "", TargetDeviceOffloadKind)}; + + assert((CachedResults.find(ActionTC) != CachedResults.end()) && "Result does not exist??"); Result = CachedResults[ActionTC].front(); } From 36558314d034bb09e6aa9be14d627762d4e87a22 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 15 Jul 2022 10:33:00 +0100 Subject: [PATCH 09/36] Some tidying --- clang/lib/Driver/Driver.cpp | 19 +++++++++---------- clang/lib/Driver/ToolChains/SYCL.cpp | 2 +- clang/lib/Driver/ToolChains/SYCL.h | 1 + libdevice/cmake/modules/SYCLLibdevice.cmake | 1 - 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5328d10127d82..14a3ee34b9a26 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1,4 +1,3 @@ -#include //===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -3574,8 +3573,8 @@ class OffloadingActionBuilder final { Action::OffloadKind OFKind) : DeviceActionBuilder(C, Args, Inputs, OFKind) {} - ActionBuilderReturnCode addDeviceDepences(Action *HostAction, - const bool UseArch) override { + ActionBuilderReturnCode + addDeviceDepences(Action *HostAction, const bool UseArch = true) override { // While generating code for CUDA, we only depend on the host input action // to trigger the creation of all the CUDA device actions. @@ -4449,8 +4448,6 @@ class OffloadingActionBuilder final { /// targets. SmallVector, 8> GpuArchList; - SYCLInstallationDetector SYCLInstallation; - /// Build the last steps for CUDA after all BC files have been linked. JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( @@ -4484,8 +4481,7 @@ class OffloadingActionBuilder final { public: SYCLActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) - : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL), - SYCLInstallation(C.getDriver()) {} + : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {} void withBoundArchForToolChain(const ToolChain *TC, llvm::function_ref Op) { @@ -4683,7 +4679,8 @@ class OffloadingActionBuilder final { return ABRT_Success; } - ActionBuilderReturnCode addDeviceDepences(Action *HostAction, const bool UseArch=true) override { + ActionBuilderReturnCode + addDeviceDepences(Action *HostAction, const bool UseArch = true) override { // If this is an input action replicate it for each SYCL toolchain. if (auto *IA = dyn_cast(HostAction)) { @@ -4781,7 +4778,7 @@ class OffloadingActionBuilder final { llvm::zip(SYCLDeviceActions, SYCLTargetInfoList)) { Action *A = std::get<0>(TargetActionInfo); DeviceTargetInfo &TargetInfo = std::get<1>(TargetActionInfo); - + OffloadAction::DeviceDependences Dep; Dep.add(*A, *TargetInfo.TC, TargetInfo.BoundArch, Action::OFK_SYCL); AL.push_back(C.MakeAction(Dep, A->getType())); @@ -4834,8 +4831,10 @@ class OffloadingActionBuilder final { } } + const toolchains::SYCLToolChain *SYCLTC = + static_cast(TC); SmallVector, 4> LibLocCandidates; - SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); + SYCLTC->SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); StringRef LibSuffix = isMSVCEnv ? ".obj" : ".o"; using SYCLDeviceLibsList = SmallVector; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 49f66ca311993..272462ff2260a 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -618,7 +618,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) - : ToolChain(D, Triple, Args), HostTC(HostTC) { + : ToolChain(D, Triple, Args), HostTC(HostTC), SYCLInstallation(D) { // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 7d8ad328ad083..8c9bcf56a20bd 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -174,6 +174,7 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { const ToolChain &HostTC; + const SYCLInstallationDetector SYCLInstallation; protected: Tool *buildBackendCompiler() const override; diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index cd9e86d7dc660..a18bcd7aff5ef 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -22,7 +22,6 @@ string(CONCAT sycl_targets_opt "spir64_fpga-unknown-unknown," "spir64-unknown-unknown") - set(compile_opts # suppress an error about SYCL_EXTERNAL being used for # a function with a raw pointer parameter. From f668efb27eb1838987de620dc35d25c82ad83437 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 15 Jul 2022 11:25:13 +0100 Subject: [PATCH 10/36] Moving SYCLInstallationDetector from SYCL toolchain to SYCLActionBuilder. This is because the SYCLToolChain is only used for SPIR-V backends --- clang/lib/Driver/Driver.cpp | 10 ++++++---- clang/lib/Driver/ToolChains/SYCL.cpp | 2 +- clang/lib/Driver/ToolChains/SYCL.h | 1 - 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 14a3ee34b9a26..01724ddf3fa53 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4448,6 +4448,9 @@ class OffloadingActionBuilder final { /// targets. SmallVector, 8> GpuArchList; + // SYCLInstallation is needed in order to link SYCLDeviceLibs + SYCLInstallationDetector SYCLInstallation; + /// Build the last steps for CUDA after all BC files have been linked. JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( @@ -4481,7 +4484,8 @@ class OffloadingActionBuilder final { public: SYCLActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) - : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {} + : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL), + SYCLInstallation(C.getDriver()) {} void withBoundArchForToolChain(const ToolChain *TC, llvm::function_ref Op) { @@ -4831,10 +4835,8 @@ class OffloadingActionBuilder final { } } - const toolchains::SYCLToolChain *SYCLTC = - static_cast(TC); SmallVector, 4> LibLocCandidates; - SYCLTC->SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); + SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); StringRef LibSuffix = isMSVCEnv ? ".obj" : ".o"; using SYCLDeviceLibsList = SmallVector; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 272462ff2260a..49f66ca311993 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -618,7 +618,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) - : ToolChain(D, Triple, Args), HostTC(HostTC), SYCLInstallation(D) { + : ToolChain(D, Triple, Args), HostTC(HostTC) { // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 8c9bcf56a20bd..7d8ad328ad083 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -174,7 +174,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { const ToolChain &HostTC; - const SYCLInstallationDetector SYCLInstallation; protected: Tool *buildBackendCompiler() const override; From ecde47d860843d4c098b14e7facd1628e2a78b02 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 19 Jul 2022 10:08:16 +0100 Subject: [PATCH 11/36] Adding dependencies for the unbundle action using registerDependentActionInfo --- clang/lib/Driver/Driver.cpp | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 01724ddf3fa53..dfd15fc2f9ee1 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3478,8 +3478,7 @@ class OffloadingActionBuilder final { /// Update the state to include the provided host action \a HostAction as a /// dependency of the current device action. By default it is inactive. - virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction, - const bool UseArch=true) { + virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) { return ABRT_Inactive; } @@ -3573,8 +3572,7 @@ class OffloadingActionBuilder final { Action::OffloadKind OFKind) : DeviceActionBuilder(C, Args, Inputs, OFKind) {} - ActionBuilderReturnCode - addDeviceDepences(Action *HostAction, const bool UseArch = true) override { + ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { // While generating code for CUDA, we only depend on the host input action // to trigger the creation of all the CUDA device actions. @@ -4246,7 +4244,7 @@ class OffloadingActionBuilder final { } ActionBuilderReturnCode - addDeviceDepences(Action *HostAction, const bool UseArch = true) override { + addDeviceDepences(Action *HostAction) override { // If this is an input action replicate it for each OpenMP toolchain. if (auto *IA = dyn_cast(HostAction)) { @@ -4444,13 +4442,13 @@ class OffloadingActionBuilder final { /// List of static archives to extract FPGA dependency info from ActionList FPGAArchiveInputs; + // SYCLInstallation is needed in order to link SYCLDeviceLibs + SYCLInstallationDetector SYCLInstallation; + /// List of GPU architectures to use in this compilation with NVPTX/AMDGCN /// targets. SmallVector, 8> GpuArchList; - // SYCLInstallation is needed in order to link SYCLDeviceLibs - SYCLInstallationDetector SYCLInstallation; - /// Build the last steps for CUDA after all BC files have been linked. JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( @@ -4683,8 +4681,7 @@ class OffloadingActionBuilder final { return ABRT_Success; } - ActionBuilderReturnCode - addDeviceDepences(Action *HostAction, const bool UseArch = true) override { + ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { // If this is an input action replicate it for each SYCL toolchain. if (auto *IA = dyn_cast(HostAction)) { @@ -4744,8 +4741,7 @@ class OffloadingActionBuilder final { // Create 1 device action per triple/bound arch for (auto &TargetInfo : SYCLTargetInfoList) { SYCLDeviceActions.push_back(UA); - const auto *Arch = UseArch ? TargetInfo.BoundArch : nullptr; - UA->registerDependentActionInfo(TargetInfo.TC, Arch, + UA->registerDependentActionInfo(TargetInfo.TC, TargetInfo.BoundArch, Action::OFK_SYCL); } return ABRT_Success; @@ -4889,8 +4885,16 @@ class OffloadingActionBuilder final { auto *SYCLDeviceLibsUnbundleAction = C.MakeAction( SYCLDeviceLibsInputAction); - addDeviceDepences(SYCLDeviceLibsUnbundleAction, false); - DeviceLinkObjects.push_back(SYCLDeviceLibsUnbundleAction); + SYCLDeviceLibsUnbundleAction->registerDependentActionInfo( + TC, /*BoundArch=*/"", Action::OFK_SYCL); + OffloadAction::DeviceDependences Dep; + Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/nullptr, + Action::OFK_SYCL); + auto *SYCLDeviceLibsDependenciesAction = + C.MakeAction( + Dep, SYCLDeviceLibsUnbundleAction->getType()); + + DeviceLinkObjects.push_back(SYCLDeviceLibsDependenciesAction); if (!LibLocSelected) LibLocSelected = !LibLocSelected; } @@ -7784,11 +7788,11 @@ InputInfoList Driver::BuildJobsForActionNoCache( // be returned for the current depending action. std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; - +/* if (CachedResults.find(ActionTC) == CachedResults.end()) ActionTC = { A, GetTriplePlusArchString(TC, "", TargetDeviceOffloadKind)}; - +*/ assert((CachedResults.find(ActionTC) != CachedResults.end()) && "Result does not exist??"); Result = CachedResults[ActionTC].front(); From 5f57d39712e916e70fa39288882e355ad3bfd66a Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 26 Jul 2022 11:39:07 +0100 Subject: [PATCH 12/36] Working on adding cuda devicelib --- clang/lib/Driver/Driver.cpp | 69 ++++++ clang/lib/Driver/ToolChains/Cuda.h | 1 - libdevice/cmath_wrapper.cpp | 6 + libdevice/cmath_wrapper_fp64.cpp | 4 +- libdevice/device.h | 2 + libdevice/fallback-cmath-fp64.cpp | 4 +- libdevice/fallback-cmath.cpp | 159 +------------ libdevice/include/nv_libdevice.h | 360 ----------------------------- 8 files changed, 83 insertions(+), 522 deletions(-) delete mode 100644 libdevice/include/nv_libdevice.h diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index dfd15fc2f9ee1..b188a85363579 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include #include "clang/Driver/Driver.h" #include "ToolChains/AIX.h" #include "ToolChains/AMDGPU.h" @@ -4899,6 +4900,74 @@ class OffloadingActionBuilder final { LibLocSelected = !LibLocSelected; } } + if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) { + std::string LibSpirvFile; + if (Args.hasArg( + clang::driver::options::OPT_fsycl_libspirv_path_EQ)) { + auto ProvidedPath = + Args.getLastArgValue( + clang::driver::options::OPT_fsycl_libspirv_path_EQ) + .str(); + if (llvm::sys::fs::exists(ProvidedPath)) + LibSpirvFile = ProvidedPath; + } else { + SmallVector LibraryPaths; + + // Expected path w/out install. + SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithoutInstallPath, Twine("../../clc")); + LibraryPaths.emplace_back(WithoutInstallPath.c_str()); + + // Expected path w/ install. + SmallString<256> WithInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithInstallPath, + Twine("../../../share/clc")); + LibraryPaths.emplace_back(WithInstallPath.c_str()); + + // Select remangled libclc variant + std::string LibSpirvTargetName = + (TC->getAuxTriple()->isOSWindows()) + ? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl." + "bc" + : "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl." + "bc"; + + for (StringRef LibraryPath : LibraryPaths) { + SmallString<128> LibSpirvTargetFile(LibraryPath); + llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName); + if (llvm::sys::fs::exists(LibSpirvTargetFile) || + Args.hasArg(options::OPT__HASH_HASH_HASH)) { + LibSpirvFile = std::string(LibSpirvTargetFile.str()); + break; + } + } + } + + if (!LibSpirvFile.empty()) { + Arg *LibClcInputArg = + MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibSpirvFile)); + auto *SYCLLibClcInputAction = + C.MakeAction(*LibClcInputArg, types::TY_Object); + DeviceLinkObjects.push_back(SYCLLibClcInputAction); + } + + const toolchains::CudaToolChain *CudaTC = + static_cast(TC); + std::string LibDeviceFile = + CudaTC->CudaInstallation.getLibDeviceFile( + Args.getLastArgValue(options::OPT_march_EQ)); + if (!LibDeviceFile.empty()) { + Arg *CudaDeviceLibInputArg = + MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibDeviceFile)); + auto *SYCLDeviceLibInputAction = C.MakeAction( + *CudaDeviceLibInputArg, types::TY_Object); + DeviceLinkObjects.push_back(SYCLDeviceLibInputAction); + } else { + std::cout << "LibDeviceFile was empty!!\n"; + } + } } }; addInputs(sycl_device_wrapper_libs); diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index e25affa91a6a7..9edd7dc936cdb 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -184,7 +184,6 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain { bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override; void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind, const llvm::opt::ArgList &Args) const override; - bool IsMathErrnoDefault() const override { return false; } void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index d183a3118ab11..bb6ebb020db13 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -106,9 +106,15 @@ float fdimf(float x, float y) { return __devicelib_fdimf(x, y); } DEVICE_EXTERN_C_INLINE float fmaf(float x, float y, float z) { return __devicelib_fmaf(x, y, z); } +DEVICE_EXTERN_C_INLINE +float sin(float x) { return __devicelib_sinf(x); } + DEVICE_EXTERN_C_INLINE float sinf(float x) { return __devicelib_sinf(x); } +DEVICE_EXTERN_C_INLINE +float cos(float x) { return __devicelib_cosf(x); } + DEVICE_EXTERN_C_INLINE float cosf(float x) { return __devicelib_cosf(x); } diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 351d760fc51e6..be68e7ab56389 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,7 +9,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) // All exported functions in math and complex device libraries are weak // reference. If users provide their own math or complex functions(with @@ -444,4 +444,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/device.h b/libdevice/device.h index 0770d7d82d29a..5aeaedca342cf 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -25,6 +25,7 @@ #define DEVICE_EXTERN_C DEVICE_EXTERNAL EXTERN_C #define DEVICE_EXTERN_C_INLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) +#define DEVICE_EXTERNAL_INLINE DEVICE_EXTERNAL __attribute__((always_inline)) #endif // __SPIR__ || __NVPTX__ #if defined(__SPIR__) || defined(__LIBDEVICE_HOST_IMPL__) @@ -36,6 +37,7 @@ // and it will be linked with user's host code by default. If those functions // are decorated with "weak" attribute, compiler will use PLT entry to call // all __device_imf_* functions, this will lead to crash. +#define DEVICE_EXTERNAL_INLINE DEVICE_EXTERNAL __attribute__((always_inline)) #define DEVICE_EXTERN_C EXTERN_C #define DEVICE_EXTERN_C_INLINE DEVICE_EXTERN_C __attribute__((always_inline)) #endif // __LIBDEVICE_HOST_IMPL__ diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index f3e1606a51dbf..de95639613b42 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -9,7 +9,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -149,4 +149,4 @@ DEVICE_EXTERN_C_INLINE double __devicelib_scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index f6410716cafe1..46fd3326385d6 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,7 +8,7 @@ #include "device_math.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -166,159 +166,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ - -#ifdef __NVPTX__ -#include "include/nv_libdevice.h" - -DEVICE_EXTERN_C_INLINE -int __devicelib_abs(int x) { return x < 0 ? -x : x; } - -DEVICE_EXTERN_C_INLINE -long int __devicelib_labs(long int x) { return x < 0 ? -x : x; } - -DEVICE_EXTERN_C_INLINE -long long int __devicelib_llabs(long long int x) { return x < 0 ? -x : x; } - -DEVICE_EXTERN_C_INLINE -div_t __devicelib_div(int x, int y) { return {x / y, x % y}; } - -DEVICE_EXTERN_C_INLINE -ldiv_t __devicelib_ldiv(long x, long y) { return {x / y, x % y}; } - -DEVICE_EXTERN_C_INLINE -lldiv_t __devicelib_lldiv(long long x, long long y) { return {x / y, x % y}; } - -DEVICE_EXTERN_C_INLINE -float __devicelib_scalbnf(float x, int n) { return __nv_ldexpf(x, n); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_logf(float x) { return __nv_log(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_expf(float x) { return __nv_exp(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_frexpf(float x, int *exp) { - return __nv_frexpf(x, exp); -} - -DEVICE_EXTERN_C_INLINE -float __devicelib_ldexpf(float x, int exp) { return __nv_ldexp(x, exp); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_log10f(float x) { return __nv_log10f(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_modff(float x, float *intpart) { - return __nv_modff(x, intpart); -} - -DEVICE_EXTERN_C_INLINE -float __devicelib_exp2f(float x) { return __nv_exp2f(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_expm1f(float x) { return __nv_expm1f(x); } - -DEVICE_EXTERN_C_INLINE -int __devicelib_ilogbf(float x) { return __nv_ilogbf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_log1pf(float x) { return __nv_log1pf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_log2f(float x) { return __nv_log2f(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_logbf(float x) { return __nv_logbf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_sqrtf(float x) { return __nv_sqrtf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_cbrtf(float x) { return __nv_cbrtf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_hypotf(float x, float y) { return __nv_hypotf(x, y); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_erff(float x) { return __nv_erff(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_erfcf(float x) { return __nv_erfcf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_tgammaf(float x) { return __nv_tgammaf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_lgammaf(float x) { return __nv_lgammaf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_fmodf(float x, float y) { return __nv_fmodf(x, y); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_remainderf(float x, float y) { - return __nv_remainderf(x, y); -} - -DEVICE_EXTERN_C_INLINE -float __devicelib_remquof(float x, float y, int *q) { - return __nv_remquof(x, y, q); -} - -DEVICE_EXTERN_C_INLINE -float __devicelib_nextafterf(float x, float y) { - return __nv_nextafterf(x, y); -} - -DEVICE_EXTERN_C_INLINE -float __devicelib_fdimf(float x, float y) { return __nv_fdimf(x, y); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_fmaf(float x, float y, float z) { - return __nv_fmaf(x, y, z); -} - -DEVICE_EXTERN_C_INLINE -float __devicelib_sinf(float x) { return __nv_sinf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_cosf(float x) { return __nv_cosf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_tanf(float x) { return __nv_tanf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_powf(float x, float y) { return __nv_powf(x, y); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_acosf(float x) { return __nv_acosf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_asinf(float x) { return __nv_asinf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_atanf(float x) { return __nv_atanf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_atan2f(float x, float y) { return __nv_atan2f(x, y); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_coshf(float x) { return __nv_coshf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_sinhf(float x) { return __nv_sinhf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_tanhf(float x) { return __nv_tanhf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_acoshf(float x) { return __nv_acoshf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_asinhf(float x) { return __nv_asinhf(x); } - -DEVICE_EXTERN_C_INLINE -float __devicelib_atanhf(float x) { return __nv_atanhf(x); } - -#endif // __NVPTX__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/include/nv_libdevice.h b/libdevice/include/nv_libdevice.h deleted file mode 100644 index e80dc75334852..0000000000000 --- a/libdevice/include/nv_libdevice.h +++ /dev/null @@ -1,360 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_NVIDIACL_LIBDEVICE_H -#define PTX_NVIDIACL_LIBDEVICE_H - -DEVICE_EXTERN_C_INLINE int __nv_abs(int); -DEVICE_EXTERN_C_INLINE double __nv_acos(double); -DEVICE_EXTERN_C_INLINE float __nv_acosf(float); -DEVICE_EXTERN_C_INLINE double __nv_acosh(double); -DEVICE_EXTERN_C_INLINE float __nv_acoshf(float); -DEVICE_EXTERN_C_INLINE double __nv_asin(double); -DEVICE_EXTERN_C_INLINE float __nv_asinf(float); -DEVICE_EXTERN_C_INLINE double __nv_asinh(double); -DEVICE_EXTERN_C_INLINE float __nv_asinhf(float); -DEVICE_EXTERN_C_INLINE double __nv_atan(double); -DEVICE_EXTERN_C_INLINE double __nv_atan2(double, double); -DEVICE_EXTERN_C_INLINE float __nv_atan2f(float, float); -DEVICE_EXTERN_C_INLINE float __nv_atanf(float); -DEVICE_EXTERN_C_INLINE double __nv_atanh(double); -DEVICE_EXTERN_C_INLINE float __nv_atanhf(float); -DEVICE_EXTERN_C_INLINE int __nv_brev(int); -DEVICE_EXTERN_C_INLINE long __nv_brevll(long); -DEVICE_EXTERN_C_INLINE int __nv_byte_perm(int, int, int); -DEVICE_EXTERN_C_INLINE double __nv_cbrt(double); -DEVICE_EXTERN_C_INLINE float __nv_cbrtf(float); -DEVICE_EXTERN_C_INLINE double __nv_ceil(double); -DEVICE_EXTERN_C_INLINE float __nv_ceilf(float); -DEVICE_EXTERN_C_INLINE int __nv_clz(int); -DEVICE_EXTERN_C_INLINE int __nv_clzll(long); -DEVICE_EXTERN_C_INLINE double __nv_copysign(double, double); -DEVICE_EXTERN_C_INLINE float __nv_copysignf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_cos(double); -DEVICE_EXTERN_C_INLINE float __nv_cosf(float); -DEVICE_EXTERN_C_INLINE double __nv_cosh(double); -DEVICE_EXTERN_C_INLINE float __nv_coshf(float); -DEVICE_EXTERN_C_INLINE double __nv_cospi(double); -DEVICE_EXTERN_C_INLINE float __nv_cospif(float); -DEVICE_EXTERN_C_INLINE double __nv_cyl_bessel_i0(double); -DEVICE_EXTERN_C_INLINE float __nv_cyl_bessel_i0f(float); -DEVICE_EXTERN_C_INLINE double __nv_cyl_bessel_i1(double); -DEVICE_EXTERN_C_INLINE float __nv_cyl_bessel_i1f(float); -DEVICE_EXTERN_C_INLINE double __nv_dadd_rd(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dadd_rn(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dadd_ru(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dadd_rz(double, double); -DEVICE_EXTERN_C_INLINE double __nv_ddiv_rd(double, double); -DEVICE_EXTERN_C_INLINE double __nv_ddiv_rn(double, double); -DEVICE_EXTERN_C_INLINE double __nv_ddiv_ru(double, double); -DEVICE_EXTERN_C_INLINE double __nv_ddiv_rz(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dmul_rd(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dmul_rn(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dmul_ru(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dmul_rz(double, double); -DEVICE_EXTERN_C_INLINE float __nv_double2float_rd(double); -DEVICE_EXTERN_C_INLINE float __nv_double2float_rn(double); -DEVICE_EXTERN_C_INLINE float __nv_double2float_ru(double); -DEVICE_EXTERN_C_INLINE float __nv_double2float_rz(double); -DEVICE_EXTERN_C_INLINE int __nv_double2hiint(double); -DEVICE_EXTERN_C_INLINE int __nv_double2int_rd(double); -DEVICE_EXTERN_C_INLINE int __nv_double2int_rn(double); -DEVICE_EXTERN_C_INLINE int __nv_double2int_ru(double); -DEVICE_EXTERN_C_INLINE int __nv_double2int_rz(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ll_rd(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ll_rn(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ll_ru(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ll_rz(double); -DEVICE_EXTERN_C_INLINE int __nv_double2loint(double); -DEVICE_EXTERN_C_INLINE int __nv_double2uint_rd(double); -DEVICE_EXTERN_C_INLINE int __nv_double2uint_rn(double); -DEVICE_EXTERN_C_INLINE int __nv_double2uint_ru(double); -DEVICE_EXTERN_C_INLINE int __nv_double2uint_rz(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ull_rd(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ull_rn(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ull_ru(double); -DEVICE_EXTERN_C_INLINE long __nv_double2ull_rz(double); -DEVICE_EXTERN_C_INLINE long __nv_double_as_longlong(double); -DEVICE_EXTERN_C_INLINE double __nv_drcp_rd(double); -DEVICE_EXTERN_C_INLINE double __nv_drcp_rn(double); -DEVICE_EXTERN_C_INLINE double __nv_drcp_ru(double); -DEVICE_EXTERN_C_INLINE double __nv_drcp_rz(double); -DEVICE_EXTERN_C_INLINE double __nv_dsqrt_rd(double); -DEVICE_EXTERN_C_INLINE double __nv_dsqrt_rn(double); -DEVICE_EXTERN_C_INLINE double __nv_dsqrt_ru(double); -DEVICE_EXTERN_C_INLINE double __nv_dsqrt_rz(double); -DEVICE_EXTERN_C_INLINE double __nv_dsub_rd(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dsub_rn(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dsub_ru(double, double); -DEVICE_EXTERN_C_INLINE double __nv_dsub_rz(double, double); -DEVICE_EXTERN_C_INLINE double __nv_erf(double); -DEVICE_EXTERN_C_INLINE double __nv_erfc(double); -DEVICE_EXTERN_C_INLINE float __nv_erfcf(float); -DEVICE_EXTERN_C_INLINE double __nv_erfcinv(double); -DEVICE_EXTERN_C_INLINE float __nv_erfcinvf(float); -DEVICE_EXTERN_C_INLINE double __nv_erfcx(double); -DEVICE_EXTERN_C_INLINE float __nv_erfcxf(float); -DEVICE_EXTERN_C_INLINE float __nv_erff(float); -DEVICE_EXTERN_C_INLINE double __nv_erfinv(double); -DEVICE_EXTERN_C_INLINE float __nv_erfinvf(float); -DEVICE_EXTERN_C_INLINE double __nv_exp(double); -DEVICE_EXTERN_C_INLINE double __nv_exp10(double); -DEVICE_EXTERN_C_INLINE float __nv_exp10f(float); -DEVICE_EXTERN_C_INLINE double __nv_exp2(double); -DEVICE_EXTERN_C_INLINE float __nv_exp2f(float); -DEVICE_EXTERN_C_INLINE float __nv_expf(float); -DEVICE_EXTERN_C_INLINE double __nv_expm1(double); -DEVICE_EXTERN_C_INLINE float __nv_expm1f(float); -DEVICE_EXTERN_C_INLINE double __nv_fabs(double); -DEVICE_EXTERN_C_INLINE float __nv_fabsf(float); -DEVICE_EXTERN_C_INLINE float __nv_fadd_rd(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fadd_rn(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fadd_ru(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fadd_rz(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fast_cosf(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_exp10f(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_expf(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_fdividef(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fast_log10f(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_log2f(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_logf(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_powf(float, float); -DEVICE_EXTERN_C_INLINE void __nv_fast_sincosf(float, float *, float *); -DEVICE_EXTERN_C_INLINE float __nv_fast_sinf(float); -DEVICE_EXTERN_C_INLINE float __nv_fast_tanf(float); -DEVICE_EXTERN_C_INLINE double __nv_fdim(double, double); -DEVICE_EXTERN_C_INLINE float __nv_fdimf(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fdiv_rd(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fdiv_rn(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fdiv_ru(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fdiv_rz(float, float); -DEVICE_EXTERN_C_INLINE int __nv_ffs(int); -DEVICE_EXTERN_C_INLINE int __nv_ffsll(long); -DEVICE_EXTERN_C_INLINE int __nv_finitef(float); -DEVICE_EXTERN_C_INLINE short __nv_float2half_rn(float); -DEVICE_EXTERN_C_INLINE int __nv_float2int_rd(float); -DEVICE_EXTERN_C_INLINE int __nv_float2int_rn(float); -DEVICE_EXTERN_C_INLINE int __nv_float2int_ru(float); -DEVICE_EXTERN_C_INLINE int __nv_float2int_rz(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ll_rd(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ll_rn(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ll_ru(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ll_rz(float); -DEVICE_EXTERN_C_INLINE int __nv_float2uint_rd(float); -DEVICE_EXTERN_C_INLINE int __nv_float2uint_rn(float); -DEVICE_EXTERN_C_INLINE int __nv_float2uint_ru(float); -DEVICE_EXTERN_C_INLINE int __nv_float2uint_rz(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ull_rd(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ull_rn(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ull_ru(float); -DEVICE_EXTERN_C_INLINE long __nv_float2ull_rz(float); -DEVICE_EXTERN_C_INLINE int __nv_float_as_int(float); -DEVICE_EXTERN_C_INLINE int __nv_float_as_uint(float); -DEVICE_EXTERN_C_INLINE double __nv_floor(double); -DEVICE_EXTERN_C_INLINE float __nv_floorf(float); -DEVICE_EXTERN_C_INLINE double __nv_fma(double, double, double); -DEVICE_EXTERN_C_INLINE double __nv_fma_rd(double, double, double); -DEVICE_EXTERN_C_INLINE double __nv_fma_rn(double, double, double); -DEVICE_EXTERN_C_INLINE double __nv_fma_ru(double, double, double); -DEVICE_EXTERN_C_INLINE double __nv_fma_rz(double, double, double); -DEVICE_EXTERN_C_INLINE float __nv_fmaf(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_rd(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_rn(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_ru(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_ieee_rz(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_rd(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_rn(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_ru(float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmaf_rz(float, float, float); -DEVICE_EXTERN_C_INLINE double __nv_fmax(double, double); -DEVICE_EXTERN_C_INLINE float __nv_fmaxf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_fmin(double, double); -DEVICE_EXTERN_C_INLINE float __nv_fminf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_fmod(double, double); -DEVICE_EXTERN_C_INLINE float __nv_fmodf(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmul_rd(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmul_rn(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmul_ru(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fmul_rz(float, float); -DEVICE_EXTERN_C_INLINE float __nv_frcp_rd(float); -DEVICE_EXTERN_C_INLINE float __nv_frcp_rn(float); -DEVICE_EXTERN_C_INLINE float __nv_frcp_ru(float); -DEVICE_EXTERN_C_INLINE float __nv_frcp_rz(float); -DEVICE_EXTERN_C_INLINE double __nv_frexp(double, int *); -DEVICE_EXTERN_C_INLINE float __nv_frexpf(float, int *); -DEVICE_EXTERN_C_INLINE float __nv_frsqrt_rn(float); -DEVICE_EXTERN_C_INLINE float __nv_fsqrt_rd(float); -DEVICE_EXTERN_C_INLINE float __nv_fsqrt_rn(float); -DEVICE_EXTERN_C_INLINE float __nv_fsqrt_ru(float); -DEVICE_EXTERN_C_INLINE float __nv_fsqrt_rz(float); -DEVICE_EXTERN_C_INLINE float __nv_fsub_rd(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fsub_rn(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fsub_ru(float, float); -DEVICE_EXTERN_C_INLINE float __nv_fsub_rz(float, float); -DEVICE_EXTERN_C_INLINE int __nv_hadd(int, int); -DEVICE_EXTERN_C_INLINE float __nv_half2float(short); -DEVICE_EXTERN_C_INLINE double __nv_hiloint2double(int, int); -DEVICE_EXTERN_C_INLINE double __nv_hypot(double, double); -DEVICE_EXTERN_C_INLINE float __nv_hypotf(float, float); -DEVICE_EXTERN_C_INLINE int __nv_ilogb(double); -DEVICE_EXTERN_C_INLINE int __nv_ilogbf(float); -DEVICE_EXTERN_C_INLINE double __nv_int2double_rn(int); -DEVICE_EXTERN_C_INLINE float __nv_int2float_rd(int); -DEVICE_EXTERN_C_INLINE float __nv_int2float_rn(int); -DEVICE_EXTERN_C_INLINE float __nv_int2float_ru(int); -DEVICE_EXTERN_C_INLINE float __nv_int2float_rz(int); -DEVICE_EXTERN_C_INLINE float __nv_int_as_float(int); -DEVICE_EXTERN_C_INLINE int __nv_isfinited(double); -DEVICE_EXTERN_C_INLINE int __nv_isinfd(double); -DEVICE_EXTERN_C_INLINE int __nv_isinff(float); -DEVICE_EXTERN_C_INLINE int __nv_isnand(double); -DEVICE_EXTERN_C_INLINE int __nv_isnanf(float); -DEVICE_EXTERN_C_INLINE double __nv_j0(double); -DEVICE_EXTERN_C_INLINE float __nv_j0f(float); -DEVICE_EXTERN_C_INLINE double __nv_j1(double); -DEVICE_EXTERN_C_INLINE float __nv_j1f(float); -DEVICE_EXTERN_C_INLINE double __nv_jn(int, double); -DEVICE_EXTERN_C_INLINE float __nv_jnf(int, float); -DEVICE_EXTERN_C_INLINE double __nv_ldexp(double, int); -DEVICE_EXTERN_C_INLINE float __nv_ldexpf(float, int); -DEVICE_EXTERN_C_INLINE double __nv_lgamma(double); -DEVICE_EXTERN_C_INLINE float __nv_lgammaf(float); -DEVICE_EXTERN_C_INLINE double __nv_ll2double_rd(long); -DEVICE_EXTERN_C_INLINE double __nv_ll2double_rn(long); -DEVICE_EXTERN_C_INLINE double __nv_ll2double_ru(long); -DEVICE_EXTERN_C_INLINE double __nv_ll2double_rz(long); -DEVICE_EXTERN_C_INLINE float __nv_ll2float_rd(long); -DEVICE_EXTERN_C_INLINE float __nv_ll2float_rn(long); -DEVICE_EXTERN_C_INLINE float __nv_ll2float_ru(long); -DEVICE_EXTERN_C_INLINE float __nv_ll2float_rz(long); -DEVICE_EXTERN_C_INLINE long __nv_llabs(long); -DEVICE_EXTERN_C_INLINE long __nv_llmax(long, long); -DEVICE_EXTERN_C_INLINE long __nv_llmin(long, long); -DEVICE_EXTERN_C_INLINE long __nv_llrint(double); -DEVICE_EXTERN_C_INLINE long __nv_llrintf(float); -DEVICE_EXTERN_C_INLINE long __nv_llround(double); -DEVICE_EXTERN_C_INLINE long __nv_llroundf(float); -DEVICE_EXTERN_C_INLINE double __nv_log(double); -DEVICE_EXTERN_C_INLINE double __nv_log10(double); -DEVICE_EXTERN_C_INLINE float __nv_log10f(float); -DEVICE_EXTERN_C_INLINE double __nv_log1p(double); -DEVICE_EXTERN_C_INLINE float __nv_log1pf(float); -DEVICE_EXTERN_C_INLINE double __nv_log2(double); -DEVICE_EXTERN_C_INLINE float __nv_log2f(float); -DEVICE_EXTERN_C_INLINE double __nv_logb(double); -DEVICE_EXTERN_C_INLINE float __nv_logbf(float); -DEVICE_EXTERN_C_INLINE float __nv_logf(float); -DEVICE_EXTERN_C_INLINE double __nv_longlong_as_double(long); -DEVICE_EXTERN_C_INLINE int __nv_max(int, int); -DEVICE_EXTERN_C_INLINE int __nv_min(int, int); -DEVICE_EXTERN_C_INLINE double __nv_modf(double, double *); -DEVICE_EXTERN_C_INLINE float __nv_modff(float, float *); -DEVICE_EXTERN_C_INLINE int __nv_mul24(int, int); -DEVICE_EXTERN_C_INLINE long __nv_mul64hi(long, long); -DEVICE_EXTERN_C_INLINE int __nv_mulhi(int, int); -DEVICE_EXTERN_C_INLINE double __nv_nan(char *); -DEVICE_EXTERN_C_INLINE float __nv_nanf(char *); -DEVICE_EXTERN_C_INLINE double __nv_nearbyint(double); -DEVICE_EXTERN_C_INLINE float __nv_nearbyintf(float); -DEVICE_EXTERN_C_INLINE double __nv_nextafter(double, double); -DEVICE_EXTERN_C_INLINE float __nv_nextafterf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_norm(int, double *); -DEVICE_EXTERN_C_INLINE double __nv_norm3d(double, double, double); -DEVICE_EXTERN_C_INLINE float __nv_norm3df(float, float, float); -DEVICE_EXTERN_C_INLINE double __nv_norm4d(double, double, double, double); -DEVICE_EXTERN_C_INLINE float __nv_norm4df(float, float, float, float); -DEVICE_EXTERN_C_INLINE double __nv_normcdf(double); -DEVICE_EXTERN_C_INLINE float __nv_normcdff(float); -DEVICE_EXTERN_C_INLINE double __nv_normcdfinv(double); -DEVICE_EXTERN_C_INLINE float __nv_normcdfinvf(float); -DEVICE_EXTERN_C_INLINE float __nv_normf(int, float *); -DEVICE_EXTERN_C_INLINE int __nv_popc(int); -DEVICE_EXTERN_C_INLINE int __nv_popcll(long); -DEVICE_EXTERN_C_INLINE double __nv_pow(double, double); -DEVICE_EXTERN_C_INLINE float __nv_powf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_powi(double, int); -DEVICE_EXTERN_C_INLINE float __nv_powif(float, int); -DEVICE_EXTERN_C_INLINE double __nv_rcbrt(double); -DEVICE_EXTERN_C_INLINE float __nv_rcbrtf(float); -DEVICE_EXTERN_C_INLINE double __nv_rcp64h(double); -DEVICE_EXTERN_C_INLINE double __nv_remainder(double, double); -DEVICE_EXTERN_C_INLINE float __nv_remainderf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_remquo(double, double, int *); -DEVICE_EXTERN_C_INLINE float __nv_remquof(float, float, int *); -DEVICE_EXTERN_C_INLINE int __nv_rhadd(int, int); -DEVICE_EXTERN_C_INLINE double __nv_rhypot(double, double); -DEVICE_EXTERN_C_INLINE float __nv_rhypotf(float, float); -DEVICE_EXTERN_C_INLINE double __nv_rint(double); -DEVICE_EXTERN_C_INLINE float __nv_rintf(float); -DEVICE_EXTERN_C_INLINE double __nv_rnorm(int, double *); -DEVICE_EXTERN_C_INLINE double __nv_rnorm3d(double, double, double); -DEVICE_EXTERN_C_INLINE float __nv_rnorm3df(float, float, float); -DEVICE_EXTERN_C_INLINE double __nv_rnorm4d(double, double, double, double); -DEVICE_EXTERN_C_INLINE float __nv_rnorm4df(float, float, float, float); -DEVICE_EXTERN_C_INLINE float __nv_rnormf(int, float *); -DEVICE_EXTERN_C_INLINE double __nv_round(double); -DEVICE_EXTERN_C_INLINE float __nv_roundf(float); -DEVICE_EXTERN_C_INLINE double __nv_rsqrt(double); -DEVICE_EXTERN_C_INLINE float __nv_rsqrtf(float); -DEVICE_EXTERN_C_INLINE int __nv_sad(int, int, int); -DEVICE_EXTERN_C_INLINE float __nv_saturatef(float); -DEVICE_EXTERN_C_INLINE double __nv_scalbn(double, int); -DEVICE_EXTERN_C_INLINE float __nv_scalbnf(float, int); -DEVICE_EXTERN_C_INLINE int __nv_signbitd(double); -DEVICE_EXTERN_C_INLINE int __nv_signbitf(float); -DEVICE_EXTERN_C_INLINE double __nv_sin(double); -DEVICE_EXTERN_C_INLINE void __nv_sincos(double, double *, double *); -DEVICE_EXTERN_C_INLINE void __nv_sincosf(float, float *, float *); -DEVICE_EXTERN_C_INLINE void __nv_sincospi(double, double *, double *); -DEVICE_EXTERN_C_INLINE void __nv_sincospif(float, float *, float *); -DEVICE_EXTERN_C_INLINE float __nv_sinf(float); -DEVICE_EXTERN_C_INLINE double __nv_sinh(double); -DEVICE_EXTERN_C_INLINE float __nv_sinhf(float); -DEVICE_EXTERN_C_INLINE double __nv_sinpi(double); -DEVICE_EXTERN_C_INLINE float __nv_sinpif(float); -DEVICE_EXTERN_C_INLINE double __nv_sqrt(double); -DEVICE_EXTERN_C_INLINE float __nv_sqrtf(float); -DEVICE_EXTERN_C_INLINE double __nv_tan(double); -DEVICE_EXTERN_C_INLINE float __nv_tanf(float); -DEVICE_EXTERN_C_INLINE double __nv_tanh(double); -DEVICE_EXTERN_C_INLINE float __nv_tanhf(float); -DEVICE_EXTERN_C_INLINE double __nv_tgamma(double); -DEVICE_EXTERN_C_INLINE float __nv_tgammaf(float); -DEVICE_EXTERN_C_INLINE double __nv_trunc(double); -DEVICE_EXTERN_C_INLINE float __nv_truncf(float); -DEVICE_EXTERN_C_INLINE int __nv_uhadd(int, int); -DEVICE_EXTERN_C_INLINE double __nv_uint2double_rn(int); -DEVICE_EXTERN_C_INLINE float __nv_uint2float_rd(int); -DEVICE_EXTERN_C_INLINE float __nv_uint2float_rn(int); -DEVICE_EXTERN_C_INLINE float __nv_uint2float_ru(int); -DEVICE_EXTERN_C_INLINE float __nv_uint2float_rz(int); -DEVICE_EXTERN_C_INLINE float __nv_uint_as_float(int); -DEVICE_EXTERN_C_INLINE double __nv_ull2double_rd(long); -DEVICE_EXTERN_C_INLINE double __nv_ull2double_rn(long); -DEVICE_EXTERN_C_INLINE double __nv_ull2double_ru(long); -DEVICE_EXTERN_C_INLINE double __nv_ull2double_rz(long); -DEVICE_EXTERN_C_INLINE float __nv_ull2float_rd(long); -DEVICE_EXTERN_C_INLINE float __nv_ull2float_rn(long); -DEVICE_EXTERN_C_INLINE float __nv_ull2float_ru(long); -DEVICE_EXTERN_C_INLINE float __nv_ull2float_rz(long); -DEVICE_EXTERN_C_INLINE long __nv_ullmax(long, long); -DEVICE_EXTERN_C_INLINE long __nv_ullmin(long, long); -DEVICE_EXTERN_C_INLINE int __nv_umax(int, int); -DEVICE_EXTERN_C_INLINE int __nv_umin(int, int); -DEVICE_EXTERN_C_INLINE int __nv_umul24(int, int); -DEVICE_EXTERN_C_INLINE long __nv_umul64hi(long, long); -DEVICE_EXTERN_C_INLINE int __nv_umulhi(int, int); -DEVICE_EXTERN_C_INLINE int __nv_urhadd(int, int); -DEVICE_EXTERN_C_INLINE int __nv_usad(int, int, int); -DEVICE_EXTERN_C_INLINE double __nv_y0(double); -DEVICE_EXTERN_C_INLINE float __nv_y0f(float); -DEVICE_EXTERN_C_INLINE double __nv_y1(double); -DEVICE_EXTERN_C_INLINE float __nv_y1f(float); -DEVICE_EXTERN_C_INLINE double __nv_yn(int, double); -DEVICE_EXTERN_C_INLINE float __nv_ynf(int, float); -#endif From 49b30e93562f9f5a5f01bca4b84bdbd08b542dc8 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 26 Jul 2022 14:58:22 +0100 Subject: [PATCH 13/36] Working compilation for -O0 --- clang/lib/Driver/Driver.cpp | 116 ++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index b188a85363579..f20fc1f558e3a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include #include "clang/Driver/Driver.h" #include "ToolChains/AIX.h" #include "ToolChains/AMDGPU.h" @@ -4866,7 +4865,8 @@ class OffloadingActionBuilder final { {"libsycl-itt-user-wrappers", "internal"}, {"libsycl-itt-compiler-wrappers", "internal"}, {"libsycl-itt-stubs", "internal"}}; - auto addInputs = [&](const SYCLDeviceLibsList &LibsList) { + auto addInputs = [&](const SYCLDeviceLibsList &LibsList, + bool LinkNVPTXLibs = false) { bool LibLocSelected = false; for (const auto &LLCandidate : LibLocCandidates) { if (LibLocSelected) @@ -4900,63 +4900,67 @@ class OffloadingActionBuilder final { LibLocSelected = !LibLocSelected; } } - if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) { - std::string LibSpirvFile; - if (Args.hasArg( - clang::driver::options::OPT_fsycl_libspirv_path_EQ)) { - auto ProvidedPath = - Args.getLastArgValue( - clang::driver::options::OPT_fsycl_libspirv_path_EQ) - .str(); - if (llvm::sys::fs::exists(ProvidedPath)) - LibSpirvFile = ProvidedPath; - } else { - SmallVector LibraryPaths; - - // Expected path w/out install. - SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir); - llvm::sys::path::append(WithoutInstallPath, Twine("../../clc")); - LibraryPaths.emplace_back(WithoutInstallPath.c_str()); - - // Expected path w/ install. - SmallString<256> WithInstallPath(C.getDriver().ResourceDir); - llvm::sys::path::append(WithInstallPath, - Twine("../../../share/clc")); - LibraryPaths.emplace_back(WithInstallPath.c_str()); - - // Select remangled libclc variant - std::string LibSpirvTargetName = - (TC->getAuxTriple()->isOSWindows()) - ? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl." - "bc" - : "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl." - "bc"; - - for (StringRef LibraryPath : LibraryPaths) { - SmallString<128> LibSpirvTargetFile(LibraryPath); - llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName); - if (llvm::sys::fs::exists(LibSpirvTargetFile) || - Args.hasArg(options::OPT__HASH_HASH_HASH)) { - LibSpirvFile = std::string(LibSpirvTargetFile.str()); - break; - } + } + + if (TC->getTriple().isNVPTX() && LinkNVPTXLibs && + NumOfDeviceLibLinked) { + std::string LibSpirvFile; + if (Args.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) { + auto ProvidedPath = + Args.getLastArgValue( + clang::driver::options::OPT_fsycl_libspirv_path_EQ) + .str(); + if (llvm::sys::fs::exists(ProvidedPath)) + LibSpirvFile = ProvidedPath; + } else { + SmallVector LibraryPaths; + + // Expected path w/out install. + SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithoutInstallPath, Twine("../../clc")); + LibraryPaths.emplace_back(WithoutInstallPath.c_str()); + + // Expected path w/ install. + SmallString<256> WithInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithInstallPath, + Twine("../../../share/clc")); + LibraryPaths.emplace_back(WithInstallPath.c_str()); + + // Select remangled libclc variant + std::string LibSpirvTargetName = + (TC->getAuxTriple()->isOSWindows()) + ? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl." + "bc" + : "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl." + "bc"; + + for (StringRef LibraryPath : LibraryPaths) { + SmallString<128> LibSpirvTargetFile(LibraryPath); + llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName); + if (llvm::sys::fs::exists(LibSpirvTargetFile) || + Args.hasArg(options::OPT__HASH_HASH_HASH)) { + LibSpirvFile = std::string(LibSpirvTargetFile.str()); + break; } } + } - if (!LibSpirvFile.empty()) { - Arg *LibClcInputArg = - MakeInputArg(Args, C.getDriver().getOpts(), - Args.MakeArgString(LibSpirvFile)); - auto *SYCLLibClcInputAction = - C.MakeAction(*LibClcInputArg, types::TY_Object); - DeviceLinkObjects.push_back(SYCLLibClcInputAction); - } + if (!LibSpirvFile.empty()) { + Arg *LibClcInputArg = + MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibSpirvFile)); + auto *SYCLLibClcInputAction = + C.MakeAction(*LibClcInputArg, types::TY_Object); + DeviceLinkObjects.push_back(SYCLLibClcInputAction); + } - const toolchains::CudaToolChain *CudaTC = - static_cast(TC); + const toolchains::CudaToolChain *CudaTC = + static_cast(TC); + for (auto LinkInputEnum : enumerate(DeviceLinkerInputs)) { + const char *BoundArch = + SYCLTargetInfoList[LinkInputEnum.index()].BoundArch; std::string LibDeviceFile = - CudaTC->CudaInstallation.getLibDeviceFile( - Args.getLastArgValue(options::OPT_march_EQ)); + CudaTC->CudaInstallation.getLibDeviceFile(BoundArch); if (!LibDeviceFile.empty()) { Arg *CudaDeviceLibInputArg = MakeInputArg(Args, C.getDriver().getOpts(), @@ -4964,8 +4968,6 @@ class OffloadingActionBuilder final { auto *SYCLDeviceLibInputAction = C.MakeAction( *CudaDeviceLibInputArg, types::TY_Object); DeviceLinkObjects.push_back(SYCLDeviceLibInputAction); - } else { - std::cout << "LibDeviceFile was empty!!\n"; } } } @@ -4975,7 +4977,7 @@ class OffloadingActionBuilder final { addInputs(sycl_device_fallback_libs); if (Args.hasFlag(options::OPT_fsycl_instrument_device_code, options::OPT_fno_sycl_instrument_device_code, true)) - addInputs(sycl_device_annotation_libs); + addInputs(sycl_device_annotation_libs, true); return NumOfDeviceLibLinked != 0; } From fb01ac9b1964c738f12818518907332af1dfdc32 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 26 Jul 2022 15:02:20 +0100 Subject: [PATCH 14/36] Remove stuff --- libdevice/cmath_wrapper.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index bb6ebb020db13..d183a3118ab11 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -106,15 +106,9 @@ float fdimf(float x, float y) { return __devicelib_fdimf(x, y); } DEVICE_EXTERN_C_INLINE float fmaf(float x, float y, float z) { return __devicelib_fmaf(x, y, z); } -DEVICE_EXTERN_C_INLINE -float sin(float x) { return __devicelib_sinf(x); } - DEVICE_EXTERN_C_INLINE float sinf(float x) { return __devicelib_sinf(x); } -DEVICE_EXTERN_C_INLINE -float cos(float x) { return __devicelib_cosf(x); } - DEVICE_EXTERN_C_INLINE float cosf(float x) { return __devicelib_cosf(x); } From 9b5835ee7acd10edf80b335e880a01346c87ba9d Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 26 Jul 2022 17:06:27 +0100 Subject: [PATCH 15/36] Adding assert --- libdevice/fallback-cassert.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index b03a3409b7bf8..880cbab0693a7 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -99,3 +99,21 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // *die = 0xdead; } #endif // __SPIR__ + +#ifdef __NVPTX__ + +SYCL_EXTERNAL extern "C" void __assertfail(const char *__message, + const char *__file, unsigned __line, + const char *__function); + +DEVICE_EXTERN_C void __assert_fail(const char *expr, const char *file, + unsigned int line, const char *func) { + __assertfail(expr, file, line, func); +} + +DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File, + unsigned _Line) { + __assertfail(_Message, _File, _Line, 0); +} + +#endif From 64eb48288be94e817f4f822c264bf161d4a14351 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 27 Jul 2022 11:06:38 +0100 Subject: [PATCH 16/36] Working impl --- clang/lib/Driver/Driver.cpp | 8 +++++++- libclc/ptx-nvidiacl/libspirv/SOURCES | 1 - .../libspirv/assert/__assert_fail.cl | 14 -------------- libdevice/crt_wrapper.cpp | 4 ++-- libdevice/device_math.h | 2 +- libdevice/fallback-cassert.cpp | 17 ++++++++++------- libdevice/fallback-cstring.cpp | 4 ++-- libdevice/spirv_vars.h | 9 +++++++-- libdevice/wrapper.h | 4 ++-- 9 files changed, 31 insertions(+), 32 deletions(-) delete mode 100644 libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f20fc1f558e3a..2dc7ee4c35114 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4886,10 +4886,13 @@ class OffloadingActionBuilder final { auto *SYCLDeviceLibsUnbundleAction = C.MakeAction( SYCLDeviceLibsInputAction); + + // We are using BoundArch="" here since the NVPTX bundles in + // the devicelib .o files do not contain any arch information SYCLDeviceLibsUnbundleAction->registerDependentActionInfo( TC, /*BoundArch=*/"", Action::OFK_SYCL); OffloadAction::DeviceDependences Dep; - Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/nullptr, + Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/"", Action::OFK_SYCL); auto *SYCLDeviceLibsDependenciesAction = C.MakeAction( @@ -4902,6 +4905,9 @@ class OffloadingActionBuilder final { } } + // For NVPTX backend we need to also include libclc and CUDA libdevice + // at the same stage that we link all of the unbundled SYCL libdevice + // objects together. if (TC->getTriple().isNVPTX() && LinkNVPTXLibs && NumOfDeviceLibLinked) { std::string LibSpirvFile; diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES index 44a3bc186a48f..bec378d428511 100644 --- a/libclc/ptx-nvidiacl/libspirv/SOURCES +++ b/libclc/ptx-nvidiacl/libspirv/SOURCES @@ -1,4 +1,3 @@ -assert/__assert_fail.cl reflect.ll atomic/loadstore_helpers.ll cl_khr_int64_extended_atomics/minmax_helpers.ll diff --git a/libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl b/libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl deleted file mode 100644 index 5787cdb4b02be..0000000000000 --- a/libclc/ptx-nvidiacl/libspirv/assert/__assert_fail.cl +++ /dev/null @@ -1,14 +0,0 @@ -#include - -void __assertfail(const char *__message, const char *__file, unsigned __line, - const char *__function); - -_CLC_DECL void __assert_fail(const char *expr, const char *file, - unsigned int line, const char *func) { - __assertfail(expr, file, line, func); -} - -_CLC_DECL void _wassert(const char *_Message, const char *_File, - unsigned _Line) { - __assertfail(_Message, _File, _Line, 0); -} diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index 13cb984633920..d0f3709233795 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -8,7 +8,7 @@ #include "wrapper.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); @@ -64,4 +64,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line, __spirv_LocalInvocationId_z()); } #endif -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index ec7e192d6a4ca..10a23c8748678 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -289,5 +289,5 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ #endif // __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 880cbab0693a7..47bae9f54714a 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -102,18 +102,21 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, #ifdef __NVPTX__ -SYCL_EXTERNAL extern "C" void __assertfail(const char *__message, - const char *__file, unsigned __line, - const char *__function); +DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file, + unsigned __line, const char *__function, + size_t charSize); -DEVICE_EXTERN_C void __assert_fail(const char *expr, const char *file, - unsigned int line, const char *func) { - __assertfail(expr, file, line, func); +DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, + int32_t line, const char *func, + uint64_t gid0, uint64_t gid1, + uint64_t gid2, uint64_t lid0, + uint64_t lid1, uint64_t lid2) { + __assertfail(expr, file, line, func, 1); } DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File, unsigned _Line) { - __assertfail(_Message, _File, _Line, 0); + __assertfail(_Message, _File, _Line, 0, 1); } #endif diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp index 4410ff238720e..bebfc621857d7 100644 --- a/libdevice/fallback-cstring.cpp +++ b/libdevice/fallback-cstring.cpp @@ -9,7 +9,7 @@ #include "wrapper.h" #include -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src, size_t n) { @@ -202,4 +202,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) { return head_cmp; } -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index eab02e7a860be..2b8c5ee41e6f0 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,14 +11,14 @@ #include "device.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #include #include #define __SPIRV_VAR_QUALIFIERS EXTERN_C const typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; __SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; @@ -51,5 +51,10 @@ DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_z() { return __spirv_BuiltInLocalInvocationId.z; } +#ifndef __SPIR__ +const size_t_vec __spirv_BuiltInGlobalInvocationId{}; +const size_t_vec __spirv_BuiltInLocalInvocationId{}; #endif // __SPIR__ + +#endif // __SPIR__ || __NVPTX__ #endif // __LIBDEVICE_SPIRV_VARS_H diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h index 75d47f7f98afb..c3ec6ec1fa785 100644 --- a/libdevice/wrapper.h +++ b/libdevice/wrapper.h @@ -11,7 +11,7 @@ #include "device.h" -#ifdef __SPIR__ +#if defined(__SPIR__) || defined(__NVPTX__) #include #include @@ -29,5 +29,5 @@ void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2); -#endif // __SPIR__ +#endif // __SPIR__ || __NVPTX__ #endif // __LIBDEVICE_WRAPPER_H__ From 8cd42c416c9ff6163b3f7ffe98de9cfc512dc4c6 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 27 Jul 2022 11:31:57 +0100 Subject: [PATCH 17/36] Restructuring --- clang/lib/Driver/Driver.cpp | 152 ++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 78 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 2dc7ee4c35114..370853eed9bec 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4243,8 +4243,7 @@ class OffloadingActionBuilder final { return ABRT_Success; } - ActionBuilderReturnCode - addDeviceDepences(Action *HostAction) override { + ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { // If this is an input action replicate it for each OpenMP toolchain. if (auto *IA = dyn_cast(HostAction)) { @@ -4865,8 +4864,7 @@ class OffloadingActionBuilder final { {"libsycl-itt-user-wrappers", "internal"}, {"libsycl-itt-compiler-wrappers", "internal"}, {"libsycl-itt-stubs", "internal"}}; - auto addInputs = [&](const SYCLDeviceLibsList &LibsList, - bool LinkNVPTXLibs = false) { + auto addInputs = [&](const SYCLDeviceLibsList &LibsList) { bool LibLocSelected = false; for (const auto &LLCandidate : LibLocCandidates) { if (LibLocSelected) @@ -4904,86 +4902,84 @@ class OffloadingActionBuilder final { } } } - - // For NVPTX backend we need to also include libclc and CUDA libdevice - // at the same stage that we link all of the unbundled SYCL libdevice - // objects together. - if (TC->getTriple().isNVPTX() && LinkNVPTXLibs && - NumOfDeviceLibLinked) { - std::string LibSpirvFile; - if (Args.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) { - auto ProvidedPath = - Args.getLastArgValue( - clang::driver::options::OPT_fsycl_libspirv_path_EQ) - .str(); - if (llvm::sys::fs::exists(ProvidedPath)) - LibSpirvFile = ProvidedPath; - } else { - SmallVector LibraryPaths; - - // Expected path w/out install. - SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir); - llvm::sys::path::append(WithoutInstallPath, Twine("../../clc")); - LibraryPaths.emplace_back(WithoutInstallPath.c_str()); - - // Expected path w/ install. - SmallString<256> WithInstallPath(C.getDriver().ResourceDir); - llvm::sys::path::append(WithInstallPath, - Twine("../../../share/clc")); - LibraryPaths.emplace_back(WithInstallPath.c_str()); - - // Select remangled libclc variant - std::string LibSpirvTargetName = - (TC->getAuxTriple()->isOSWindows()) - ? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl." - "bc" - : "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl." - "bc"; - - for (StringRef LibraryPath : LibraryPaths) { - SmallString<128> LibSpirvTargetFile(LibraryPath); - llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName); - if (llvm::sys::fs::exists(LibSpirvTargetFile) || - Args.hasArg(options::OPT__HASH_HASH_HASH)) { - LibSpirvFile = std::string(LibSpirvTargetFile.str()); - break; - } - } - } - - if (!LibSpirvFile.empty()) { - Arg *LibClcInputArg = - MakeInputArg(Args, C.getDriver().getOpts(), - Args.MakeArgString(LibSpirvFile)); - auto *SYCLLibClcInputAction = - C.MakeAction(*LibClcInputArg, types::TY_Object); - DeviceLinkObjects.push_back(SYCLLibClcInputAction); - } - - const toolchains::CudaToolChain *CudaTC = - static_cast(TC); - for (auto LinkInputEnum : enumerate(DeviceLinkerInputs)) { - const char *BoundArch = - SYCLTargetInfoList[LinkInputEnum.index()].BoundArch; - std::string LibDeviceFile = - CudaTC->CudaInstallation.getLibDeviceFile(BoundArch); - if (!LibDeviceFile.empty()) { - Arg *CudaDeviceLibInputArg = - MakeInputArg(Args, C.getDriver().getOpts(), - Args.MakeArgString(LibDeviceFile)); - auto *SYCLDeviceLibInputAction = C.MakeAction( - *CudaDeviceLibInputArg, types::TY_Object); - DeviceLinkObjects.push_back(SYCLDeviceLibInputAction); - } - } - } }; + addInputs(sycl_device_wrapper_libs); if (isSpirvAOT || TC->getTriple().isNVPTX()) addInputs(sycl_device_fallback_libs); if (Args.hasFlag(options::OPT_fsycl_instrument_device_code, options::OPT_fno_sycl_instrument_device_code, true)) - addInputs(sycl_device_annotation_libs, true); + addInputs(sycl_device_annotation_libs); + + // For NVPTX backend we need to also link libclc and CUDA libdevice + // at the same stage that we link all of the unbundled SYCL libdevice + // objects together. + if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) { + std::string LibSpirvFile; + if (Args.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) { + auto ProvidedPath = + Args.getLastArgValue( + clang::driver::options::OPT_fsycl_libspirv_path_EQ) + .str(); + if (llvm::sys::fs::exists(ProvidedPath)) + LibSpirvFile = ProvidedPath; + } else { + SmallVector LibraryPaths; + + // Expected path w/out install. + SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithoutInstallPath, Twine("../../clc")); + LibraryPaths.emplace_back(WithoutInstallPath.c_str()); + + // Expected path w/ install. + SmallString<256> WithInstallPath(C.getDriver().ResourceDir); + llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc")); + LibraryPaths.emplace_back(WithInstallPath.c_str()); + + // Select remangled libclc variant + std::string LibSpirvTargetName = + (TC->getAuxTriple()->isOSWindows()) + ? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl." + "bc" + : "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl." + "bc"; + + for (StringRef LibraryPath : LibraryPaths) { + SmallString<128> LibSpirvTargetFile(LibraryPath); + llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName); + if (llvm::sys::fs::exists(LibSpirvTargetFile) || + Args.hasArg(options::OPT__HASH_HASH_HASH)) { + LibSpirvFile = std::string(LibSpirvTargetFile.str()); + break; + } + } + } + + if (!LibSpirvFile.empty()) { + Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibSpirvFile)); + auto *SYCLLibClcInputAction = + C.MakeAction(*LibClcInputArg, types::TY_Object); + DeviceLinkObjects.push_back(SYCLLibClcInputAction); + } + + const toolchains::CudaToolChain *CudaTC = + static_cast(TC); + for (auto LinkInputEnum : enumerate(DeviceLinkerInputs)) { + const char *BoundArch = + SYCLTargetInfoList[LinkInputEnum.index()].BoundArch; + std::string LibDeviceFile = + CudaTC->CudaInstallation.getLibDeviceFile(BoundArch); + if (!LibDeviceFile.empty()) { + Arg *CudaDeviceLibInputArg = + MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibDeviceFile)); + auto *SYCLDeviceLibInputAction = C.MakeAction( + *CudaDeviceLibInputArg, types::TY_Object); + DeviceLinkObjects.push_back(SYCLDeviceLibInputAction); + } + } + } return NumOfDeviceLibLinked != 0; } From 04b65f6f552ff494f05a3cdb47b1688744149f26 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 27 Jul 2022 11:36:49 +0100 Subject: [PATCH 18/36] Tidying --- libdevice/device.h | 2 -- libdevice/spirv_vars.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/libdevice/device.h b/libdevice/device.h index 5aeaedca342cf..0770d7d82d29a 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -25,7 +25,6 @@ #define DEVICE_EXTERN_C DEVICE_EXTERNAL EXTERN_C #define DEVICE_EXTERN_C_INLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) -#define DEVICE_EXTERNAL_INLINE DEVICE_EXTERNAL __attribute__((always_inline)) #endif // __SPIR__ || __NVPTX__ #if defined(__SPIR__) || defined(__LIBDEVICE_HOST_IMPL__) @@ -37,7 +36,6 @@ // and it will be linked with user's host code by default. If those functions // are decorated with "weak" attribute, compiler will use PLT entry to call // all __device_imf_* functions, this will lead to crash. -#define DEVICE_EXTERNAL_INLINE DEVICE_EXTERNAL __attribute__((always_inline)) #define DEVICE_EXTERN_C EXTERN_C #define DEVICE_EXTERN_C_INLINE DEVICE_EXTERN_C __attribute__((always_inline)) #endif // __LIBDEVICE_HOST_IMPL__ diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 2b8c5ee41e6f0..640f0dbf3cd9e 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -18,7 +18,7 @@ #define __SPIRV_VAR_QUALIFIERS EXTERN_C const typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; __SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; From 9df15a86a399cc52a7ac8ba6b042ff40f4602c8d Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 27 Jul 2022 12:09:13 +0100 Subject: [PATCH 19/36] Remove typo --- clang/lib/Driver/Driver.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 370853eed9bec..508bc54b269e2 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7861,11 +7861,6 @@ InputInfoList Driver::BuildJobsForActionNoCache( // be returned for the current depending action. std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; -/* - if (CachedResults.find(ActionTC) == CachedResults.end()) - ActionTC = { - A, GetTriplePlusArchString(TC, "", TargetDeviceOffloadKind)}; -*/ assert((CachedResults.find(ActionTC) != CachedResults.end()) && "Result does not exist??"); Result = CachedResults[ActionTC].front(); From d312088cafa80007322804d0d76e71e41507d9f2 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 27 Jul 2022 12:10:19 +0100 Subject: [PATCH 20/36] Typo --- clang/lib/Driver/Driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 508bc54b269e2..1b642337846e3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7861,7 +7861,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( // be returned for the current depending action. std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; - assert((CachedResults.find(ActionTC) != CachedResults.end()) && + assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); Result = CachedResults[ActionTC].front(); } From c07d2e29041fc4bf21875e65e34420d6cf7691cd Mon Sep 17 00:00:00 2001 From: Hugh Delaney <46290137+hdelan@users.noreply.github.com> Date: Wed, 27 Jul 2022 14:47:16 +0100 Subject: [PATCH 21/36] Update clang/include/clang/Basic/CodeGenOptions.def Co-authored-by: smanna12 --- clang/include/clang/Basic/CodeGenOptions.def | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 4585253657938..59bfbfed1b148 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -127,7 +127,8 @@ CODEGENOPT(XRayAlwaysEmitTypedEvents , 1, 0) ///< Set when -fxray-ignore-loops is enabled. CODEGENOPT(XRayIgnoreLoops , 1, 0) -CODEGENOPT(BundleNoOffloadArch, 1, 0) ///< Set when -fbundle-no-offload-arch is enabled. +///< Set when -fbundle-no-offload-arch is enabled. +CODEGENOPT(BundleNoOffloadArch, 1, 0) ///< Set with -fno-xray-function-index to omit the index section. CODEGENOPT(XRayOmitFunctionIndex , 1, 0) From 08683ef79d05ae5238bf516c89d33dcebbfcfb5f Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 27 Jul 2022 15:08:21 +0100 Subject: [PATCH 22/36] clang-format --- clang/lib/Driver/Driver.cpp | 5 ++--- clang/lib/Driver/ToolChains/SYCL.h | 1 - libdevice/crt_wrapper.cpp | 2 +- libdevice/device_math.h | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ea0358e486bde..70b74787115c3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4517,8 +4517,7 @@ class OffloadingActionBuilder final { const Driver::InputList &Inputs, OffloadingActionBuilder &OAB) : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB), - SYCLInstallation(C.getDriver()) {} - + SYCLInstallation(C.getDriver()) {} void withBoundArchForToolChain(const ToolChain *TC, llvm::function_ref Op) { @@ -4970,7 +4969,7 @@ class OffloadingActionBuilder final { } } }; - + addInputs(sycl_device_wrapper_libs); if (isSpirvAOT || TC->getTriple().isNVPTX()) addInputs(sycl_device_fallback_libs); diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 7d8ad328ad083..7281b50865f69 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -172,7 +172,6 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args) const override; - const ToolChain &HostTC; protected: diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index d0f3709233795..94481bc640de1 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -8,7 +8,7 @@ #include "wrapper.h" -#if defined(__SPIR__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__NVPTX__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); diff --git a/libdevice/device_math.h b/libdevice/device_math.h index 10a23c8748678..006d2d5b8c57c 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,7 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#if defined(__SPIR__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__NVPTX__) #include typedef struct { From 8146781d8c7addf846d26621292016c07181912c Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 11:43:23 +0100 Subject: [PATCH 23/36] Change flag and fix clang tests. --- clang/include/clang/Driver/Options.td | 9 +- clang/lib/Driver/ToolChains/Clang.cpp | 4 +- clang/test/Driver/sycl-cuda-tu-offload.cu | 79 +++++++++-- clang/test/Driver/sycl-instrumentation.c | 3 +- clang/test/Driver/sycl-offload-nvptx.cpp | 164 ++++++++++++++++++---- 5 files changed, 213 insertions(+), 46 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e618d5ed43691..9f650e9821dda 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -956,13 +956,12 @@ def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOpt HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>, HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">; -def fbundle_no_offload_arch : Flag<["-"], "fbundle-no-offload-arch">, Flags<[CC1Option]>, +def fno_bundle_offload_arch : Flag<["-"], "fno-bundle-offload-arch">, HelpText<"Specify that the offload bundler should not identify a bundle with " - "any specific arch. When used the bundle for, for instance, " - "`nvptx64-nvidia-cuda-sm_80` becomes instead `nvptx64-nvidia-cuda`. " + "specific arch. For example, the bundle for `nvptx64-nvidia-cuda-sm_80` " + "uses the bundle tag `nvptx64-nvidia-cuda` when used. " "This allows .o files to contain .bc bundles that are unspecific " - "to a particular arch version.">, - MarshallingInfoFlag>; + "to a particular arch version.">; def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>, HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target ID in the form of a " "device architecture followed by target ID features delimited by a colon. Each target ID feature " diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 028da4cbb1a24..125538ba87f45 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8731,7 +8731,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP || CurKind == Action::OFK_Cuda || CurKind == Action::OFK_SYCL) && !StringRef(CurDep->getOffloadingArch()).empty() && - !TCArgs.hasArg(options::OPT_fbundle_no_offload_arch)) { + !TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) { Triples += '-'; Triples += CurDep->getOffloadingArch(); } @@ -8912,7 +8912,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( Dep.DependentOffloadKind == Action::OFK_Cuda || Dep.DependentOffloadKind == Action::OFK_SYCL) && !Dep.DependentBoundArch.empty() && - !TCArgs.hasArg(options::OPT_fbundle_no_offload_arch)) { + !TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) { Triples += '-'; Triples += Dep.DependentBoundArch; } diff --git a/clang/test/Driver/sycl-cuda-tu-offload.cu b/clang/test/Driver/sycl-cuda-tu-offload.cu index 113112798e666..18f968bfa3cbb 100644 --- a/clang/test/Driver/sycl-cuda-tu-offload.cu +++ b/clang/test/Driver/sycl-cuda-tu-offload.cu @@ -37,14 +37,71 @@ // DEFAULT-PHASES2: +- 13: assembler, {12}, object, (host-cuda-sycl) // DEFAULT-PHASES2: +- 14: offload, "host-cuda-sycl (x86_64-unknown-linux-gnu)" {13}, object // DEFAULT-PHASES2:+- 15: linker, {14}, image, (host-cuda-sycl) -// DEFAULT-PHASES2:| +- 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_80)" {5}, ir -// DEFAULT-PHASES2:| +- 17: linker, {16}, ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| +- 18: sycl-post-link, {17}, ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | +- 19: file-table-tform, {18}, ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | | +- 20: backend, {19}, assembler, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | | |- 21: assembler, {20}, object, (device-sycl, sm_80) -// DEFAULT-PHASES2:| | |- 22: linker, {20, 21}, cuda-fatbin, (device-sycl, sm_80) -// DEFAULT-PHASES2:| |- 23: foreach, {19, 22}, cuda-fatbin, (device-sycl, sm_80) -// DEFAULT-PHASES2:| +- 24: file-table-tform, {18, 23}, tempfiletable, (device-sycl, sm_80) -// DEFAULT-PHASES2:|- 25: clang-offload-wrapper, {24}, object, (device-sycl, sm_80) -// DEFAULT-PHASES2:26: offload, "host-cuda-sycl (x86_64-unknown-linux-gnu)" {15}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {25}, image +// DEFAULT-PHASES2:| +- 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_80)" {5}, ir +// DEFAULT-PHASES2:| +- 17: linker, {16}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | +- 18: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 19: clang-offload-unbundler, {18}, object +// DEFAULT-PHASES2:| |- 20: offload, " (nvptx64-nvidia-cuda)" {19}, object +// DEFAULT-PHASES2:| | +- 21: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 22: clang-offload-unbundler, {21}, object +// DEFAULT-PHASES2:| |- 23: offload, " (nvptx64-nvidia-cuda)" {22}, object +// DEFAULT-PHASES2:| | +- 24: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 25: clang-offload-unbundler, {24}, object +// DEFAULT-PHASES2:| |- 26: offload, " (nvptx64-nvidia-cuda)" {25}, object +// DEFAULT-PHASES2:| | +- 27: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 28: clang-offload-unbundler, {27}, object +// DEFAULT-PHASES2:| |- 29: offload, " (nvptx64-nvidia-cuda)" {28}, object +// DEFAULT-PHASES2:| | +- 30: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 31: clang-offload-unbundler, {30}, object +// DEFAULT-PHASES2:| |- 32: offload, " (nvptx64-nvidia-cuda)" {31}, object +// DEFAULT-PHASES2:| | +- 33: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 34: clang-offload-unbundler, {33}, object +// DEFAULT-PHASES2:| |- 35: offload, " (nvptx64-nvidia-cuda)" {34}, object +// DEFAULT-PHASES2:| | +- 36: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 37: clang-offload-unbundler, {36}, object +// DEFAULT-PHASES2:| |- 38: offload, " (nvptx64-nvidia-cuda)" {37}, object +// DEFAULT-PHASES2:| | +- 39: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 40: clang-offload-unbundler, {39}, object +// DEFAULT-PHASES2:| |- 41: offload, " (nvptx64-nvidia-cuda)" {40}, object +// DEFAULT-PHASES2:| | +- 42: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 43: clang-offload-unbundler, {42}, object +// DEFAULT-PHASES2:| |- 44: offload, " (nvptx64-nvidia-cuda)" {43}, object +// DEFAULT-PHASES2:| | +- 45: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 46: clang-offload-unbundler, {45}, object +// DEFAULT-PHASES2:| |- 47: offload, " (nvptx64-nvidia-cuda)" {46}, object +// DEFAULT-PHASES2:| | +- 48: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 49: clang-offload-unbundler, {48}, object +// DEFAULT-PHASES2:| |- 50: offload, " (nvptx64-nvidia-cuda)" {49}, object +// DEFAULT-PHASES2:| | +- 51: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 52: clang-offload-unbundler, {51}, object +// DEFAULT-PHASES2:| |- 53: offload, " (nvptx64-nvidia-cuda)" {52}, object +// DEFAULT-PHASES2:| | +- 54: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 55: clang-offload-unbundler, {54}, object +// DEFAULT-PHASES2:| |- 56: offload, " (nvptx64-nvidia-cuda)" {55}, object +// DEFAULT-PHASES2:| | +- 57: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 58: clang-offload-unbundler, {57}, object +// DEFAULT-PHASES2:| |- 59: offload, " (nvptx64-nvidia-cuda)" {58}, object +// DEFAULT-PHASES2:| | +- 60: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 61: clang-offload-unbundler, {60}, object +// DEFAULT-PHASES2:| |- 62: offload, " (nvptx64-nvidia-cuda)" {61}, object +// DEFAULT-PHASES2:| | +- 63: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 64: clang-offload-unbundler, {63}, object +// DEFAULT-PHASES2:| |- 65: offload, " (nvptx64-nvidia-cuda)" {64}, object +// DEFAULT-PHASES2:| | +- 66: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 67: clang-offload-unbundler, {66}, object +// DEFAULT-PHASES2:| |- 68: offload, " (nvptx64-nvidia-cuda)" {67}, object +// DEFAULT-PHASES2:| | +- 69: input, "{{.*}}", object +// DEFAULT-PHASES2:| | +- 70: clang-offload-unbundler, {69}, object +// DEFAULT-PHASES2:| |- 71: offload, " (nvptx64-nvidia-cuda)" {70}, object +// DEFAULT-PHASES2:| |- 72: input, "{{.*}}", object, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 73: input, "/opt/cuda/nvvm/libdevice/libdevice.10.bc", object, (device-sycl, sm_80) +// DEFAULT-PHASES2:| +- 74: linker, {17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 72, 73}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| +- 75: sycl-post-link, {74}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | +- 76: file-table-tform, {75}, ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | | +- 77: backend, {76}, assembler, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | | |- 78: assembler, {77}, object, (device-sycl, sm_80) +// DEFAULT-PHASES2:| | |- 79: linker, {77, 78}, cuda-fatbin, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 80: foreach, {76, 79}, cuda-fatbin, (device-sycl, sm_80) +// DEFAULT-PHASES2:| +- 81: file-table-tform, {75, 80}, tempfiletable, (device-sycl, sm_80) +// DEFAULT-PHASES2:|- 82: clang-offload-wrapper, {81}, object, (device-sycl, sm_80) +// DEFAULT-PHASES2:83: offload, "host-cuda-sycl (x86_64-unknown-linux-gnu)" {15}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {82}, image diff --git a/clang/test/Driver/sycl-instrumentation.c b/clang/test/Driver/sycl-instrumentation.c index af1c40d1d632d..7689b83f89356 100644 --- a/clang/test/Driver/sycl-instrumentation.c +++ b/clang/test/Driver/sycl-instrumentation.c @@ -22,7 +22,6 @@ // RUN: %clangxx -fsycl -fno-sycl-instrument-device-code -fsycl-targets=spir64 -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s // RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-sycl-instrument-device-code -nocudalib -### %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHECK-NONPASSED,CHECK-WARNING %s +// RUN: | FileCheck -check-prefixes=CHECK-NONPASSED %s // CHECK-NONPASSED-NOT: "-fsycl-instrument-device-code" // CHECK-NONPASSED-NOT: "-input={{.*}}libsycl-itt-{{.*}}.{{o|obj}}" -// CHECK-WARNING: warning: argument unused during compilation: '-fno-sycl-instrument-device-code' diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index 455d0cd0c238d..0a5674e931243 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -38,9 +38,7 @@ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s // -// RUN: %clang_cl -ccc-print-phases -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s +// TODO: Enable for clang_cl once device lib linking works for clang_cl // // CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl) // CHK-PHASES-NO-CC: 1: append-footer, {0}, c++, (host-sycl) @@ -54,26 +52,81 @@ // CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl) // CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 16: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 17: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {19}, image - +// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-crt.o", object +// CHK-PHASES-NO-CC: 13: clang-offload-unbundler, {12}, object +// CHK-PHASES-NO-CC: 14: offload, " (nvptx64-nvidia-cuda)" {13}, object +// CHK-PHASES-NO-CC: 15: input, "{{.*}}libsycl-complex.o", object +// CHK-PHASES-NO-CC: 16: clang-offload-unbundler, {15}, object +// CHK-PHASES-NO-CC: 17: offload, " (nvptx64-nvidia-cuda)" {16}, object +// CHK-PHASES-NO-CC: 18: input, "{{.*}}libsycl-complex-fp64.o", object +// CHK-PHASES-NO-CC: 19: clang-offload-unbundler, {18}, object +// CHK-PHASES-NO-CC: 20: offload, " (nvptx64-nvidia-cuda)" {19}, object +// CHK-PHASES-NO-CC: 21: input, "{{.*}}libsycl-cmath.o", object +// CHK-PHASES-NO-CC: 22: clang-offload-unbundler, {21}, object +// CHK-PHASES-NO-CC: 23: offload, " (nvptx64-nvidia-cuda)" {22}, object +// CHK-PHASES-NO-CC: 24: input, "{{.*}}libsycl-cmath-fp64.o", object +// CHK-PHASES-NO-CC: 25: clang-offload-unbundler, {24}, object +// CHK-PHASES-NO-CC: 26: offload, " (nvptx64-nvidia-cuda)" {25}, object +// CHK-PHASES-NO-CC: 27: input, "{{.*}}libsycl-imf.o", object +// CHK-PHASES-NO-CC: 28: clang-offload-unbundler, {27}, object +// CHK-PHASES-NO-CC: 29: offload, " (nvptx64-nvidia-cuda)" {28}, object +// CHK-PHASES-NO-CC: 30: input, "{{.*}}libsycl-imf-fp64.o", object +// CHK-PHASES-NO-CC: 31: clang-offload-unbundler, {30}, object +// CHK-PHASES-NO-CC: 32: offload, " (nvptx64-nvidia-cuda)" {31}, object +// CHK-PHASES-NO-CC: 33: input, "{{.*}}libsycl-fallback-cassert.o", object +// CHK-PHASES-NO-CC: 34: clang-offload-unbundler, {33}, object +// CHK-PHASES-NO-CC: 35: offload, " (nvptx64-nvidia-cuda)" {34}, object +// CHK-PHASES-NO-CC: 36: input, "{{.*}}libsycl-fallback-cstring.o", object +// CHK-PHASES-NO-CC: 37: clang-offload-unbundler, {36}, object +// CHK-PHASES-NO-CC: 38: offload, " (nvptx64-nvidia-cuda)" {37}, object +// CHK-PHASES-NO-CC: 39: input, "{{.*}}libsycl-fallback-complex.o", object +// CHK-PHASES-NO-CC: 40: clang-offload-unbundler, {39}, object +// CHK-PHASES-NO-CC: 41: offload, " (nvptx64-nvidia-cuda)" {40}, object +// CHK-PHASES-NO-CC: 42: input, "{{.*}}libsycl-fallback-complex-fp64.o", object +// CHK-PHASES-NO-CC: 43: clang-offload-unbundler, {42}, object +// CHK-PHASES-NO-CC: 44: offload, " (nvptx64-nvidia-cuda)" {43}, object +// CHK-PHASES-NO-CC: 45: input, "{{.*}}libsycl-fallback-cmath.o", object +// CHK-PHASES-NO-CC: 46: clang-offload-unbundler, {45}, object +// CHK-PHASES-NO-CC: 47: offload, " (nvptx64-nvidia-cuda)" {46}, object +// CHK-PHASES-NO-CC: 48: input, "{{.*}}libsycl-fallback-cmath-fp64.o", object +// CHK-PHASES-NO-CC: 49: clang-offload-unbundler, {48}, object +// CHK-PHASES-NO-CC: 50: offload, " (nvptx64-nvidia-cuda)" {49}, object +// CHK-PHASES-NO-CC: 51: input, "{{.*}}libsycl-fallback-imf.o", object +// CHK-PHASES-NO-CC: 52: clang-offload-unbundler, {51}, object +// CHK-PHASES-NO-CC: 53: offload, " (nvptx64-nvidia-cuda)" {52}, object +// CHK-PHASES-NO-CC: 54: input, "{{.*}}libsycl-fallback-imf-fp64.o", object +// CHK-PHASES-NO-CC: 55: clang-offload-unbundler, {54}, object +// CHK-PHASES-NO-CC: 56: offload, " (nvptx64-nvidia-cuda)" {55}, object +// CHK-PHASES-NO-CC: 57: input, "{{.*}}libsycl-itt-user-wrappers.o", object +// CHK-PHASES-NO-CC: 58: clang-offload-unbundler, {57}, object +// CHK-PHASES-NO-CC: 59: offload, " (nvptx64-nvidia-cuda)" {58}, object +// CHK-PHASES-NO-CC: 60: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object +// CHK-PHASES-NO-CC: 61: clang-offload-unbundler, {60}, object +// CHK-PHASES-NO-CC: 62: offload, " (nvptx64-nvidia-cuda)" {61}, object +// CHK-PHASES-NO-CC: 63: input, "{{.*}}libsycl-itt-stubs.o", object +// CHK-PHASES-NO-CC: 64: clang-offload-unbundler, {63}, object +// CHK-PHASES-NO-CC: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object +// CHK-PHASES-NO-CC: 66: input, "{{.*}}spirv-nvptx64--nvidiacl.bc", object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 67: input, "{{.*}}libdevice{{.*}}bc", object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 69: sycl-post-link, {68}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 70: file-table-tform, {69}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 71: backend, {70}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 72: assembler, {71}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 73: linker, {71, 72}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 74: foreach, {70, 73}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 75: file-table-tform, {69, 74}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 76: clang-offload-wrapper, {75}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 77: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {76}, image +// +// /// Check phases specifying a compute capability. // RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda \ // RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES %s // -// RUN: %clang_cl -ccc-print-phases -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-cuda \ -// RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PHASES %s +// TODO: Enable for clang_cl once device lib linking works for clang_cl // // CHK-PHASES: 0: input, "{{.*}}", c++, (host-sycl) // CHK-PHASES: 1: append-footer, {0}, c++, (host-sycl) @@ -87,15 +140,72 @@ // CHK-PHASES: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES: 10: linker, {9}, image, (host-sycl) // CHK-PHASES: 11: linker, {5}, ir, (device-sycl, sm_35) -// CHK-PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_35) -// CHK-PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_35) -// CHK-PHASES: 14: backend, {13}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 15: assembler, {14}, object, (device-sycl, sm_35) -// CHK-PHASES: 16: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 17: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 19: clang-offload-wrapper, {18}, object, (device-sycl, sm_35) -// CHK-PHASES: 20: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {19}, image +// CHK-PHASES: 12: input, "{{.*}}libsycl-crt.o", object +// CHK-PHASES: 13: clang-offload-unbundler, {12}, object +// CHK-PHASES: 14: offload, " (nvptx64-nvidia-cuda)" {13}, object +// CHK-PHASES: 15: input, "{{.*}}libsycl-complex.o", object +// CHK-PHASES: 16: clang-offload-unbundler, {15}, object +// CHK-PHASES: 17: offload, " (nvptx64-nvidia-cuda)" {16}, object +// CHK-PHASES: 18: input, "{{.*}}libsycl-complex-fp64.o", object +// CHK-PHASES: 19: clang-offload-unbundler, {18}, object +// CHK-PHASES: 20: offload, " (nvptx64-nvidia-cuda)" {19}, object +// CHK-PHASES: 21: input, "{{.*}}libsycl-cmath.o", object +// CHK-PHASES: 22: clang-offload-unbundler, {21}, object +// CHK-PHASES: 23: offload, " (nvptx64-nvidia-cuda)" {22}, object +// CHK-PHASES: 24: input, "{{.*}}libsycl-cmath-fp64.o", object +// CHK-PHASES: 25: clang-offload-unbundler, {24}, object +// CHK-PHASES: 26: offload, " (nvptx64-nvidia-cuda)" {25}, object +// CHK-PHASES: 27: input, "{{.*}}libsycl-imf.o", object +// CHK-PHASES: 28: clang-offload-unbundler, {27}, object +// CHK-PHASES: 29: offload, " (nvptx64-nvidia-cuda)" {28}, object +// CHK-PHASES: 30: input, "{{.*}}libsycl-imf-fp64.o", object +// CHK-PHASES: 31: clang-offload-unbundler, {30}, object +// CHK-PHASES: 32: offload, " (nvptx64-nvidia-cuda)" {31}, object +// CHK-PHASES: 33: input, "{{.*}}libsycl-fallback-cassert.o", object +// CHK-PHASES: 34: clang-offload-unbundler, {33}, object +// CHK-PHASES: 35: offload, " (nvptx64-nvidia-cuda)" {34}, object +// CHK-PHASES: 36: input, "{{.*}}libsycl-fallback-cstring.o", object +// CHK-PHASES: 37: clang-offload-unbundler, {36}, object +// CHK-PHASES: 38: offload, " (nvptx64-nvidia-cuda)" {37}, object +// CHK-PHASES: 39: input, "{{.*}}libsycl-fallback-complex.o", object +// CHK-PHASES: 40: clang-offload-unbundler, {39}, object +// CHK-PHASES: 41: offload, " (nvptx64-nvidia-cuda)" {40}, object +// CHK-PHASES: 42: input, "{{.*}}libsycl-fallback-complex-fp64.o", object +// CHK-PHASES: 43: clang-offload-unbundler, {42}, object +// CHK-PHASES: 44: offload, " (nvptx64-nvidia-cuda)" {43}, object +// CHK-PHASES: 45: input, "{{.*}}libsycl-fallback-cmath.o", object +// CHK-PHASES: 46: clang-offload-unbundler, {45}, object +// CHK-PHASES: 47: offload, " (nvptx64-nvidia-cuda)" {46}, object +// CHK-PHASES: 48: input, "{{.*}}libsycl-fallback-cmath-fp64.o", object +// CHK-PHASES: 49: clang-offload-unbundler, {48}, object +// CHK-PHASES: 50: offload, " (nvptx64-nvidia-cuda)" {49}, object +// CHK-PHASES: 51: input, "{{.*}}libsycl-fallback-imf.o", object +// CHK-PHASES: 52: clang-offload-unbundler, {51}, object +// CHK-PHASES: 53: offload, " (nvptx64-nvidia-cuda)" {52}, object +// CHK-PHASES: 54: input, "{{.*}}libsycl-fallback-imf-fp64.o", object +// CHK-PHASES: 55: clang-offload-unbundler, {54}, object +// CHK-PHASES: 56: offload, " (nvptx64-nvidia-cuda)" {55}, object +// CHK-PHASES: 57: input, "{{.*}}libsycl-itt-user-wrappers.o", object +// CHK-PHASES: 58: clang-offload-unbundler, {57}, object +// CHK-PHASES: 59: offload, " (nvptx64-nvidia-cuda)" {58}, object +// CHK-PHASES: 60: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object +// CHK-PHASES: 61: clang-offload-unbundler, {60}, object +// CHK-PHASES: 62: offload, " (nvptx64-nvidia-cuda)" {61}, object +// CHK-PHASES: 63: input, "{{.*}}libsycl-itt-stubs.o", object +// CHK-PHASES: 64: clang-offload-unbundler, {63}, object +// CHK-PHASES: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object +// CHK-PHASES: 66: input, "{{.*}}spirv-nvptx64--nvidiacl.bc", object, (device-sycl, sm_35) +// CHK-PHASES: 67: input, "{{.*}}libdevice{{.*}}bc", object, (device-sycl, sm_35) +// CHK-PHASES: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_35) +// CHK-PHASES: 69: sycl-post-link, {68}, ir, (device-sycl, sm_35) +// CHK-PHASES: 70: file-table-tform, {69}, ir, (device-sycl, sm_35) +// CHK-PHASES: 71: backend, {70}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 72: assembler, {71}, object, (device-sycl, sm_35) +// CHK-PHASES: 73: linker, {71, 72}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 74: foreach, {70, 73}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 75: file-table-tform, {69, 74}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 76: clang-offload-wrapper, {75}, object, (device-sycl, sm_35) +// CHK-PHASES: 77: offload, "host-sycl (x86_64-{{.*}})" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {76}, image /// Check calling preprocessor only // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ @@ -104,6 +214,8 @@ // CHK-PREPROC: 2: offload, "device-sycl (nvptx64-nvidia-cuda:sm_[[CUDA_VERSION]])" {1}, c++-cpp-output // CHK-PREPROC: 4: compiler, {1}, none, (device-sycl, sm_[[CUDA_VERSION]]) // +// +// // RUN: %clangxx -### -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/no/CUDA/path/here \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc %s 2>&1 \ From 5c3116e86a66dab29b10ce7d6cae7ddb4f1b8edf Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 11:47:44 +0100 Subject: [PATCH 24/36] Respond to comments --- clang/lib/Driver/Driver.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 70b74787115c3..bcdeb90184ed8 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4982,11 +4982,9 @@ class OffloadingActionBuilder final { // objects together. if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) { std::string LibSpirvFile; - if (Args.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) { + if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) { auto ProvidedPath = - Args.getLastArgValue( - clang::driver::options::OPT_fsycl_libspirv_path_EQ) - .str(); + Args.getLastArgValue(options::OPT_fsycl_libspirv_path_EQ).str(); if (llvm::sys::fs::exists(ProvidedPath)) LibSpirvFile = ProvidedPath; } else { From 8e35ff30ff8c83d567c6c1daec59c1a497b6fad6 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 12:04:28 +0100 Subject: [PATCH 25/36] cmake fix --- libdevice/cmake/modules/SYCLLibdevice.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 2026cd9a63c8e..02b2dbfdf9875 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -36,7 +36,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") list(APPEND compile_opts "-fno-sycl-libspirv" - "-fbundle-no-offload-arch") + "-fno-bundle-offload-arch") endif() if (WIN32) From da0ede398b439adecd1f9e317f9cadf23ce8d7e3 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 12:26:32 +0100 Subject: [PATCH 26/36] Remove codegen opt --- clang/include/clang/Basic/CodeGenOptions.def | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 84c8f2a4b972b..6783d97e87739 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -128,9 +128,6 @@ CODEGENOPT(XRayAlwaysEmitTypedEvents , 1, 0) ///< Set when -fxray-ignore-loops is enabled. CODEGENOPT(XRayIgnoreLoops , 1, 0) -///< Set when -fbundle-no-offload-arch is enabled. -CODEGENOPT(BundleNoOffloadArch, 1, 0) - ///< Set with -fno-xray-function-index to omit the index section. CODEGENOPT(XRayOmitFunctionIndex , 1, 0) From 005e012d62b9b7ecf8ee066a39fa6ef0c387d9cf Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 16:34:54 +0100 Subject: [PATCH 27/36] Fix path --- clang/test/Driver/sycl-cuda-tu-offload.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-cuda-tu-offload.cu b/clang/test/Driver/sycl-cuda-tu-offload.cu index 18f968bfa3cbb..bb2683b8b4aba 100644 --- a/clang/test/Driver/sycl-cuda-tu-offload.cu +++ b/clang/test/Driver/sycl-cuda-tu-offload.cu @@ -94,7 +94,7 @@ // DEFAULT-PHASES2:| | +- 70: clang-offload-unbundler, {69}, object // DEFAULT-PHASES2:| |- 71: offload, " (nvptx64-nvidia-cuda)" {70}, object // DEFAULT-PHASES2:| |- 72: input, "{{.*}}", object, (device-sycl, sm_80) -// DEFAULT-PHASES2:| |- 73: input, "/opt/cuda/nvvm/libdevice/libdevice.10.bc", object, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 73: input, "{{.*}}libdevice.10.bc", object, (device-sycl, sm_80) // DEFAULT-PHASES2:| +- 74: linker, {17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 72, 73}, ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| +- 75: sycl-post-link, {74}, ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| | +- 76: file-table-tform, {75}, ir, (device-sycl, sm_80) From 9e23342fe1f59edff7cd6441d4c87d743f248068 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 17:33:43 +0100 Subject: [PATCH 28/36] Change obj types to LLVM BC --- clang/lib/Driver/Driver.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index bcdeb90184ed8..1774b49832437 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5023,7 +5023,7 @@ class OffloadingActionBuilder final { Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibSpirvFile)); auto *SYCLLibClcInputAction = - C.MakeAction(*LibClcInputArg, types::TY_Object); + C.MakeAction(*LibClcInputArg, types::TY_LLVM_BC); DeviceLinkObjects.push_back(SYCLLibClcInputAction); } @@ -5039,7 +5039,7 @@ class OffloadingActionBuilder final { MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibDeviceFile)); auto *SYCLDeviceLibInputAction = C.MakeAction( - *CudaDeviceLibInputArg, types::TY_Object); + *CudaDeviceLibInputArg, types::TY_LLVM_BC); DeviceLinkObjects.push_back(SYCLDeviceLibInputAction); } } From 7ac0262b197d57d8d390e9db7a8adfa95d0d351b Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 18:34:48 +0100 Subject: [PATCH 29/36] Add new test for fno-bundle-offload-arch and fix other tests --- clang/test/Driver/sycl-cuda-tu-offload.cu | 4 ++-- clang/test/Driver/sycl-no-bundle-offload-arch.cpp | 13 +++++++++++++ clang/test/Driver/sycl-offload-nvptx.cpp | 8 ++++---- 3 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 clang/test/Driver/sycl-no-bundle-offload-arch.cpp diff --git a/clang/test/Driver/sycl-cuda-tu-offload.cu b/clang/test/Driver/sycl-cuda-tu-offload.cu index bb2683b8b4aba..dd6414cc1a95d 100644 --- a/clang/test/Driver/sycl-cuda-tu-offload.cu +++ b/clang/test/Driver/sycl-cuda-tu-offload.cu @@ -93,8 +93,8 @@ // DEFAULT-PHASES2:| | +- 69: input, "{{.*}}", object // DEFAULT-PHASES2:| | +- 70: clang-offload-unbundler, {69}, object // DEFAULT-PHASES2:| |- 71: offload, " (nvptx64-nvidia-cuda)" {70}, object -// DEFAULT-PHASES2:| |- 72: input, "{{.*}}", object, (device-sycl, sm_80) -// DEFAULT-PHASES2:| |- 73: input, "{{.*}}libdevice.10.bc", object, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 72: input, "{{.*}}nvidiacl.bc", ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 73: input, "{{.*}}libdevice.10.bc", ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| +- 74: linker, {17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 72, 73}, ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| +- 75: sycl-post-link, {74}, ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| | +- 76: file-table-tform, {75}, ir, (device-sycl, sm_80) diff --git a/clang/test/Driver/sycl-no-bundle-offload-arch.cpp b/clang/test/Driver/sycl-no-bundle-offload-arch.cpp new file mode 100644 index 0000000000000..6186a3cc8323f --- /dev/null +++ b/clang/test/Driver/sycl-no-bundle-offload-arch.cpp @@ -0,0 +1,13 @@ +// RUN: %clangxx -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fno-bundle-offload-arch -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUNDLE-TAG-OBJ %s +// +// CHK-BUNDLE-TAG-OBJ-NOT: clang-offload-bundler{{.*}}-targets=sycl-nvptx64-nvidia-cuda-sm_" + +// RUN: %clangxx -### -fsycl -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fno-bundle-offload-arch %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUNDLE-TAG %s +// +// CHK-BUNDLE-TAG-NOT: clang-offload-bundler{{.*}}-targets=sycl-nvptx64-nvidia-cuda-sm_" + +void func(){}; diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index 0a5674e931243..ba2fdeacc6e81 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -106,8 +106,8 @@ // CHK-PHASES-NO-CC: 63: input, "{{.*}}libsycl-itt-stubs.o", object // CHK-PHASES-NO-CC: 64: clang-offload-unbundler, {63}, object // CHK-PHASES-NO-CC: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object -// CHK-PHASES-NO-CC: 66: input, "{{.*}}spirv-nvptx64--nvidiacl.bc", object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 67: input, "{{.*}}libdevice{{.*}}bc", object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 66: input, "{{.*}}nvidiacl.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 67: input, "{{.*}}libdevice{{.*}}bc", ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 69: sycl-post-link, {68}, ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 70: file-table-tform, {69}, ir, (device-sycl, sm_50) @@ -194,8 +194,8 @@ // CHK-PHASES: 63: input, "{{.*}}libsycl-itt-stubs.o", object // CHK-PHASES: 64: clang-offload-unbundler, {63}, object // CHK-PHASES: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object -// CHK-PHASES: 66: input, "{{.*}}spirv-nvptx64--nvidiacl.bc", object, (device-sycl, sm_35) -// CHK-PHASES: 67: input, "{{.*}}libdevice{{.*}}bc", object, (device-sycl, sm_35) +// CHK-PHASES: 66: input, "{{.*}}nvidiacl.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 67: input, "{{.*}}libdevice{{.*}}bc", ir, (device-sycl, sm_35) // CHK-PHASES: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_35) // CHK-PHASES: 69: sycl-post-link, {68}, ir, (device-sycl, sm_35) // CHK-PHASES: 70: file-table-tform, {69}, ir, (device-sycl, sm_35) From 9ce10d4aaa311513afa4b581a718cd4bbe152924 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 28 Jul 2022 22:53:04 +0100 Subject: [PATCH 30/36] Update check-clang dependencies --- clang/test/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index e8fb32cc362d5..b8e386868cda8 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -139,6 +139,10 @@ if( NOT CLANG_BUILT_STANDALONE ) if(TARGET llvm-lto) list(APPEND CLANG_TEST_DEPS llvm-lto) endif() + + if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND CLANG_TEST_DEPS libspirv-builtins libsycldevice) + endif() endif() if(CLANG_ENABLE_STATIC_ANALYZER) From 5d666dbd3ca8644e1e7487cdab89a9e44afe2b1f Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 1 Aug 2022 11:38:28 +0100 Subject: [PATCH 31/36] Update tests --- clang/test/CMakeLists.txt | 4 ---- ...-signed_char.libspirv-nvptx64--nvidiacl.bc | 0 ...-signed_char.libspirv-nvptx64--nvidiacl.bc | 0 clang/test/Driver/sycl-cuda-tu-offload.cu | 8 ++++---- clang/test/Driver/sycl-offload-nvptx.cpp | 20 +++++++++++-------- 5 files changed, 16 insertions(+), 16 deletions(-) create mode 100644 clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc create mode 100644 clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index b8e386868cda8..e8fb32cc362d5 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -139,10 +139,6 @@ if( NOT CLANG_BUILT_STANDALONE ) if(TARGET llvm-lto) list(APPEND CLANG_TEST_DEPS llvm-lto) endif() - - if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - list(APPEND CLANG_TEST_DEPS libspirv-builtins libsycldevice) - endif() endif() if(CLANG_ENABLE_STATIC_ANALYZER) diff --git a/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc b/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc b/clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-cuda-tu-offload.cu b/clang/test/Driver/sycl-cuda-tu-offload.cu index dd6414cc1a95d..8340d71144cdb 100644 --- a/clang/test/Driver/sycl-cuda-tu-offload.cu +++ b/clang/test/Driver/sycl-cuda-tu-offload.cu @@ -1,4 +1,4 @@ -// RUN: %clangxx -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 -c %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 -c %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES // Test the correct placement of the offloading actions for compiling CUDA sources (*.cu) in SYCL. @@ -19,7 +19,7 @@ // DEFAULT-PHASES:|- 14: assembler, {13}, object, (host-cuda-sycl) // DEFAULT-PHASES:15: clang-offload-bundler, {3, 14}, object, (host-cuda-sycl) -// RUN: %clangxx -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES2 +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda -fsycl-libspirv-path=%S/Inputs/SYCL/lib/nvidiacl -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 --cuda-gpu-arch=sm_80 %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-PHASES2 // DEFAULT-PHASES2: +- 0: input, "{{.*}}", cuda, (host-cuda) // DEFAULT-PHASES2: +- 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) @@ -93,8 +93,8 @@ // DEFAULT-PHASES2:| | +- 69: input, "{{.*}}", object // DEFAULT-PHASES2:| | +- 70: clang-offload-unbundler, {69}, object // DEFAULT-PHASES2:| |- 71: offload, " (nvptx64-nvidia-cuda)" {70}, object -// DEFAULT-PHASES2:| |- 72: input, "{{.*}}nvidiacl.bc", ir, (device-sycl, sm_80) -// DEFAULT-PHASES2:| |- 73: input, "{{.*}}libdevice.10.bc", ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 72: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_80) +// DEFAULT-PHASES2:| |- 73: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| +- 74: linker, {17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 72, 73}, ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| +- 75: sycl-post-link, {74}, ir, (device-sycl, sm_80) // DEFAULT-PHASES2:| | +- 76: file-table-tform, {75}, ir, (device-sycl, sm_80) diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index ba2fdeacc6e81..36ee0168225bf 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -34,8 +34,11 @@ // CHK-ACTIONS-WIN: clang-offload-wrapper"{{.*}} "-host=x86_64-pc-windows-msvc" "-target=nvptx64" "-kind=sycl"{{.*}} /// Check phases w/out specifying a compute capability. -// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ +// RUN: -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/lib/nvidiacl \ +// RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \ // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s // // TODO: Enable for clang_cl once device lib linking works for clang_cl @@ -106,8 +109,8 @@ // CHK-PHASES-NO-CC: 63: input, "{{.*}}libsycl-itt-stubs.o", object // CHK-PHASES-NO-CC: 64: clang-offload-unbundler, {63}, object // CHK-PHASES-NO-CC: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object -// CHK-PHASES-NO-CC: 66: input, "{{.*}}nvidiacl.bc", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 67: input, "{{.*}}libdevice{{.*}}bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 66: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 67: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 69: sycl-post-link, {68}, ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 70: file-table-tform, {69}, ir, (device-sycl, sm_50) @@ -121,8 +124,11 @@ // // /// Check phases specifying a compute capability. -// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ +// RUN: -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/lib/nvidiacl \ +// RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \ // RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES %s // @@ -194,8 +200,8 @@ // CHK-PHASES: 63: input, "{{.*}}libsycl-itt-stubs.o", object // CHK-PHASES: 64: clang-offload-unbundler, {63}, object // CHK-PHASES: 65: offload, " (nvptx64-nvidia-cuda)" {64}, object -// CHK-PHASES: 66: input, "{{.*}}nvidiacl.bc", ir, (device-sycl, sm_35) -// CHK-PHASES: 67: input, "{{.*}}libdevice{{.*}}bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 66: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 67: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) // CHK-PHASES: 68: linker, {11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 66, 67}, ir, (device-sycl, sm_35) // CHK-PHASES: 69: sycl-post-link, {68}, ir, (device-sycl, sm_35) // CHK-PHASES: 70: file-table-tform, {69}, ir, (device-sycl, sm_35) @@ -214,8 +220,6 @@ // CHK-PREPROC: 2: offload, "device-sycl (nvptx64-nvidia-cuda:sm_[[CUDA_VERSION]])" {1}, c++-cpp-output // CHK-PREPROC: 4: compiler, {1}, none, (device-sycl, sm_[[CUDA_VERSION]]) // -// -// // RUN: %clangxx -### -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/no/CUDA/path/here \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc %s 2>&1 \ From c03873b39db095438a5f4665e9ca08e3435c1337 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 1 Aug 2022 17:41:34 +0100 Subject: [PATCH 32/36] Add cuda path for non standard cuda location --- libdevice/cmake/modules/SYCLLibdevice.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 02b2dbfdf9875..bfdbb4ee26f14 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -37,6 +37,10 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) list(APPEND compile_opts "-fno-sycl-libspirv" "-fno-bundle-offload-arch") + if(CUDA_TOOLKIT_ROOT_DIR) + list(APPEND compile_opts + "--cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") + endif() endif() if (WIN32) From 42895e279ecbf20578e5e94c16255f4d9e7a300b Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 3 Aug 2022 10:10:15 +0100 Subject: [PATCH 33/36] Removing cuda dependency on libdevice .o files --- libdevice/cmake/modules/SYCLLibdevice.cmake | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index bfdbb4ee26f14..a653b54466027 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -36,11 +36,8 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") list(APPEND compile_opts "-fno-sycl-libspirv" - "-fno-bundle-offload-arch") - if(CUDA_TOOLKIT_ROOT_DIR) - list(APPEND compile_opts - "--cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") - endif() + "-fno-bundle-offload-arch" + "-nocudalib") endif() if (WIN32) From d7097cbabace8dd8dce51a178f643f32d4c1ba76 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 11 Aug 2022 09:01:08 +0100 Subject: [PATCH 34/36] Respond to comments --- clang/lib/Driver/Driver.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 1774b49832437..3abdc7c9ee425 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5195,10 +5195,9 @@ class OffloadingActionBuilder final { // AOT compilation. if (isSPIR || isNVPTX) { bool UseJitLink = - isSPIR ? Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, - options::OPT_fno_sycl_device_lib_jit_link, - false) - : false; + isSPIR && + Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, + options::OPT_fno_sycl_device_lib_jit_link, false); bool UseAOTLink = isSPIR && (isSpirvAOT || !UseJitLink); SYCLDeviceLibLinked = addSYCLDeviceLibs( TC, FullLinkObjects, UseAOTLink, From 84292e9caf1fec14fb9169c36b50d02aa79bc83b Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 17 Aug 2022 17:57:34 +0100 Subject: [PATCH 35/36] Change behaviour so that cuda TC only has mathErrnoDefault for OFK_SYCL --- clang/lib/Driver/ToolChains/Cuda.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 5360fe8c9b0f8..c546c4ecf39cd 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -184,6 +184,9 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain { void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind, const llvm::opt::ArgList &Args) const override; + // math-errno should be the default for SYCL but not other OFK using CUDA TC + bool IsMathErrnoDefault() const override { return OK == Action::OFK_SYCL; } + void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; From 4174525f50d3df2d18f18bb911113f82c781fa1b Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 26 Aug 2022 14:52:36 +0100 Subject: [PATCH 36/36] Make sure -only-needed is used at llvm-link for SYCLDeviceLibs --- clang/lib/Driver/ToolChains/SYCL.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index d9f6057f9f2c5..bd124f184dc88 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -170,6 +170,13 @@ const char *SYCL::Linker::constructLLVMLinkCommand( LibPostfix = ".obj"; std::string FileName = this->getToolChain().getInputFilename(II); StringRef InputFilename = llvm::sys::path::filename(FileName); + if (this->getToolChain().getTriple().isNVPTX()) { + // Linking SYCL Device libs requires libclc as well as libdevice + if ((InputFilename.find("nvidiacl") != InputFilename.npos || + InputFilename.find("libdevice") != InputFilename.npos)) + return true; + LibPostfix = ".cubin"; + } StringRef LibSyclPrefix("libsycl-"); if (!InputFilename.startswith(LibSyclPrefix) || !InputFilename.endswith(LibPostfix) || (InputFilename.count('-') < 2))