From 57409e08a957496382a2d0b68d7e8eff06849c56 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 21 Jul 2025 18:36:39 -0500 Subject: [PATCH 1/5] [Clang] Rework creating offloading toolchains (#125556) Summary: This patch reworks how we create offloading toolchains. Previously we would handle this separately for all the different kinds. This patch instead changes this to use the target triple and the offloading kind to determine the proper toolchain. In the old case where the user only passes `--offload-arch` we instead infer the triple from the passed arguments. This is a pretty major overhaul but currently passes all the clang tests with only minor changes to error messages. --- .../clang/Basic/DiagnosticDriverKinds.td | 2 + clang/include/clang/Driver/Driver.h | 10 +- clang/include/clang/Driver/Options.td | 4 +- clang/include/clang/Driver/ToolChain.h | 4 - clang/lib/Driver/Driver.cpp | 632 ++++++++++-------- clang/lib/Driver/ToolChain.cpp | 38 -- clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 39 +- clang/lib/Driver/ToolChains/Cuda.cpp | 2 +- clang/test/Driver/amdgpu-hip-system-arch.c | 6 +- clang/test/Driver/cuda-phases.cu | 10 +- clang/test/Driver/hip-inputs.hip | 4 +- clang/test/Driver/hip-invalid-target-id.hip | 8 +- clang/test/Driver/hip-options.hip | 5 - clang/test/Driver/invalid-offload-options.cpp | 22 - clang/test/Driver/nvptx-cuda-system-arch.c | 6 +- clang/test/Driver/offload-target.c | 22 + clang/test/Driver/openmp-offload.c | 11 +- clang/test/Driver/openmp-system-arch.c | 8 +- 19 files changed, 406 insertions(+), 429 deletions(-) create mode 100644 clang/test/Driver/offload-target.c diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 55cae4dc5a73c..3f6b753235658 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -130,6 +130,8 @@ def warn_flag_no_sycl_libspirv InGroup; def err_drv_mix_cuda_hip : Error< "mixed CUDA and HIP compilation is not supported">; +def err_drv_mix_offload : Error< + "mixed %0 and %1 offloading compilation is not supported">; def err_drv_bad_target_id : Error< "invalid target ID '%0'; format is a processor name followed by an optional " "colon-delimited list of features followed by an enable/disable sign (e.g., " diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 553da82b77066..f7987bcaa6060 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -357,6 +357,9 @@ class Driver { phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL, llvm::opt::Arg **FinalPhaseArg = nullptr) const; + llvm::Expected> + executeProgram(llvm::ArrayRef Args) const; + private: /// Certain options suppress the 'no input files' warning. LLVM_PREFERRED_TYPE(bool) @@ -369,10 +372,6 @@ class Driver { /// stored in it, and will clean them up when torn down. mutable llvm::StringMap> ToolChains; - /// The associated offloading architectures with each toolchain. - llvm::DenseMap> - OffloadArchs; - private: /// TranslateInputArgs - Create a new derived argument list from the input /// arguments, after applying the standard argument translations. @@ -541,8 +540,7 @@ class Driver { /// empty string. llvm::SmallVector getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain = true) const; + Action::OffloadKind Kind, const ToolChain &TC) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bd85632c941d1..ddc24904f6139 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1236,7 +1236,7 @@ def offload_arch_EQ : CommaJoined<["--"], "offload-arch=">, "If 'native' is used the compiler will detect locally installed architectures. " "For HIP offloading, the device architecture can be followed by target ID features " "delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.">; -def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">, +def no_offload_arch_EQ : CommaJoined<["--"], "no-offload-arch=">, Visibility<[ClangOption, FlangOption]>, HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. " "'all' resets the list to its default value.">; @@ -1717,7 +1717,7 @@ defm auto_import : BoolFOption<"auto-import", // In the future this option will be supported by other offloading // languages and accept other values such as CPU/GPU architectures, // offload kinds and target aliases. -def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>, +def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>, Alias, HelpText<"Specify comma-separated list of offloading target triples (CUDA and HIP only)">; // C++ Coroutines diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index a004f057846fa..58552a629f250 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -218,10 +218,6 @@ class ToolChain { ToolChain(const Driver &D, const llvm::Triple &T, const llvm::opt::ArgList &Args); - /// Executes the given \p Executable and returns the stdout. - llvm::Expected> - executeToolChainProgram(StringRef Executable) const; - void setTripleEnvironment(llvm::Triple::EnvironmentType Env); virtual Tool *buildAssembler() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8520271aba65b..d4764c69a3859 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -112,65 +112,6 @@ using namespace clang::driver; using namespace clang; using namespace llvm::opt; -static std::optional getOffloadTargetTriple(const Driver &D, - const ArgList &Args) { - auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); - // Offload compilation flow does not support multiple targets for now. We - // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) - // to support multiple tool chains first. - switch (OffloadTargets.size()) { - default: - D.Diag(diag::err_drv_only_one_offload_target_supported); - return std::nullopt; - case 0: - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return std::nullopt; - case 1: - break; - } - return llvm::Triple(OffloadTargets[0]); -} - -static std::optional -getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, - const llvm::Triple &HostTriple) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); - } - auto TT = getOffloadTargetTriple(D, Args); - if (TT && (TT->getArch() == llvm::Triple::spirv32 || - TT->getArch() == llvm::Triple::spirv64)) { - if (Args.hasArg(options::OPT_emit_llvm)) - return TT; - D.Diag(diag::err_drv_cuda_offload_only_emit_bc); - return std::nullopt; - } - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - -static std::optional -getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ); - if (llvm::is_contained(OffloadArchs, "amdgcnspirv") && - OffloadArchs.size() == 1) - return llvm::Triple("spirv64-amd-amdhsa"); - return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. - } - auto TT = getOffloadTargetTriple(D, Args); - if (!TT) - return std::nullopt; - if (TT->isAMDGCN() && TT->getVendor() == llvm::Triple::AMD && - TT->getOS() == llvm::Triple::AMDHSA) - return TT; - if (TT->getArch() == llvm::Triple::spirv64) - return TT; - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - template static bool usesInput(const ArgList &Args, F &&Fn) { return llvm::any_of(Args, [&](Arg *A) { return (A->getOption().matches(options::OPT_x) && @@ -495,6 +436,44 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, return FinalPhase; } +llvm::Expected> +Driver::executeProgram(llvm::ArrayRef Args) const { + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("driver-program", "txt", OutputFile, + llvm::sys::fs::OF_Text); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + std::optional Redirects[] = { + {""}, + OutputFile.str(), + {""}, + }; + + std::string ErrorMessage; + int SecondsToWait = 60; + if (std::optional Str = + llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { + if (!llvm::to_integer(*Str, SecondsToWait)) + return llvm::createStringError(std::error_code(), + "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " + "an integer, got '" + + *Str + "'"); + SecondsToWait = std::max(SecondsToWait, 0); // infinite + } + StringRef Executable = Args[0]; + if (llvm::sys::ExecuteAndWait(Executable, Args, {}, Redirects, SecondsToWait, + /*MemoryLimit=*/0, &ErrorMessage)) + return llvm::createStringError(std::error_code(), + Executable + ": " + ErrorMessage); + + llvm::ErrorOr> OutputBuf = + llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return llvm::createStringError(OutputBuf.getError(), + "Failed to read stdout of " + Executable + + ": " + OutputBuf.getError().message()); + return std::move(*OutputBuf); +} + static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts, StringRef Value, bool Claim = true) { Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value, @@ -1059,9 +1038,13 @@ static bool addSYCLDefaultTriple(Compilation &C, return true; } -void Driver::CreateOffloadingDeviceToolChains(Compilation &C, - InputList &Inputs) { +// Handles `native` offload architectures by using the 'offload-arch' utility. +static llvm::SmallVector +getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) { + StringRef Program = C.getArgs().getLastArgValue( + options::OPT_offload_arch_tool_EQ, "offload-arch"); +<<<<<<< HEAD // // CUDA/HIP // @@ -1090,154 +1073,181 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); if (!CudaTriple) return; +======= + SmallVector GPUArchs; + if (llvm::ErrorOr Executable = + llvm::sys::findProgramByName(Program)) { + llvm::SmallVector Args{*Executable}; + if (Kind == Action::OFK_HIP) + Args.push_back("--only=amdgpu"); + else if (Kind == Action::OFK_Cuda) + Args.push_back("--only=nvptx"); + auto StdoutOrErr = C.getDriver().executeProgram(Args); +>>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) + + if (!StdoutOrErr) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << StdoutOrErr.takeError() + << "--offload-arch"; + return GPUArchs; + } else if ((*StdoutOrErr)->getBuffer().empty()) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << "No GPU detected in the system" + << "--offload-arch"; + return GPUArchs; + } + + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + } else { + C.getDriver().Diag(diag::err_drv_command_failure) << "offload-arch"; + } + return GPUArchs; +} - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple, - C.getDefaultToolChain().getTriple()); - - // Emit a warning if the detected CUDA version is too new. - const CudaInstallationDetector &CudaInstallation = - static_cast(TC).CudaInstallation; - if (CudaInstallation.isValid()) - CudaInstallation.WarnIfUnsupportedVersion(); - C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC, - /*SpecificToolchain=*/true); - } else if (IsHIP && !UseLLVMOffload) { - if (auto *OMPTargetArg = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_unsupported_opt_for_language_mode) - << OMPTargetArg->getSpelling() << "HIP"; - return; +// Attempts to infer the correct offloading toolchain triple by looking at the +// requested offloading kind and architectures. +static llvm::DenseSet +inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { + std::set Archs; + for (Arg *A : C.getInputArgs()) { + for (StringRef Arch : A->getValues()) { + if (A->getOption().matches(options::OPT_offload_arch_EQ)) { + if (Arch == "native") { + for (StringRef Str : getSystemOffloadArchs(C, Kind)) + Archs.insert(Str.str()); + } else { + Archs.insert(Arch.str()); + } + } else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) { + if (Arch == "all") + Archs.clear(); + else + Archs.erase(Arch.str()); + } } + } - auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - if (!HIPTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP); + llvm::DenseSet Triples; + for (llvm::StringRef Arch : Archs) { + OffloadArch ID = StringToOffloadArch(Arch); + if (ID == OffloadArch::UNKNOWN) + ID = StringToOffloadArch( + getProcessorFromTargetID(llvm::Triple("amdgcn-amd-amdhsa"), Arch)); - // TODO: Fix 'amdgcnspirv' handling with the new driver. - if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC, - /*SpecificToolchain=*/true); - } + if (Kind == Action::OFK_HIP && !IsAMDOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << Arch; + return llvm::DenseSet(); + } + if (Kind == Action::OFK_Cuda && !IsNVIDIAOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << Arch; + return llvm::DenseSet(); + } + if (Kind == Action::OFK_OpenMP && + (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED)) { + C.getDriver().Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) + << Arch; + return llvm::DenseSet(); + } + if (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "offload" << Arch; + return llvm::DenseSet(); + } + + StringRef Triple; + if (ID == OffloadArch::AMDGCNSPIRV) + Triple = "spirv64-amd-amdhsa"; + else if (IsNVIDIAOffloadArch(ID)) + Triple = C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"; + else if (IsAMDOffloadArch(ID)) + Triple = "amdgcn-amd-amdhsa"; + else + continue; - if (IsCuda || IsHIP) - CUIDOpts = CUIDOptions(C.getArgs(), *this); + // Make a new argument that dispatches this argument to the appropriate + // toolchain. This is required when we infer it and create potentially + // incompatible toolchains from the global option. + Option Opt = C.getDriver().getOpts().getOption(options::OPT_Xarch__); + unsigned Index = C.getArgs().getBaseArgs().MakeIndex("-Xarch_"); + Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index, + C.getArgs().MakeArgString(Triple.split("-").first), + C.getArgs().MakeArgString("--offload-arch=" + Arch)); + C.getArgs().append(A); + Triples.insert(Triple); + } + + // Infer the default target triple if no specific architectures are given. + if (Archs.empty() && Kind == Action::OFK_HIP) + Triples.insert("amdgcn-amd-amdhsa"); + else if (Archs.empty() && Kind == Action::OFK_Cuda) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + else if (Archs.empty() && Kind == Action::OFK_SYCL) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "spirv64-unknown-unknown" + : "spirv32-unknown-unknown"); + + // We need to dispatch these to the appropriate toolchain now. + C.getArgs().eraseArg(options::OPT_offload_arch_EQ); + C.getArgs().eraseArg(options::OPT_no_offload_arch_EQ); + + return Triples; +} - // - // OpenMP - // - // We need to generate an OpenMP toolchain if the user specified targets with - // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. +void Driver::CreateOffloadingDeviceToolChains(Compilation &C, + InputList &Inputs) { + bool UseLLVMOffload = C.getInputArgs().hasArg( + options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); + bool IsCuda = + llvm::any_of(Inputs, + [](std::pair &I) { + return types::isCuda(I.first); + }) && + !UseLLVMOffload; + bool IsHIP = + (llvm::any_of(Inputs, + [](std::pair &I) { + return types::isHIP(I.first); + }) || + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_hipstdpar)) && + !UseLLVMOffload; + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false); bool IsOpenMPOffloading = - ((IsCuda || IsHIP) && UseLLVMOffload) || + UseLLVMOffload || (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false) && (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); - if (IsOpenMPOffloading) { - // We expect that -fopenmp-targets is always used in conjunction with the - // option -fopenmp specifying a valid runtime with offloading support, i.e. - // libomp or libiomp. - OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); - if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; - } - - // If the user specified -fopenmp-targets= we create a toolchain for each - // valid triple. Otherwise, if only --offload-arch= was specified we instead - // attempt to derive the appropriate toolchains from the arguments. - if (Arg *OpenMPTargets = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - if (OpenMPTargets && !OpenMPTargets->getNumValues()) { - Diag(clang::diag::warn_drv_empty_joined_argument) - << OpenMPTargets->getAsString(C.getInputArgs()); - return; - } - - // Make sure these show up in a deterministic order. - std::multiset OpenMPTriples; - for (StringRef T : OpenMPTargets->getValues()) - OpenMPTriples.insert(T); - - llvm::StringMap FoundNormalizedTriples; - for (StringRef T : OpenMPTriples) { - llvm::Triple TT(ToolChain::getOpenMPTriple(T)); - std::string NormalizedName = TT.normalize(); - - // Make sure we don't have a duplicate triple. - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, T); - if (!Inserted) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << T << TripleIt->second; - continue; - } - - // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) { - Diag(clang::diag::err_drv_invalid_omp_target) << T; - continue; - } - - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/true); - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - ((!IsHIP && !IsCuda) || UseLLVMOffload)) { - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsNVIDIAOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsAMDOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && - !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; - return; - } - } - - // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - - llvm::SmallVector Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = Archs; - } - } - - // If the set is empty then we failed to find a native architecture. - auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP); - if (TCRange.first == TCRange.second) - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) - << "native"; - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && + !(IsCuda || IsHIP))); + + llvm::DenseSet Kinds; + const std::pair ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMPOffloading, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); + + // We currently don't support any kind of mixed offloading. + if (Kinds.size() > 1) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()).upper() + << Action::GetOffloadKindName(*(++Kinds.begin())).upper(); return; } +<<<<<<< HEAD // // SYCL // @@ -1249,19 +1259,23 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, options::OPT_fno_sycl, false) || C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, options::OPT_fsyclbin_EQ); - - auto argSYCLIncompatible = [&](OptSpecifier OptId) { - if (!IsSYCL) - return; - if (Arg *IncompatArg = C.getInputArgs().getLastArg(OptId)) - Diag(clang::diag::err_drv_argument_not_allowed_with) - << IncompatArg->getSpelling() << "-fsycl"; - }; - // -static-libstdc++ is not compatible with -fsycl. - argSYCLIncompatible(options::OPT_static_libstdcxx); - // -ffreestanding cannot be used with -fsycl - argSYCLIncompatible(options::OPT_ffreestanding); - +======= + // Initialize the compilation identifier used for unique CUDA / HIP names. + if (IsCuda || IsHIP) + CUIDOpts = CUIDOptions(C.getArgs(), *this); +>>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) + + // Get the list of requested offloading toolchains. If they were not + // explicitly specified we will infer them based on the offloading language + // and requested architectures. + std::multiset Triples; + if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { + std::vector ArgValues = + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); + for (llvm::StringRef Target : ArgValues) + Triples.insert(C.getInputArgs().MakeArgString(Target)); + +<<<<<<< HEAD llvm::SmallVector UniqueSYCLTriplesVec; // A mechanism for retrieving SYCL-specific options, erroring out @@ -1537,12 +1551,74 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, /*SpecificToolchain=*/true); +======= + if (ArgValues.empty()) + Diag(clang::diag::warn_drv_empty_joined_argument) + << C.getInputArgs() + .getLastArg(options::OPT_offload_targets_EQ) + ->getAsString(C.getInputArgs()); + } else if (Kinds.size() > 0) { + for (Action::OffloadKind Kind : Kinds) { + llvm::DenseSet Derived = inferOffloadToolchains(C, Kind); + Triples.insert(Derived.begin(), Derived.end()); +>>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) } } - // - // TODO: Add support for other offloading programming models here. - // + // Build an offloading toolchain for every requested target and kind. + llvm::StringMap FoundNormalizedTriples; + for (StringRef Target : Triples) { + // OpenMP offloading requires a compatible libomp. + if (Kinds.contains(Action::OFK_OpenMP)) { + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + return; + } + } + + // Certain options are not allowed when combined with SYCL compilation. + if (Kinds.contains(Action::OFK_SYCL)) { + for (auto ID : + {options::OPT_static_libstdcxx, options::OPT_ffreestanding}) + if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID)) + Diag(clang::diag::err_drv_argument_not_allowed_with) + << IncompatArg->getSpelling() << "-fsycl"; + } + + // Create a device toolchain for every specified kind and triple. + for (Action::OffloadKind Kind : Kinds) { + llvm::Triple TT = Kind == Action::OFK_OpenMP + ? ToolChain::getOpenMPTriple(Target) + : llvm::Triple(Target); + if (TT.getArch() == llvm::Triple::ArchType::UnknownArch) { + Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT.str(); + continue; + } + + std::string NormalizedName = TT.normalize(); + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Target); + if (!Inserted) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << Target << TripleIt->second; + continue; + } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. + if (Kind == Action::OFK_Cuda) { + auto &CudaInstallation = + static_cast(TC).CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + } + + C.addOffloadDeviceToolChain(&TC, Kind); + } + } } bool Driver::loadZOSCustomizationFile(llvm::cl::ExpansionContext &ExpCtx) { @@ -4182,9 +4258,6 @@ class OffloadingActionBuilder final { // architecture. If we are in host-only mode we return 'success' so that // the host uses the CUDA offload kind. if (auto *IA = dyn_cast(HostAction)) { - assert(!GpuArchList.empty() && - "We should have at least one GPU architecture."); - // If the host input is not CUDA or HIP, we don't need to bother about // this input. if (!(IA->getType() == types::TY_CUDA || @@ -4284,10 +4357,6 @@ class OffloadingActionBuilder final { CudaDeviceActions.clear(); } - /// Get canonicalized offload arch option. \returns empty StringRef if the - /// option is invalid. - virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0; - virtual std::optional> getConflictOffloadArchCombination(const std::set &GpuArchs) = 0; @@ -4316,15 +4385,25 @@ class OffloadingActionBuilder final { return true; } - ToolChains.push_back( - AssociatedOffloadKind == Action::OFK_Cuda - ? C.getSingleOffloadToolChain() - : C.getSingleOffloadToolChain()); + std::set GpuArchs; + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { + for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { + ToolChains.push_back(I.second); + + for (auto Arch : + C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second)) + GpuArchs.insert(Arch); + } + } + + for (auto Arch : GpuArchs) + GpuArchList.push_back(Arch.data()); CompileHostOnly = C.getDriver().offloadHostOnly(); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); +<<<<<<< HEAD // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, @@ -4401,6 +4480,9 @@ class OffloadingActionBuilder final { } return Error; +======= + return false; +>>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) } }; @@ -4415,15 +4497,6 @@ class OffloadingActionBuilder final { DefaultOffloadArch = OffloadArch::CudaDefault; } - StringRef getCanonicalOffloadArch(StringRef ArchStr) override { - OffloadArch Arch = StringToOffloadArch(ArchStr); - if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; - return StringRef(); - } - return OffloadArchToString(Arch); - } - std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { @@ -4600,24 +4673,6 @@ class OffloadingActionBuilder final { bool canUseBundlerUnbundler() const override { return true; } - StringRef getCanonicalOffloadArch(StringRef IdStr) override { - llvm::StringMap Features; - // getHIPOffloadTargetTriple() is known to return valid value as it has - // been called successfully in the CreateOffloadingDeviceToolChains(). - auto T = - (IdStr == "amdgcnspirv") - ? llvm::Triple("spirv64-amd-amdhsa") - : *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()); - auto ArchStr = parseTargetID(T, IdStr, &Features); - if (!ArchStr) { - C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; - C.setContainsError(); - return StringRef(); - } - auto CanId = getCanonicalTargetID(*ArchStr, Features); - return Args.MakeArgStringRef(CanId); - }; - std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { @@ -7470,23 +7525,20 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple, - bool SpecificToolchain) { + const llvm::Triple &Triple) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (Triple.isNVPTX() && (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "CUDA" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << ArchStr; return StringRef(); } else if (Triple.isAMDGPU() && (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "HIP" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << ArchStr; return StringRef(); } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && @@ -7542,11 +7594,7 @@ getConflictOffloadArchCombination(const llvm::DenseSet &Archs, llvm::SmallVector Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain) const { - if (!TC) - TC = &C.getDefaultToolChain(); - + Action::OffloadKind Kind, const ToolChain &TC) const { // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, @@ -7559,6 +7607,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; +<<<<<<< HEAD StringRef Arch; for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. @@ -7669,47 +7718,46 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } } +======= + for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { +>>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : Arg->getValues()) { if (Arch == "native" || Arch.empty()) { - auto GPUsOrErr = TC->getSystemGPUArchs(Args); + auto GPUsOrErr = TC.getSystemGPUArchs(Args); if (!GPUsOrErr) { - if (!SpecificToolchain) - llvm::consumeError(GPUsOrErr.takeError()); - else - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } for (auto ArchStr : *GPUsOrErr) { - StringRef CanonicalStr = - getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr), - TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = getCanonicalArchString( + C, Args, Args.MakeArgString(ArchStr), TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector(); } } else { - StringRef CanonicalStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector(); } } } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { - for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { + for (StringRef Arch : Arg->getValues()) { if (Arch == "all") { Archs.clear(); } else { - StringRef ArchStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef ArchStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); Archs.erase(ArchStr); } } @@ -7717,26 +7765,34 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } if (auto ConflictingArchs = - getConflictOffloadArchCombination(Archs, TC->getTriple())) + getConflictOffloadArchCombination(Archs, TC.getTriple())) C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) << ConflictingArchs->first << ConflictingArchs->second; - // Skip filling defaults if we're just querying what is availible. - if (SpecificToolchain && Archs.empty()) { + // Fill in the default architectures if not provided explicitly. + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) { Archs.insert(OffloadArchToString(OffloadArch::CudaDefault)); } else if (Kind == Action::OFK_HIP) { +<<<<<<< HEAD Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); +======= + Archs.insert(OffloadArchToString(TC.getTriple().isSPIRV() + ? OffloadArch::Generic + : OffloadArch::HIPDefault)); + } else if (Kind == Action::OFK_SYCL) { + Archs.insert(StringRef()); +>>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. - if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) + if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) .getLastArg(options::OPT_march_EQ)) { Archs.insert(Arg->getValue()); } else { - auto ArchsOrErr = TC->getSystemGPUArchs(Args); + auto ArchsOrErr = TC.getSystemGPUArchs(Args); if (!ArchsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) << llvm::toString(ArchsOrErr.takeError()) << "--offload-arch"; } else if (!ArchsOrErr->empty()) { for (auto Arch : *ArchsOrErr) @@ -7835,7 +7891,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Get the product of all bound architectures and toolchains. SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) { - for (StringRef Arch : OffloadArchs.lookup(TC)) { + for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) { TCAndArchs.push_back(std::make_pair(TC, Arch)); DeviceActions.push_back( C.MakeAction(*InputArg, InputType, CUID)); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index be3c6fcfb829d..e6addc933fbb3 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -105,44 +105,6 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, addIfExists(getFilePaths(), Path); } -llvm::Expected> -ToolChain::executeToolChainProgram(StringRef Executable) const { - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile, - llvm::sys::fs::OF_Text); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - std::optional Redirects[] = { - {""}, - OutputFile.str(), - {""}, - }; - - std::string ErrorMessage; - int SecondsToWait = 60; - if (std::optional Str = - llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { - if (!llvm::to_integer(*Str, SecondsToWait)) - return llvm::createStringError(std::error_code(), - "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " - "an integer, got '" + - *Str + "'"); - SecondsToWait = std::max(SecondsToWait, 0); // infinite - } - if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects, - SecondsToWait, - /*MemoryLimit=*/0, &ErrorMessage)) - return llvm::createStringError(std::error_code(), - Executable + ": " + ErrorMessage); - - llvm::ErrorOr> OutputBuf = - llvm::MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) - return llvm::createStringError(OutputBuf.getError(), - "Failed to read stdout of " + Executable + - ": " + OutputBuf.getError().message()); - return std::move(*OutputBuf); -} - void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) { Triple.setEnvironment(Env); if (EffectiveTriple != llvm::Triple()) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index e0c2446d13124..296247cc4d125 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -841,7 +841,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 785162203dcc5..7b006eff85988 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -105,32 +105,15 @@ forAllAssociatedToolChains(Compilation &C, const JobAction &JA, // Apply Work on all the offloading tool chains associated with the current // action. - if (JA.isHostOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isHostOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain()); - - if (JA.isHostOffloading(Action::OFK_OpenMP)) { - auto TCs = C.getOffloadToolChains(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_OpenMP)) - Work(*C.getSingleOffloadToolChain()); - - if (JA.isHostOffloading(Action::OFK_SYCL)) { - auto TCs = C.getOffloadToolChains(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_SYCL)) - Work(*C.getSingleOffloadToolChain()); - - // - // TODO: Add support for other offloading programming models here. - // + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP, + Action::OFK_HIP, Action::OFK_SYCL}) { + if (JA.isHostOffloading(Kind)) { + auto TCs = C.getOffloadToolChains(Kind); + for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) + Work(*II->second); + } else if (JA.isDeviceOffloading(Kind)) + Work(*C.getSingleOffloadToolChain()); + } } static bool @@ -5430,8 +5413,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else { // Host-side compilation. NormalizedTriple = - (IsCuda ? C.getSingleOffloadToolChain() - : C.getSingleOffloadToolChain()) + (IsCuda ? C.getOffloadToolChains(Action::OFK_Cuda).first->second + : C.getOffloadToolChains(Action::OFK_HIP).first->second) ->getTriple() .normalize(); if (IsCuda) { diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index f1039b9775832..fd4c59539d3f4 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -903,7 +903,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c index 9c27bc09fb36c..12e298a8636b1 100644 --- a/clang/test/Driver/amdgpu-hip-system-arch.c +++ b/clang/test/Driver/amdgpu-hip-system-arch.c @@ -14,14 +14,14 @@ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '--offload-arch' +// NO-OUTPUT-ERROR: error: cannot determine hip architecture{{.*}}; consider passing it via '--offload-arch' // case when amdgpu-arch does not return anything with successful execution // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch' +// EMPTY-OUTPUT: error: cannot determine hip architecture: No GPU detected in the system; consider passing it via '--offload-arch' // case when amdgpu-arch returns a gfx906 GPU. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \ @@ -36,4 +36,4 @@ // RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \ // RUN: -x hip %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD-TIMEOUT -// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) +// BAD-TIMEOUT: clang: error: cannot determine hip architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 8b91a1d5a7fcf..220a320e32705 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -324,8 +324,8 @@ // RUN: -ccc-print-phases --offload-arch=sm_999 -fgpu-rdc -c %s 2>&1 \ // RUN: | FileCheck -check-prefix=INVALID-ARCH %s // INVALID-ARCH: error: unsupported CUDA gpu architecture: sm_999 -// INVALID-ARCH-NEXT: 0: input, "[[INPUT:.+]]", cuda, (host-cuda) -// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) -// INVALID-ARCH-NEXT: 2: compiler, {1}, ir, (host-cuda) -// INVALID-ARCH-NEXT: 3: backend, {2}, assembler, (host-cuda) -// INVALID-ARCH-NEXT: 4: assembler, {3}, object, (host-cuda) +// INVALID-ARCH: 0: input, "[[INPUT:.+]]", cuda +// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output +// INVALID-ARCH-NEXT: 2: compiler, {1}, ir +// INVALID-ARCH-NEXT: 3: backend, {2}, assembler +// INVALID-ARCH-NEXT: 4: assembler, {3}, object diff --git a/clang/test/Driver/hip-inputs.hip b/clang/test/Driver/hip-inputs.hip index 2d4cc3103c5ec..a8e25ad8ed198 100644 --- a/clang/test/Driver/hip-inputs.hip +++ b/clang/test/Driver/hip-inputs.hip @@ -15,5 +15,5 @@ // RUN: --hip-link %S/Inputs/hip_multiple_inputs/a.cu 2>&1 \ // RUN: | FileCheck -check-prefix=MIX %s -// CHECK-NOT: error: mixed CUDA and HIP compilation is not supported -// MIX: error: mixed CUDA and HIP compilation is not supported +// CHECK-NOT: error: mixed CUDA and HIP offloading compilation is not supported +// MIX: error: mixed CUDA and HIP offloading compilation is not supported diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip index 555043facb2a3..ad942e476617e 100644 --- a/clang/test/Driver/hip-invalid-target-id.hip +++ b/clang/test/Driver/hip-invalid-target-id.hip @@ -4,7 +4,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s -// NOPLUS: error: invalid target ID 'gfx908xnack' +// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx900 \ @@ -22,7 +22,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s -// UNK: error: invalid target ID 'gfx908:unknown+' +// UNK: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -31,7 +31,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s -// MIXED: error: invalid target ID 'gfx908:sramecc+:unknown+' +// MIXED: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -55,7 +55,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s -// NOCOLON: error: invalid target ID 'gfx900+xnack' +// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index 85e85f01ace54..18b039fb0d191 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -115,11 +115,6 @@ // OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp" // OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp" -// RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --offload-arch=gfx906 -fopenmp=libomp -fopenmp-targets=amdgcn %s 2>&1 \ -// RUN: | FileCheck -check-prefix=OMPTGT %s -// OMPTGT: unsupported option '--offload-targets=' for language mode 'HIP' - // Check -Xoffload-linker option is passed to lld. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp index 48d5310538a3c..6048a3ca82e77 100644 --- a/clang/test/Driver/invalid-offload-options.cpp +++ b/clang/test/Driver/invalid-offload-options.cpp @@ -1,29 +1,7 @@ // UNSUPPORTED: system-windows -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload= \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s // RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo \ // RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ // RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s // INVALID-TARGET: error: invalid or unsupported offload target: '{{.*}}' - -// In the future we should be able to specify multiple targets for HIP -// compilation but currently it is not supported. -// -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo,bar \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu \ -// RUN: --offload=foo --offload=bar \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s - -// TOO-MANY-TARGETS: error: only one offload target is supported - -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu -nogpuinc -nogpulib \ -// RUN: --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \ -// RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s - -// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload' diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index c54eeac73f73b..2d4eca8c43bc3 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -16,14 +16,14 @@ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine nvptx64 architecture{{.*}}; consider passing it via '--offload-arch' +// NO-OUTPUT-ERROR: error: cannot determine cuda architecture{{.*}}; consider passing it via '--offload-arch' // case when nvptx-arch does not return anything with successful execution // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch' +// EMPTY-OUTPUT: error: cannot determine cuda architecture: No GPU detected in the system; consider passing it via '--offload-arch' // case when nvptx-arch does not return anything with successful execution // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \ @@ -49,4 +49,4 @@ // RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD-TIMEOUT -// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) +// BAD-TIMEOUT: clang: error: cannot determine cuda architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/offload-target.c b/clang/test/Driver/offload-target.c new file mode 100644 index 0000000000000..123ecd3eb830e --- /dev/null +++ b/clang/test/Driver/offload-target.c @@ -0,0 +1,22 @@ +// RUN: %clang -### -fsycl --offload-targets=spirv64 -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL +// SYCL: "spirv64" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIP +// HIP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc -x cuda %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CUDA +// CUDA: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa,nvptx64-nvidia-cuda -fopenmp \ +// RUN: -Xarch_amdgcn --offload-arch=gfx90a -Xarch_nvptx64 --offload-arch=sm_89 \ +// RUN: -nogpulib -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=OPENMP +// OPENMP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" +// OPENMP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=spirv64-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIPSPIRV +// HIPSPIRV: "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c index 516c74b190885..d4016e98e6666 100644 --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -7,7 +7,7 @@ /// Check whether an invalid OpenMP target is specified: // RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s -// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' +// CHK-INVALID-TARGET: error: invalid or unsupported offload target: 'aaa-bbb-ccc-ddd' /// ########################################################################### @@ -18,15 +18,6 @@ /// ########################################################################### -/// Check error for no -fopenmp option -// RUN: not %clang -### -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s -// RUN: not %clang -### -fopenmp=libgomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s -// CHK-NO-FOPENMP: error: '-fopenmp-targets' must be used in conjunction with a '-fopenmp' option compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5' - -/// ########################################################################### - /// Check warning for duplicate offloading targets. // RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c index b18ecf3ec474b..167b07a23f512 100644 --- a/clang/test/Driver/openmp-system-arch.c +++ b/clang/test/Driver/openmp-system-arch.c @@ -24,13 +24,7 @@ // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \ // RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \ -// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \ -// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead +// NO-OUTPUT-ERROR: error: cannot determine openmp architecture // case when amdgpu-arch succeeds. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \ From 22cb230273e580216a38912826be45cad0a8d846 Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Wed, 30 Jul 2025 10:33:57 -0700 Subject: [PATCH 2/5] [WIP][Driver][SYCL] Refactor SYCL offloading behavior This re-lands this change: https://github.com/llvm/llvm-project/commit/a7d93653a6712d8a374a2776853057b03181c12a This was a significant update to how the offloading model is initialized for creating the toolchains that are used. Here, update the SYCL offloading model that is heavily dependent on unique usage given -fsycl-targets to use this new methodology. - Update -fsycl-targets as an alias to --offload-targets - Add needed parsing to create new triples based on non-triple based targets (intel_gpu, etc) --- clang/include/clang/Driver/Options.td | 3 +- clang/lib/Driver/Driver.cpp | 765 +++++++----------- clang/lib/Driver/ToolChain.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/lib/Driver/ToolChains/SYCL.cpp | 11 +- clang/test/Driver/openmp-offload-infer.c | 4 +- .../Driver/sycl-offload-arch-intel-cpu.cpp | 9 - clang/test/Driver/sycl-offload-new-driver.c | 2 +- .../sycl-offload-static-lib-2-old-model.cpp | 56 +- .../lib/rtc/DeviceCompilation.cpp | 2 +- 10 files changed, 316 insertions(+), 540 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ddc24904f6139..c26b48b2f1311 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7233,7 +7233,8 @@ def fno_sycl_esimd_build_host_code : Flag<["-"], "fno-sycl-esimd-build-host-cod Visibility<[ClangOption, CLOption, CC1Option]>, Flags<[HelpHidden]>, HelpText<"Do not build the host implementation of ESIMD functions.">; def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, - Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>, + Flags<[NoXarchOption]>, Alias, + Visibility<[ClangOption, CLOption, CC1Option]>, HelpText<"Specify comma-separated list of triples SYCL offloading targets " "to be supported">; def fsycl_force_target_EQ : Joined<["-"], "fsycl-force-target=">, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index d4764c69a3859..ab281db84fcb2 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -995,8 +995,11 @@ llvm::Triple Driver::getSYCLDeviceTriple(StringRef TargetArch, llvm::Triple TargetTriple(TargetArch); if (Arg && !Arg->isClaimed() && TargetTriple.isSPIR() && TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) { - Diag(diag::err_drv_unsupported_opt_removed) - << Arg->getSpelling().str() + TargetArch.str(); + SmallString<128> OptStr(Arg->getSpelling()); + if (Arg->getOption().matches(options::OPT_offload_targets_EQ)) + OptStr = "-fsycl-targets="; + OptStr += TargetArch.str(); + Diag(diag::err_drv_unsupported_opt_removed) << OptStr; Arg->claim(); } if (llvm::is_contained(SYCLAlias, TargetArch)) { @@ -1044,36 +1047,6 @@ getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) { StringRef Program = C.getArgs().getLastArgValue( options::OPT_offload_arch_tool_EQ, "offload-arch"); -<<<<<<< HEAD - // - // CUDA/HIP - // - // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA - // or HIP type. However, mixed CUDA/HIP compilation is not supported. - using namespace tools::SYCL; - bool IsCuda = - llvm::any_of(Inputs, [](std::pair &I) { - return types::isCuda(I.first); - }); - bool IsHIP = - llvm::any_of(Inputs, - [](std::pair &I) { - return types::isHIP(I.first); - }) || - C.getInputArgs().hasArg(options::OPT_hip_link) || - C.getInputArgs().hasArg(options::OPT_hipstdpar); - bool UseLLVMOffload = C.getInputArgs().hasArg( - options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); - if (IsCuda && IsHIP) { - Diag(clang::diag::err_drv_mix_cuda_hip); - return; - } - if (IsCuda && !UseLLVMOffload) { - auto CudaTriple = getNVIDIAOffloadTargetTriple( - *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); - if (!CudaTriple) - return; -======= SmallVector GPUArchs; if (llvm::ErrorOr Executable = llvm::sys::findProgramByName(Program)) { @@ -1083,7 +1056,6 @@ getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) { else if (Kind == Action::OFK_Cuda) Args.push_back("--only=nvptx"); auto StdoutOrErr = C.getDriver().executeProgram(Args); ->>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) if (!StdoutOrErr) { C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) @@ -1152,6 +1124,12 @@ inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { << Arch; return llvm::DenseSet(); } + if (Kind == Action::OFK_SYCL && !IsIntelGPUOffloadArch(ID) && + !IsIntelCPUOffloadArch(ID) && !IsAMDOffloadArch(ID) && + !IsNVIDIAOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + return llvm::DenseSet(); + } if (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "offload" << Arch; @@ -1167,6 +1145,10 @@ inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { : "nvptx-nvidia-cuda"; else if (IsAMDOffloadArch(ID)) Triple = "amdgcn-amd-amdhsa"; + else if (IsIntelGPUOffloadArch(ID)) + Triple = "spir64_gen-unknown-unknown"; + else if (IsIntelCPUOffloadArch(ID)) + Triple = "spir64_x86_64-unknown-unknown"; else continue; @@ -1191,8 +1173,8 @@ inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { : "nvptx-nvidia-cuda"); else if (Archs.empty() && Kind == Action::OFK_SYCL) Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() - ? "spirv64-unknown-unknown" - : "spirv32-unknown-unknown"); + ? "spir64-unknown-unknown" + : "spir-unknown-unknown"); // We need to dispatch these to the appropriate toolchain now. C.getArgs().eraseArg(options::OPT_offload_arch_EQ); @@ -1201,89 +1183,11 @@ inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { return Triples; } -void Driver::CreateOffloadingDeviceToolChains(Compilation &C, - InputList &Inputs) { - bool UseLLVMOffload = C.getInputArgs().hasArg( - options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); - bool IsCuda = - llvm::any_of(Inputs, - [](std::pair &I) { - return types::isCuda(I.first); - }) && - !UseLLVMOffload; - bool IsHIP = - (llvm::any_of(Inputs, - [](std::pair &I) { - return types::isHIP(I.first); - }) || - C.getInputArgs().hasArg(options::OPT_hip_link) || - C.getInputArgs().hasArg(options::OPT_hipstdpar)) && - !UseLLVMOffload; - bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false); - bool IsOpenMPOffloading = - UseLLVMOffload || - (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || - (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && - !(IsCuda || IsHIP))); - - llvm::DenseSet Kinds; - const std::pair ActiveKinds[] = { - {IsCuda, Action::OFK_Cuda}, - {IsHIP, Action::OFK_HIP}, - {IsOpenMPOffloading, Action::OFK_OpenMP}, - {IsSYCL, Action::OFK_SYCL}}; - for (const auto &[Active, Kind] : ActiveKinds) - if (Active) - Kinds.insert(Kind); - - // We currently don't support any kind of mixed offloading. - if (Kinds.size() > 1) { - Diag(clang::diag::err_drv_mix_offload) - << Action::GetOffloadKindName(*Kinds.begin()).upper() - << Action::GetOffloadKindName(*(++Kinds.begin())).upper(); - return; - } - -<<<<<<< HEAD - // - // SYCL - // - // We need to generate a SYCL toolchain if the user specified -fsycl. - // If -fsycl is supplied without any of these we will assume SPIR-V. - // Use of -fsycl-device-only overrides -fsycl. - // Use of -fsyclbin enables SYCL device compilation. - bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false) || - C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, - options::OPT_fsyclbin_EQ); -======= - // Initialize the compilation identifier used for unique CUDA / HIP names. - if (IsCuda || IsHIP) - CUIDOpts = CUIDOptions(C.getArgs(), *this); ->>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) - - // Get the list of requested offloading toolchains. If they were not - // explicitly specified we will infer them based on the offloading language - // and requested architectures. - std::multiset Triples; - if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { - std::vector ArgValues = - C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); - for (llvm::StringRef Target : ArgValues) - Triples.insert(C.getInputArgs().MakeArgString(Target)); - -<<<<<<< HEAD - llvm::SmallVector UniqueSYCLTriplesVec; - - // A mechanism for retrieving SYCL-specific options, erroring out - // if SYCL offloading wasn't enabled prior to that +static void diagnoseSYCLOptions(Compilation &C, bool IsSYCL) { auto getArgRequiringSYCLRuntime = [&](OptSpecifier OptId) -> Arg * { Arg *SYCLArg = C.getInputArgs().getLastArg(OptId); if (SYCLArg && !IsSYCL) { - Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) + C.getDriver().Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) // Dropping the '=' symbol, which would otherwise pollute // the diagnostics for the most of options << SYCLArg->getSpelling().split('=').first; @@ -1292,7 +1196,19 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, return SYCLArg; }; - Arg *SYCLTargets = getArgRequiringSYCLRuntime(options::OPT_fsycl_targets_EQ); + // Special check for -fsycl-targets. -fsycl-targets is an alias for + // --offload-targets. + if (!IsSYCL) { + if (auto SYCLArg = + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { + std::string OptString(SYCLArg->getAsString(C.getArgs())); + if (StringRef(OptString).contains("fsycl-targets")) + C.getDriver().Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) + // Dropping the '=' symbol, which would otherwise pollute + // the diagnostics for the most of options + << StringRef(OptString).split('=').first; + } + } // Check if -fsycl-host-compiler is used in conjunction with -fsycl. Arg *SYCLHostCompiler = @@ -1302,7 +1218,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // -fsycl-host-compiler-options cannot be used without -fsycl-host-compiler if (SYCLHostCompilerOptions && !SYCLHostCompiler) - Diag(clang::diag::warn_drv_opt_requires_opt) + C.getDriver().Diag(clang::diag::warn_drv_opt_requires_opt) << SYCLHostCompilerOptions->getSpelling().split('=').first << "-fsycl-host-compiler"; @@ -1321,7 +1237,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, for (const StringRef AllowedValue : AllowedValues) if (AllowedValue == ArgValue) return; - Diag(clang::diag::err_drv_invalid_argument_to_option) + C.getDriver().Diag(clang::diag::err_drv_invalid_argument_to_option) << ArgValue << A->getOption().getName(); }; @@ -1334,7 +1250,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Use of -fsycl-link=early and -fsycl-link=image are not supported. if (SYCLLink && (SYCLLink->getValue() == StringRef("early") || SYCLLink->getValue() == StringRef("image"))) - Diag(diag::err_drv_unsupported_opt_removed) + C.getDriver().Diag(diag::err_drv_unsupported_opt_removed) << SYCLLink->getAsString(C.getInputArgs()); Arg *DeviceCodeSplit = @@ -1357,211 +1273,159 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringRef ArgValue(DeviceObj->getValue()); SmallVector DeviceObjValues = {"spirv", "llvmir", "asm"}; if (llvm::find(DeviceObjValues, ArgValue) == DeviceObjValues.end()) - Diag(clang::diag::warn_ignoring_value_using_default) + C.getDriver().Diag(clang::diag::warn_ignoring_value_using_default) << DeviceObj->getSpelling().split('=').first << ArgValue << "llvmir"; else if (ArgValue == "asm" && (!SYCLDeviceOnly || !EmitAsm)) - Diag(clang::diag::warn_drv_fsycl_device_obj_asm_device_only); + C.getDriver().Diag( + clang::diag::warn_drv_fsycl_device_obj_asm_device_only); } Arg *SYCLForceTarget = getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ); if (SYCLForceTarget) { StringRef Val(SYCLForceTarget->getValue()); - llvm::Triple TT(getSYCLDeviceTriple(Val, SYCLForceTarget)); + llvm::Triple TT(C.getDriver().getSYCLDeviceTriple(Val, SYCLForceTarget)); if (!isValidSYCLTriple(TT)) - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) << Val; } - bool HasSYCLTargetsOption = SYCLTargets; - - llvm::StringMap> DerivedArchs; - llvm::StringMap FoundNormalizedTriples; - // StringSet to contain SYCL target triples. - llvm::StringSet<> SYCLTriples; - if (HasSYCLTargetsOption) { - // At this point, we know we have a valid combination - // of -fsycl*target options passed - Arg *SYCLTargetsValues = SYCLTargets; - if (SYCLTargetsValues) { - if (SYCLTargetsValues->getNumValues()) { - - // Multiple targets are currently not supported when using - // -fsycl-force-target as the bundler does not allow for multiple - // outputs of the same target. - if (SYCLForceTarget && SYCLTargetsValues->getNumValues() > 1) - Diag(clang::diag::err_drv_multiple_target_with_forced_target) - << SYCLTargetsValues->getAsString(C.getInputArgs()) - << SYCLForceTarget->getAsString(C.getInputArgs()); - - std::multiset SYCLTriples; - for (StringRef SYCLTargetTriple : SYCLTargetsValues->getValues()) - SYCLTriples.insert(SYCLTargetTriple); - - llvm::StringMap FoundNormalizedTriples; - llvm::Triple TT; - for (StringRef Triple : SYCLTriples) { - - if (Triple.starts_with("intel_gpu_")) { - TT = getSYCLDeviceTriple("spir64_gen"); - } else if (Triple.starts_with("nvidia_gpu_")) { - TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); - } else if (Triple.starts_with("amd_gpu_")) { - TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); - } else - TT = getSYCLDeviceTriple(Triple); - - std::string NormalizedName = TT.normalize(); - - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, Triple); - - if (!Inserted) { - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Triple << TripleIt->second; - continue; - } - - // If the specified target is invalid, emit a diagnostic. - if (!isValidSYCLTriple(TT)) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; - continue; - } - - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); - UniqueSYCLTriplesVec.push_back(TT); - } - if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { - // Add the default triple (spir64) toolchain. - llvm::Triple DefaultTriple = - C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, - DefaultTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); - } - } else - Diag(clang::diag::warn_drv_empty_joined_argument) - << SYCLTargetsValues->getAsString(C.getInputArgs()); - } - } - // If the user specified --offload-arch, deduce the offloading - // target triple(s) from the set of architecture(s). - // Create a toolchain for each valid triple. - // We do not support SYCL offloading if any of the inputs is a - // .cu (for CUDA type) or .hip (for HIP type) file. - else if (IsSYCL && C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - !IsHIP && !IsCuda) { - // SYCL offloading to AOT Targets with '--offload-arch' - // is currently enabled only with '--offload-new-driver' option. - // Emit a diagnostic if '--offload-arch' is invoked without - // '--offload-new driver' option. - if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) { - Diag(clang::diag::err_drv_sycl_offload_arch_new_driver); - return; - } - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - llvm::Triple IntelGPUTriple("spir64_gen-unknown-unknown"); - llvm::Triple IntelCPUTriple("spir64_x86_64-unknown-unknown"); - - // Attempt to deduce the offloading triple from the set of architectures. - // We need to temporarily create these toolchains so that we can access - // tools for inferring architectures. - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsSYCLSupportedNVidiaGPUArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsSYCLSupportedAMDGPUArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - bool IsIntelGPU = IsIntelGPUOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); - bool IsIntelCPU = IsIntelCPUOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); - - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && - !IsIntelCPU && !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; - return; - } - } - - for (const llvm::Triple &TT : - {AMDTriple, NVPTXTriple, IntelGPUTriple, IntelCPUTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - C.getDefaultToolChain().getTriple()); +} - llvm::SmallVector Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = Archs; - } - } +void Driver::CreateOffloadingDeviceToolChains(Compilation &C, + InputList &Inputs) { + bool UseLLVMOffload = C.getInputArgs().hasArg( + options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); + bool IsCuda = + llvm::any_of(Inputs, + [](std::pair &I) { + return types::isCuda(I.first); + }) && + !UseLLVMOffload; + bool IsHIP = + (llvm::any_of(Inputs, + [](std::pair &I) { + return types::isHIP(I.first); + }) || + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_hipstdpar)) && + !UseLLVMOffload; + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false) || + C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, + options::OPT_fsyclbin_EQ); + bool IsOpenMPOffloading = + UseLLVMOffload || + (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false) && + (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || + (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && + !(IsCuda || IsHIP))); - auto TCRange = C.getOffloadToolChains(Action::OFK_SYCL); - if (TCRange.first == TCRange.second) { - Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); - return; - } + llvm::DenseSet Kinds; + const std::pair ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMPOffloading, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); - } else { - // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V. - // For -fsycl-device-only, we also setup the implied triple as needed. - if (IsSYCL) { - StringRef SYCLTargetArch = getDefaultSYCLArch(C); - UniqueSYCLTriplesVec.push_back(getSYCLDeviceTriple(SYCLTargetArch)); - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); - } - } - - // -fsycl-fp64-conv-emu is valid only for AOT compilation with an Intel GPU - // target. For other scenarios, we emit a warning message. - if (C.getInputArgs().hasArg(options::OPT_fsycl_fp64_conv_emu)) { - bool HasIntelGPUAOTTarget = false; - for (auto &TT : UniqueSYCLTriplesVec) { - if (TT.isSPIRAOT() && TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) { - HasIntelGPUAOTTarget = true; - break; - } - } - if (!HasIntelGPUAOTTarget) - Diag(diag::warn_unsupported_fsycl_fp64_conv_emu_use); + // We currently don't support any kind of mixed offloading. + if (Kinds.size() > 1 && !IsSYCL) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()).upper() + << Action::GetOffloadKindName(*(++Kinds.begin())).upper(); + return; } - // We'll need to use the SYCL and host triples as the key into - // getOffloadingDeviceToolChain, because the device toolchains we're - // going to create will depend on both. - if ((IsSYCL && !C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && - !HasSYCLTargetsOption) { - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - for (const auto &TT : UniqueSYCLTriplesVec) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - HostTC->getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + diagnoseSYCLOptions(C, IsSYCL); + + // Initialize the compilation identifier used for unique CUDA / HIP names. + if (IsCuda || IsHIP) + CUIDOpts = CUIDOptions(C.getArgs(), *this); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); -======= + // Get the list of requested offloading toolchains. If they were not + // explicitly specified we will infer them based on the offloading language + // and requested architectures. + std::multiset Triples; + if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { + std::vector ArgValues = + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); if (ArgValues.empty()) Diag(clang::diag::warn_drv_empty_joined_argument) << C.getInputArgs() .getLastArg(options::OPT_offload_targets_EQ) ->getAsString(C.getInputArgs()); + else if (IsSYCL) { + // Determine any SYCL implied triples that are based on non-triple + // values (intel_gpu*, nvidia_gpu*, amd_gpu* values). + // Multiple targets are currently not supported when using + // -fsycl-force-target as the bundler does not allow for multiple + // outputs of the same target. + Arg *SYCLForceTarget = + C.getInputArgs().getLastArg(options::OPT_fsycl_force_target_EQ); + if (SYCLForceTarget && ArgValues.size() > 1) { + std::string OptString("-fsycl-targets"); + if (auto SYCLArg = + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) + OptString = SYCLArg->getAsString(C.getInputArgs()); + Diag(clang::diag::err_drv_multiple_target_with_forced_target) + << OptString << SYCLForceTarget->getAsString(C.getInputArgs()); + } + + std::multiset SYCLTriples; + for (StringRef SYCLTargetTriple : ArgValues) + SYCLTriples.insert(SYCLTargetTriple); + + llvm::StringMap FoundNormalizedTriples; + llvm::Triple TT; + for (StringRef Triple : SYCLTriples) { + + if (Triple.starts_with("intel_gpu_")) { + TT = getSYCLDeviceTriple("spir64_gen"); + } else if (Triple.starts_with("nvidia_gpu_")) { + TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); + } else if (Triple.starts_with("amd_gpu_")) { + TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); + } else + TT = getSYCLDeviceTriple(Triple); + + // For the new offloading model, we only want a single triple entry + // for each target, even if we have multiple intel_gpu* entries. We + // will track triples for new model and unique strings for the old + // model. + std::string NormalizedName; + bool UseNewOffload = + (C.getArgs().hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)); + NormalizedName = UseNewOffload + ? TT.normalize() + : getSYCLDeviceTriple(Triple).normalize(); + + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Triple); + + if (IsSYCL && !Inserted) { + if (!UseNewOffload || (UseNewOffload && Triple == TripleIt->second)) + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + << Triple << TripleIt->second; + continue; + } + + // If the specified target is invalid, emit a diagnostic. + if (IsSYCL && !isValidSYCLTriple(TT)) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; + continue; + } + Triples.insert(C.getInputArgs().MakeArgString(TT.normalize())); + } + } else + for (llvm::StringRef Target : ArgValues) + Triples.insert(C.getInputArgs().MakeArgString(Target)); } else if (Kinds.size() > 0) { for (Action::OffloadKind Kind : Kinds) { llvm::DenseSet Derived = inferOffloadToolchains(C, Kind); Triples.insert(Derived.begin(), Derived.end()); ->>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) } } @@ -1596,13 +1460,15 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, continue; } - std::string NormalizedName = TT.normalize(); - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, Target); - if (!Inserted) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << Target << TripleIt->second; - continue; + if (Kind == Action::OFK_OpenMP) { + std::string NormalizedName = TT.normalize(); + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Target); + if (!Inserted) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << Target << TripleIt->second; + continue; + } } auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, @@ -1619,6 +1485,35 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, C.addOffloadDeviceToolChain(&TC, Kind); } } + + // Perform any additional SYCL specific behaviors that are tied to expected + // triples. + if (Kinds.contains(Action::OFK_SYCL)) { + // Add the default toolchain for SYCL if it is not already added when using + // the old offloading model. + if (!C.getArgs().hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)) { + // Make vector of triples. + SmallVector Triples; + for (auto &TripleString : FoundNormalizedTriples) { + llvm::Triple T(TripleString.getKey()); + Triples.push_back(T); + } + if (addSYCLDefaultTriple(C, Triples)) { + llvm::Triple TT = llvm::Triple("spir64-unknown-unknown"); + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + } + } + // -fsycl-fp64-conv-emu is valid only for AOT compilation with an Intel GPU + // target. For other scenarios, we emit a warning message. + if (C.getInputArgs().hasArg(options::OPT_fsycl_fp64_conv_emu)) { + llvm::Triple TT = llvm::Triple("spir64_gen-unknown-unknown"); + if (!FoundNormalizedTriples.contains(TT.normalize())) + Diag(diag::warn_unsupported_fsycl_fp64_conv_emu_use); + } + } } bool Driver::loadZOSCustomizationFile(llvm::cl::ExpansionContext &ExpCtx) { @@ -4034,7 +3929,7 @@ bool Driver::checkForSYCLDefaultDevice(Compilation &C, // Do not do the check if the default device is passed in -fsycl-targets // or if -fsycl-targets isn't passed (that implies default device) - if (const Arg *A = Args.getLastArgNoClaim(options::OPT_fsycl_targets_EQ)) { + if (const Arg *A = Args.getLastArgNoClaim(options::OPT_offload_targets_EQ)) { for (const char *Val : A->getValues()) { llvm::Triple TT(C.getDriver().getSYCLDeviceTriple(Val, A)); if ((TT.isSPIROrSPIRV()) && TT.getSubArch() == llvm::Triple::NoSubArch) @@ -4403,86 +4298,7 @@ class OffloadingActionBuilder final { EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); -<<<<<<< HEAD - // --offload and --offload-arch options are mutually exclusive. - if (Args.hasArgNoClaim(options::OPT_offload_EQ) && - Args.hasArgNoClaim(options::OPT_offload_arch_EQ, - options::OPT_no_offload_arch_EQ)) { - C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch" - << "--offload"; - } - - // Collect all offload arch parameters, removing duplicates. - std::set GpuArchs; - bool Error = false; - const ToolChain &TC = *ToolChains.front(); - for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"", - AssociatedOffloadKind)) { - if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || - A->getOption().matches(options::OPT_no_offload_arch_EQ))) - continue; - A->claim(); - - for (StringRef ArchStr : llvm::split(A->getValue(), ",")) { - if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && - ArchStr == "all") { - GpuArchs.clear(); - } else if (ArchStr == "native") { - auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); - if (!GPUsOrErr) { - TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC.getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; - continue; - } - - for (auto GPU : *GPUsOrErr) { - GpuArchs.insert(Args.MakeArgString(GPU)); - } - } else { - ArchStr = getCanonicalOffloadArch(ArchStr); - if (ArchStr.empty()) { - Error = true; - } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(ArchStr); - else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(ArchStr); - else - llvm_unreachable("Unexpected option."); - } - } - } - - auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs); - if (ConflictingArchs) { - C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) - << ConflictingArchs->first << ConflictingArchs->second; - C.setContainsError(); - return true; - } - - // Collect list of GPUs remaining in the set. - for (auto Arch : GpuArchs) - GpuArchList.push_back(Arch.data()); - - // Default to sm_20 which is the lowest common denominator for - // supported GPUs. sm_20 code should work correctly, if - // suboptimally, on all newer GPUs. - if (GpuArchList.empty()) { - if (ToolChains.front()->getTriple().isSPIROrSPIRV()) { - if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD) - GpuArchList.push_back(OffloadArch::AMDGCNSPIRV); - else - GpuArchList.push_back(OffloadArch::Generic); - } else { - GpuArchList.push_back(DefaultOffloadArch); - } - } - - return Error; -======= return false; ->>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) } }; @@ -5140,7 +4956,7 @@ class OffloadingActionBuilder final { JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, Input, AssociatedOffloadKind); - if (TT.getOS() != llvm::Triple::NVCL) { + if (TT.getOS() != llvm::Triple::NVCL && !TT.isSPIROrSPIRV()) { auto *AA = C.getDriver().ConstructPhaseAction( C, Args, phases::Assemble, BA, AssociatedOffloadKind); ActionList DeviceActions = {BA, AA}; @@ -5556,8 +5372,8 @@ class OffloadingActionBuilder final { bool IsNativeCPU = TargetTriple.isNativeCPU(); for (const auto &Input : ListIndex) { // No need for any conversion if we are coming in from the - // clang-offload-deps or regular compilation path. - if (IsNVPTX || IsAMDGCN || ContainsOffloadDepsAction(Input) || + // clang-offload-deps path. + if (ContainsOffloadDepsAction(Input) || ContainsCompileOrAssembleAction(Input)) { LinkObjects.push_back(Input); continue; @@ -6106,9 +5922,8 @@ class OffloadingActionBuilder final { ArchStr = OffloadArchToString(Arch); } else if (TargetBE->isAMDGCN()) { llvm::StringMap Features; - auto Arch = parseTargetID( - *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), - ArchStr, &Features); + auto Arch = parseTargetID(llvm::Triple("amdgcn-amd-amdhsa"), + ArchStr, &Features); if (!Arch) { C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; continue; @@ -6310,7 +6125,7 @@ class OffloadingActionBuilder final { // Gather information about the SYCL Ahead of Time targets. The targets // are determined on the SubArch values passed along in the triple. Arg *SYCLTargets = - C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ); + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ); bool HasValidSYCLRuntime = C.getInputArgs().hasFlag( options::OPT_fsycl, options::OPT_fno_sycl, false); @@ -6321,27 +6136,33 @@ class OffloadingActionBuilder final { for (StringRef Val : SYCLTargetsValues->getValues()) { StringRef UserTargetName(Val); if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("spir64_gen"), ValidDevice->data()); UserTargetName = "spir64_gen"; } else if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("nvptx64-nvidia-cuda"), ValidDevice->data()); UserTargetName = "nvptx64-nvidia-cuda"; } else if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("amdgcn-amd-amdhsa"), @@ -6362,8 +6183,10 @@ class OffloadingActionBuilder final { // the following iterations. FoundNormalizedTriples[NormalizedName] = Val; - SYCLTripleList.push_back( - C.getDriver().getSYCLDeviceTriple(UserTargetName)); + if (isValidSYCLTriple(llvm::Triple(UserTargetName))) + SYCLTripleList.push_back( + C.getDriver().getSYCLDeviceTriple(UserTargetName)); + // For user specified spir64_gen, add an empty device value as a // placeholder. if (TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) @@ -6681,7 +6504,7 @@ class OffloadingActionBuilder final { // FIXME - unbundling action with -fsycl-link is unbundling for both host // and device, where only the device is needed. auto UnbundlingHostAction = C.MakeAction( - A, (HasSPIRTarget && HostAction->getType() == types::TY_Archive) + A, (HostAction->getType() == types::TY_Archive) ? types::TY_Tempfilelist : A->getType()); UnbundlingHostAction->registerDependentActionInfo( @@ -7543,16 +7366,14 @@ static StringRef getCanonicalArchString(Compilation &C, } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && (Arch == OffloadArch::UNKNOWN || !IsIntelGPUOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "spir64_gen" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_gen" << ArchStr; return StringRef(); } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64 && (Arch == OffloadArch::UNKNOWN || !IsIntelCPUOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "spir64_x86_64" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_x86_64" << ArchStr; return StringRef(); } if (IsNVIDIAOffloadArch(Arch)) @@ -7607,89 +7428,42 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; -<<<<<<< HEAD - StringRef Arch; - for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { + std::unique_ptr ExtractedArg = nullptr; + for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. - std::unique_ptr ExtractedArg = nullptr; if (Kind == Action::OFK_SYCL) { - // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" - // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" - if (TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && - Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, TC->getTriple(), *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known - // arch to be associated with the current spir64_gen entry. Grab - // the right most entry. - for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - if (!Arch.empty()) - Archs.insert(Arch); - break; - } - } - } // For SYCL based offloading, we allow for -Xsycl-target-backend // and -Xsycl-target-backend=amdgcn-amd-hsa --offload-arch=gfx908 for // specifying options. - if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && - Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && - llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { + if (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + llvm::Triple(Arg->getValue(0)) == TC.getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); // -Xsycl-target-backend --offload-arch=gfx1150 - } else if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_gen) && - Arg->getOption().matches(options::OPT_Xsycl_backend)) { + } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); } - } else { - if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && - ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { - Arg->claim(); - unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); - unsigned Prev = Index; - ExtractedArg = getOpts().ParseOneArg(Args, Index); - if (!ExtractedArg || Index > Prev + 1) { - TC->getDriver().Diag(diag::err_drv_invalid_Xopenmp_target_with_args) - << Arg->getAsString(Args); - continue; - } - Arg = ExtractedArg.get(); - } } - if (Kind == Action::OFK_SYCL && - Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + Arg->getOption().matches(options::OPT_offload_targets_EQ)) { for (StringRef SYCLTargetValue : Arg->getValues()) { + StringRef Arch; if (auto Device = tools::SYCL::gen::isGPUTarget( SYCLTargetValue)) { - if (SpecificToolchain && - !(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) + if (!(TC.getTriple().isSPIRAOT() && + TC.getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) continue; if (Device->empty()) { Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; } if (IsIntelGPUOffloadArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< tools::SYCL::gen::NvidiaGPU>(SYCLTargetValue)) { @@ -7698,7 +7472,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, continue; } if (IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< clang::driver::tools::SYCL::gen::AmdGPU>( @@ -7708,7 +7482,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, continue; } if (IsSYCLSupportedAMDGPUArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else { Arch = StringRef(); @@ -7717,10 +7491,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(Arch); } } - -======= - for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { ->>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { @@ -7764,6 +7534,33 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } } + if (Kind == Action::OFK_SYCL) { + // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" + // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" + if (TC.getTriple().isSPIRAOT() && + TC.getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + auto DeviceTC = std::make_unique( + *this, TC.getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + StringRef Arch; + Arch = TargetArgs[i + 1]; + if (!Arch.empty()) + Archs.insert(Arch); + break; + } + } + } + } + if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, TC.getTriple())) C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) @@ -7774,15 +7571,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (Kind == Action::OFK_Cuda) { Archs.insert(OffloadArchToString(OffloadArch::CudaDefault)); } else if (Kind == Action::OFK_HIP) { -<<<<<<< HEAD - Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); -======= Archs.insert(OffloadArchToString(TC.getTriple().isSPIRV() ? OffloadArch::Generic : OffloadArch::HIPDefault)); } else if (Kind == Action::OFK_SYCL) { - Archs.insert(StringRef()); ->>>>>>> a7d93653a671 ([Clang] Rework creating offloading toolchains (#125556)) + // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. + // The default arch is set for NVPTX if not provided. For AMDGPU, emit + // an error as the user is responsible to set the arch. + if (TC.getTriple().isNVPTX()) + Archs.insert(OffloadArchToString(OffloadArch::SM_50)); + else if (TC.getTriple().isAMDGPU()) + C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) + << 1 << TC.getTriple().str(); + else + Archs.insert(StringRef()); } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) @@ -7801,17 +7603,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(StringRef()); } } - } else if (Kind == Action::OFK_SYCL) { - // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. - // The default arch is set for NVPTX if not provided. For AMDGPU, emit - // an error as the user is responsible to set the arch. - if (TC->getTriple().isNVPTX()) - Archs.insert(OffloadArchToString(OffloadArch::SM_50)); - else if (TC->getTriple().isAMDGPU()) - C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) - << 1 << TC->getTriple().str(); - else - Archs.insert(StringRef()); } } Args.ClaimAllArgs(options::OPT_offload_arch_EQ); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index e6addc933fbb3..f887eca3db400 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1865,7 +1865,7 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs( getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple); continue; } - if (IsSYCL && !SingleTargetTripleCount(options::OPT_fsycl_targets_EQ)) { + if (IsSYCL && !SingleTargetTripleCount(options::OPT_offload_targets_EQ)) { getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) << A->getSpelling(); continue; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7b006eff85988..cc174ccb1a281 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8662,7 +8662,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (isa(JA) && JA.isHostOffloading(Action::OFK_SYCL)) { SmallString<128> TargetInfo("-fsycl-targets="); - if (Arg *Tgts = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *Tgts = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { if (i) TargetInfo += ','; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 8082e8c880609..fbbd289eb6ff7 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -317,7 +317,7 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // add -fsycl-targets=intel_gpu_pvc..., native bfloat16 devicelib can // only be linked when all GPU types specified support. // We need to filter CPU target here and only focus on GPU device. - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (auto TargetsV : SYCLTarget->getValues()) { if (!checkSpirvJIT(StringRef(TargetsV)) && !StringRef(TargetsV).starts_with("spir64_gen") && @@ -338,7 +338,7 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // bfloat16 native conversion. UseNative = true; - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (auto TargetsV : SYCLTarget->getValues()) { if (!checkSpirvJIT(StringRef(TargetsV)) && !GPUArchsWithNBF16.contains(StringRef(TargetsV))) { @@ -646,7 +646,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (!IsSpirvAOT) return JIT; - llvm::opt::Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ); + llvm::opt::Arg *SYCLTarget = + Args.getLastArg(options::OPT_offload_targets_EQ); if (!SYCLTarget || (SYCLTarget->getValues().size() != 1)) return JIT; @@ -1672,7 +1673,7 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple, if (OptNoTriple) { // With multiple -fsycl-targets, a triple is required so we know where // the options should go. - const Arg *TargetArg = Args.getLastArg(options::OPT_fsycl_targets_EQ); + const Arg *TargetArg = Args.getLastArg(options::OPT_offload_targets_EQ); if (TargetArg && TargetArg->getValues().size() != 1) { getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) << A->getSpelling(); @@ -1911,7 +1912,7 @@ void SYCLToolChain::TranslateBackendTargetArgs( // Handle -Xsycl-target-backend. TranslateTargetOpt(Triple, Args, CmdArgs, options::OPT_Xsycl_backend, options::OPT_Xsycl_backend_EQ, Device); - TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ); + TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_offload_targets_EQ); } void SYCLToolChain::TranslateLinkerTargetArgs(const llvm::Triple &Triple, diff --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c index 2a38a99c30518..a756e82438466 100644 --- a/clang/test/Driver/openmp-offload-infer.c +++ b/clang/test/Driver/openmp-offload-infer.c @@ -36,10 +36,10 @@ // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ -// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=skylake \ +// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=unknown \ // RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-FAILED -// CHECK-FAILED: error: failed to deduce triple for target architecture 'skylake'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead +// CHECK-FAILED: error: failed to deduce triple for target architecture 'unknown'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ // RUN: --offload-arch=sm_70 --offload-arch=gfx908 -fno-openmp \ diff --git a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp index d733618744d71..46d0bc19c2258 100644 --- a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp +++ b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp @@ -58,15 +58,6 @@ // TARGET-TRIPLE-CPU: "-D__SYCL_TARGET_INTEL_X86_64__" // CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_x86_64-unknown-unknown,arch=[[DEV_STR]],kind=sycl" -// Tests for handling a missing architecture. -// -// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=MISSING %s -// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=MISSING %s - -// MISSING: error: must pass in an explicit cpu or gpu architecture to '--offload-arch' - // Tests for handling a incorrect architecture. // // RUN: not %clangxx --offload-new-driver -fsycl --offload-arch=badArch %s -### 2>&1 \ diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index a6aeb6b77fdf1..d3fe36be70e62 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -114,8 +114,8 @@ // RUN: --offload-new-driver %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK_PACKAGER_OPTS %s // CHK_PACKAGER_OPTS: clang-offload-packager{{.*}} "-o" -// CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,kind=sycl,compile-opts={{.*}}-spir64_gen-opt,link-opts=-spir64_gen-link-opt // CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64-unknown-unknown,arch=generic,kind=sycl,compile-opts={{.*}}-spir64-opt,link-opts=-spir64-link-opt +// CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,kind=sycl,compile-opts={{.*}}-spir64_gen-opt,link-opts=-spir64_gen-link-opt /// Check phases with multiple intel_gpu settings // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl \ diff --git a/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp b/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp index 53d6fbc910e9c..52aaf074432d8 100644 --- a/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp +++ b/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp @@ -21,16 +21,14 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -L/dummy/dir %t_lib.lo -### %t_obj.o 2>&1 \ // RUN: | FileCheck %s -check-prefixes=STATIC_LIB,STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_lib.a -### %t_obj.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_lib.lo -### %t_obj.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input=[[INPUTO:.+\.o]]" "-output=[[HOSTOBJ:.+\.o]]" "-output={{.+\.o}}" // STATIC_LIB: clang-offload-deps{{.*}} "-targets=[[BUNDLE_TRIPLE]]" // STATIC_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}" "-output=[[OUTFILE:.+\.txt]]" -// STATIC_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}" "-output=[[OUTFILE:.+\.a]]" // STATIC_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTFILE]]" "--in-replace=[[OUTFILE]]" "--out-file-list=[[IROUTFILE:.+\.txt]]" "--out-replace=[[IROUTFILE]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTFILE]]" "-o" "[[IROUTFILE]]" // STATIC_LIB_DEF: llvm-link{{.*}} "@[[IROUTFILE]]" -// STATIC_LIB_NVPTX: llvm-link{{.*}} "[[OUTFILE]]" // STATIC_LIB: ld{{.*}} "{{.*}}_lib.{{(a|lo)}}" "[[HOSTOBJ]]" // Test using -l style for passing libraries. @@ -43,14 +41,12 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -Xlinker -Bstatic -L%t_dir -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \ // RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB,STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_L_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input=[[INPUTO:.+\.o]]" "-output=[[HOSTOBJ:.+\.o]]" "-output={{.+\.o}}" // STATIC_L_LIB: clang-offload-deps{{.*}} "-targets=[[BUNDLE_TRIPLE]]" // STATIC_L_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}liblin64.a" "-output=[[OUTFILE:.+\.txt]]" -// STATIC_L_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}liblin64.a" "-output=[[OUTFILE:.+\.a]]" // STATIC_L_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTFILE]]" "--in-replace=[[OUTFILE]]" "--out-file-list=[[IROUTFILE:.+\.txt]]" "--out-replace=[[IROUTFILE]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTFILE]]" "-o" "[[IROUTFILE]]" // STATIC_L_LIB_DEF: llvm-link{{.*}} "@[[IROUTFILE]]" -// STATIC_L_LIB_NVPTX: llvm-link{{.*}} "[[OUTFILE]]" // STATIC_L_LIB: ld{{.*}} "-llin64" "[[HOSTOBJ]]" // non-fat libraries should not trigger the unbundling step. @@ -71,16 +67,14 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver %t_lib.a -### %t-1.o %t-2.o %t-3.o 2>&1 \ // RUN: | FileCheck %s -check-prefixes=STATIC_LIB_MULTI_O,STATIC_LIB_MULTI_O_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda %t_lib.a -### %t-1.o %t-2.o %t-3.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_MULTI_O,STATIC_LIB_MULTI_O_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_MULTI_O,STATIC_LIB_MULTI_O_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_LIB_MULTI_O: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input={{.+}}-1.o" // STATIC_LIB_MULTI_O: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input={{.+}}-2.o" // STATIC_LIB_MULTI_O: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input={{.+}}-3.o" // STATIC_LIB_MULTI_O: clang-offload-deps{{.*}} "-targets=[[BUNDLE_TRIPLE]]" // STATIC_LIB_MULTI_O_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTFILE:.+\.txt]]" -// STATIC_LIB_MULTI_O_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTFILE:.+\.a]]" // STATIC_LIB_MULTI_O_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTFILE]]" "--in-replace=[[OUTFILE]]" "--out-file-list=[[IROUTFILE:.+\.txt]]" "--out-replace=[[IROUTFILE]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTFILE]]" "-o" "[[IROUTFILE]]" // STATIC_LIB_MULTI_O_DEF: llvm-link{{.*}} "@[[IROUTFILE]]" -// STATIC_LIB_MULTI_O_NVPTX: llvm-link{{.*}} "[[OUTFILE]]" /// ########################################################################### @@ -127,18 +121,19 @@ // STATIC_LIB_SRC-CUDA: 10: linker, {0, 9}, host_dep_image, (host-sycl) // STATIC_LIB_SRC-CUDA: 11: clang-offload-deps, {10}, ir, (host-sycl) // STATIC_LIB_SRC-CUDA: 12: input, "[[INPUTA]]", archive -// STATIC_LIB_SRC-CUDA: 13: clang-offload-unbundler, {12}, archive -// STATIC_LIB_SRC-CUDA: 14: linker, {5, 11, 13}, ir, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 15: sycl-post-link, {14}, ir, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 16: file-table-tform, {15}, ir, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 17: backend, {16}, assembler, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 18: assembler, {17}, object, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 19: linker, {17, 18}, cuda-fatbin, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 20: foreach, {16, 19}, cuda-fatbin, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 21: file-table-tform, {15, 20}, tempfiletable, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 22: clang-offload-wrapper, {21}, object, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 23: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {22}, object -// STATIC_LIB_SRC-CUDA: 24: linker, {0, 9, 23}, image, (host-sycl) +// STATIC_LIB_SRC-CUDA: 13: clang-offload-unbundler, {12}, tempfilelist +// STATIC_LIB_SRC-CUDA: 14: spirv-to-ir-wrapper, {13}, tempfilelist, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 15: linker, {5, 11, 14}, ir, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 17: file-table-tform, {16}, ir, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 18: backend, {17}, assembler, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 19: assembler, {18}, object, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object +// STATIC_LIB_SRC-CUDA: 25: linker, {0, 9, 24}, image, (host-sycl) /// ########################################################################### @@ -151,11 +146,9 @@ // STATIC_LIB_SRC2: ld{{(.exe)?}}" {{.*}} "-o" "[[HOSTEXE:.+\.out]]" {{.*}}"--unresolved-symbols=ignore-all" // STATIC_LIB_SRC2: clang-offload-deps{{.*}} "-targets=[[DEPS_TRIPLE]]" "-outputs=[[OUTDEPS:.+\.bc]]" "[[HOSTEXE]]" // STATIC_LIB_SRC2_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTLIB:.+\.txt]]" -// STATIC_LIB_SRC2_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTLIB:.+\.a]]" // STATIC_LIB_SRC2_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTLIB]]" "--in-replace=[[OUTLIB]]" "--out-file-list=[[OUTLIBLIST:.+\.txt]]" "--out-replace=[[OUTLIBLIST]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTLIB]]" "-o" [[OUTLIBLIST]]" // STATIC_LIB_SRC2: llvm-link{{.*}} "[[OUTDEPS]]" "-o" "[[OUTTEMP:.+\.bc]]" // STATIC_LIB_SRC2_DEF: llvm-link{{.*}} "--only-needed" "[[OUTTEMP]]" "@[[OUTLIBLIST]]" -// STATIC_LIB_SRC2_NVPTX: llvm-link{{.*}} "--only-needed" "[[OUTTEMP]]" "[[OUTLIB]]" // STATIC_LIB_SRC2: ld{{(.exe)?}}" {{.*}} "[[HOSTOBJ]]" /// ########################################################################### @@ -174,7 +167,8 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda,spir64 %t_lib.a -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=STATIC_LIB_MIX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_LIB_MIX: clang-offload-bundler{{.*}} "-type=aoo" "-targets=sycl-nvptx64-nvidia-cuda-sm_50,sycl-spir64-unknown-unknown" {{.*}} "-output=[[NVPTXLIST:.+\.txt]]" "-output=[[SYCLLIST:.+\.txt]]" -// STATIC_LIB_MIX: llvm-link{{.*}} "@[[NVPTXLIST]]" +// STATIC_LIB_MIX: spirv-to-ir-wrapper{{.*}} "[[NVPTXLIST]]" "-o" "[[NVPTXLINKLIST:.+\.txt]]" +// STATIC_LIB_MIX: llvm-link{{.*}} "@[[NVPTXLINKLIST]]" // STATIC_LIB_MIX: spirv-to-ir-wrapper{{.*}} "[[SYCLLIST]]" "-o" "[[SYCLLINKLIST:.+\.txt]]" // STATIC_LIB_MIX: llvm-link{{.*}} "@[[SYCLLINKLIST]]" @@ -191,18 +185,15 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -L/dummy/dir %t_obj.o -Wl,@%/t_arg.arg -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_obj.o -Wl,--whole-archive %t_lib.a %t_lib_2.a -Wl,--no-whole-archive -### 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_1,WHOLE_STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_1,WHOLE_STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_obj.o -Wl,@%/t_arg.arg -### 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // WHOLE_STATIC_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" // WHOLE_STATIC_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTA:.+\.a]]" "-output=[[OUTPUTA:.+\.txt]]" // WHOLE_STATIC_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTPUTA]]" "--in-replace=[[OUTPUTA]]" "--out-file-list=[[OUTLISTA:.+\.txt]]" "--out-replace=[[OUTLISTA]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTPUTA]]" "-o" "[[OUTLISTA]]" // WHOLE_STATIC_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTB:.+\.a]]" "-output=[[OUTPUTB:.+\.txt]]" // WHOLE_STATIC_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTPUTB]]" "--in-replace=[[OUTPUTB]]" "--out-file-list=[[OUTLISTB:.+\.txt]]" "--out-replace=[[OUTLISTB]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTPUTB]]" "-o" "[[OUTLISTB]]" // WHOLE_STATIC_LIB_DEF: llvm-link{{.*}} "@[[OUTLISTA]]" "@[[OUTLISTB]]" -// WHOLE_STATIC_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTA:.+\.a]]" "-output=[[OUTPUTA:.+\.a]]" -// WHOLE_STATIC_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTB:.+\.a]]" "-output=[[OUTPUTB:.+\.a]]" -// WHOLE_STATIC_LIB_NVPTX: llvm-link{{.*}} "[[OUTPUTA]]" "[[OUTPUTB]]" // WHOLE_STATIC_LIB: clang-offload-wrapper{{.*}} // WHOLE_STATIC_LIB: clang{{.*}} "-c" // WHOLE_STATIC_LIB_1: ld{{.*}} "--whole-archive" "[[INPUTA]]" "[[INPUTB]]" "--no-whole-archive" @@ -229,8 +220,9 @@ // STATIC_LIB_NOSRC-SPIR: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}_lib.{{(a|lo)}}" "-output=[[DEVICELIB:.+\.txt]]" "-unbundle" // STATIC_LIB_NOSRC-SPIR: llvm-foreach{{.*}}spirv-to-ir-wrapper{{.*}} "[[DEVICELIB]]" "-o" "[[DEVICELIST:.+\.txt]]" // STATIC_LIB_NOSRC-SPIR: llvm-link{{.*}} "@[[DEVICELIST]]" "-o" "[[BCFILE:.+\.bc]]" -// STATIC_LIB_NOSRC-CUDA: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}_lib.{{(a|lo)}}" "-output=[[DEVICELIB:.+\.a]]" "-unbundle" -// STATIC_LIB_NOSRC-CUDA: llvm-link{{.*}} "[[DEVICELIB]]" "-o" "[[BCFILE:.+\.bc]]" +// STATIC_LIB_NOSRC-CUDA: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}_lib.{{(a|lo)}}" "-output=[[DEVICELIB:.+\.txt]]" "-unbundle" +// STATIC_LIB_NOSRC-CUDA: llvm-foreach{{.*}}spirv-to-ir-wrapper{{.*}} "[[DEVICELIB]]" "-o" "[[DEVICELIST:.+\.txt]]" +// STATIC_LIB_NOSRC-CUDA: llvm-link{{.*}} "@[[DEVICELIST]]" "-o" "[[BCFILE:.+\.bc]]" // STATIC_LIB_NOSRC: sycl-post-link{{.*}} "-o" "[[TABLE:.+]]" "[[BCFILE]]" // STATIC_LIB_NOSRC: file-table-tform{{.*}} "-o" "[[LIST:.+]]" "[[TABLE]]" // STATIC_LIB_NOSRC-SPIR: llvm-foreach{{.*}}llvm-spirv{{.*}} "-o" "[[OBJLIST:.+\.txt]]"{{.*}} "[[LIST]]" diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index fd4e55c2d5f14..b4237b1117c84 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -650,7 +650,7 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, C->getSingleOffloadToolChain(); InputArgList EmptyArgList; auto Archs = - D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, OffloadTC); + D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, *OffloadTC); assert(Archs.size() == 1 && "Offload toolchain should be configured to single architecture"); StringRef CPU = *Archs.begin(); From a617dbfc57a7f487a394ef431b335d91bdf17157 Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Wed, 6 Aug 2025 07:00:19 -0700 Subject: [PATCH 3/5] Update test for implied triple --- clang/test/Driver/offload-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/offload-target.c b/clang/test/Driver/offload-target.c index 123ecd3eb830e..af8653ea92225 100644 --- a/clang/test/Driver/offload-target.c +++ b/clang/test/Driver/offload-target.c @@ -1,6 +1,6 @@ // RUN: %clang -### -fsycl --offload-targets=spirv64 -nogpuinc %s -ccc-print-bindings 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL -// SYCL: "spirv64" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]" +// SYCL: "spirv64-unknown-unknown" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]" // RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ // RUN: | FileCheck %s -check-prefix=HIP From 92466487e00f92adde129c7368dcd5fe1e6ac01c Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Wed, 6 Aug 2025 15:02:07 -0700 Subject: [PATCH 4/5] Use SmallSet for Triples, reducing complexity for duplicate diagnostic --- clang/lib/Driver/Driver.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 589b3acad067e..319c081100cf3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1348,7 +1348,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Get the list of requested offloading toolchains. If they were not // explicitly specified we will infer them based on the offloading language // and requested architectures. - std::multiset Triples; + llvm::SmallSet Triples; if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { std::vector ArgValues = C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); @@ -1393,26 +1393,18 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // For the new offloading model, we only want a single triple entry // for each target, even if we have multiple intel_gpu* entries. We - // will track triples for new model and unique strings for the old - // model. - std::string NormalizedName; - bool UseNewOffload = - (C.getArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)); - NormalizedName = UseNewOffload - ? TT.normalize() - : getSYCLDeviceTriple(Triple).normalize(); + // will track triples using the target values instead of the Triples + // themselves. + std::string NormalizedName = getSYCLDeviceTriple(Triple).normalize(); auto [TripleIt, Inserted] = FoundNormalizedTriples.try_emplace(NormalizedName, Triple); if (IsSYCL && !Inserted) { - if (!UseNewOffload || (UseNewOffload && Triple == TripleIt->second)) - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) << Triple << TripleIt->second; continue; } - // If the specified target is invalid, emit a diagnostic. if (IsSYCL && !isValidSYCLTriple(TT)) { Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; From ea89a1ea87f452e4835cfdacf0e24bcd8279c316 Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Wed, 6 Aug 2025 15:04:51 -0700 Subject: [PATCH 5/5] Revert "Use SmallSet for Triples, reducing complexity for duplicate diagnostic" This reverts commit 92466487e00f92adde129c7368dcd5fe1e6ac01c. --- clang/lib/Driver/Driver.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 319c081100cf3..589b3acad067e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1348,7 +1348,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Get the list of requested offloading toolchains. If they were not // explicitly specified we will infer them based on the offloading language // and requested architectures. - llvm::SmallSet Triples; + std::multiset Triples; if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { std::vector ArgValues = C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); @@ -1393,18 +1393,26 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // For the new offloading model, we only want a single triple entry // for each target, even if we have multiple intel_gpu* entries. We - // will track triples using the target values instead of the Triples - // themselves. - std::string NormalizedName = getSYCLDeviceTriple(Triple).normalize(); + // will track triples for new model and unique strings for the old + // model. + std::string NormalizedName; + bool UseNewOffload = + (C.getArgs().hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)); + NormalizedName = UseNewOffload + ? TT.normalize() + : getSYCLDeviceTriple(Triple).normalize(); auto [TripleIt, Inserted] = FoundNormalizedTriples.try_emplace(NormalizedName, Triple); if (IsSYCL && !Inserted) { - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + if (!UseNewOffload || (UseNewOffload && Triple == TripleIt->second)) + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) << Triple << TripleIt->second; continue; } + // If the specified target is invalid, emit a diagnostic. if (IsSYCL && !isValidSYCLTriple(TT)) { Diag(clang::diag::err_drv_invalid_sycl_target) << Triple;