# CUDA build configuration for the `xmrig-cu` static library.
#
# Inputs read from the including scope (not defined in this file):
#   CUDA_VERSION, CUDA_COMPILER ("clang" or "nvcc"), CUDA_TOOLKIT_ROOT_DIR,
#   WITH_DRIVER_API, WITH_RANDOMX, WITH_KAWPOW, XMRIG_OS_APPLE,
#   LIBCUDA_LIBRARY_DIR, LIBNVRTC_LIBRARY_DIR, LIBS.
#
# Results:
#   * cache entries CUDA_ARCH, XMRIG_LARGEGRID, CUDA_SHOW_REGISTER,
#     CUDA_KEEP_FILES (and CUDA_SHOW_CODELINES for nvcc),
#   * directory-level compile definitions,
#   * CUDA_NVCC_FLAGS (nvcc) or CLANG_BUILD_FLAGS (clang),
#   * LIBS extended with the driver/NVRTC libraries when WITH_DRIVER_API is set,
#   * the static library target `xmrig-cu`.

# Compatibility table printed ahead of every "unsupported architecture" error.
# string(CONCAT) keeps it a single string; handing the pieces to set() would
# join them with ';' and those separators would appear in the printed text.
string(CONCAT MSG_CUDA_MAP
    "\n\n"
    " Valid CUDA Toolkit Map:\n"
    " 8.x for Fermi/Kepler /Maxwell/Pascal,\n"
    " 9.x for Kepler /Maxwell/Pascal/Volta,\n"
    " 10.x for Kepler /Maxwell/Pascal/Volta/Turing,\n"
    " 11.x for Kepler (in part)/Maxwell/Pascal/Volta/Turing/Ampere\n\n"
    "Reference https://developer.nvidia.com/cuda-gpus#compute for arch and family name\n\n"
)

# Directory-scoped on purpose: these definitions are registered exactly once.
# NOTE(review): presumably kept as add_definitions() (rather than target-scoped)
# so that cuda_add_library() forwards them to nvcc - confirm before changing.
add_definitions(-DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT)

option(XMRIG_LARGEGRID "Support large CUDA block count > 128" ON)
if (XMRIG_LARGEGRID)
    add_definitions("-DXMRIG_LARGEGRID=${XMRIG_LARGEGRID}")
endif()

# ---------------------------------------------------------------------------
# Default architecture list, derived from the toolkit version.
# Entries are compute capabilities without the dot ("50" means 5.0).
# ---------------------------------------------------------------------------
set(DEFAULT_CUDA_ARCH "50")

# Fermi GPUs are only supported with CUDA < 9.0
if (CUDA_VERSION VERSION_LESS 9.0)
    list(APPEND DEFAULT_CUDA_ARCH "20;21")
endif()

# Kepler: 30 for CUDA < 11.0, 35 for CUDA 11.x, nothing from 12.0 on
if (CUDA_VERSION VERSION_LESS 11.0)
    list(APPEND DEFAULT_CUDA_ARCH "30")
elseif (CUDA_VERSION VERSION_LESS 12.0)
    list(APPEND DEFAULT_CUDA_ARCH "35")
endif()

# add Pascal support for CUDA >= 8.0
if (NOT CUDA_VERSION VERSION_LESS 8.0)
    list(APPEND DEFAULT_CUDA_ARCH "60")
endif()

# add Volta support for CUDA >= 9.0
if (NOT CUDA_VERSION VERSION_LESS 9.0)
    list(APPEND DEFAULT_CUDA_ARCH "70")
endif()

# add Turing support for CUDA >= 10.0
if (NOT CUDA_VERSION VERSION_LESS 10.0)
    list(APPEND DEFAULT_CUDA_ARCH "75")
endif()

# add Ampere support for CUDA >= 11.0
if (NOT CUDA_VERSION VERSION_LESS 11.0)
    list(APPEND DEFAULT_CUDA_ARCH "80")
endif()

# 86 is added for CUDA >= 11.1
if (NOT CUDA_VERSION VERSION_LESS 11.1)
    list(APPEND DEFAULT_CUDA_ARCH "86")
endif()

# 87 is added for CUDA >= 11.5
if (NOT CUDA_VERSION VERSION_LESS 11.5)
    list(APPEND DEFAULT_CUDA_ARCH "87")
endif()

# 89 and 90 are added for CUDA >= 11.8
if (NOT CUDA_VERSION VERSION_LESS 11.8)
    list(APPEND DEFAULT_CUDA_ARCH "89")
    list(APPEND DEFAULT_CUDA_ARCH "90")
endif()

list(SORT DEFAULT_CUDA_ARCH)

# User-overridable architecture list; the computed defaults are only used when
# the cache entry does not exist yet.
set(CUDA_ARCH "${DEFAULT_CUDA_ARCH}" CACHE STRING "Set GPU architecture (semicolon separated list, e.g. '-DCUDA_ARCH=20;35;60')")

# ---------------------------------------------------------------------------
# Validate every requested architecture against the toolkit version.
# ---------------------------------------------------------------------------
foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
    # Only plain decimal numbers are allowed. The check is done per element:
    # matching against the whole (unquoted) list would test the concatenation
    # of all elements instead of the one being reported.
    if(NOT "${CUDA_ARCH_ELEM}" MATCHES "^[0-9]+$")
        message(FATAL_ERROR "Defined compute architecture '${CUDA_ARCH_ELEM}' in "
                            "'${CUDA_ARCH}' is not an integral number, use e.g. '30' (for compute architecture 3.0).")
    endif()

    # Nothing below compute capability 2.0 is supported by any toolkit.
    if(CUDA_ARCH_ELEM LESS 20)
        message("${MSG_CUDA_MAP}")
        message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified.")
    endif()

    if (NOT CUDA_VERSION VERSION_LESS 11.0)
        # CUDA >= 11: everything below 35 is rejected.
        if(CUDA_ARCH_ELEM LESS 35)
            message("${MSG_CUDA_MAP}")
            message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
                                "Use CUDA v10.x maximum, Kepler (30) was dropped at v11.")
        endif()
    else()
        # CUDA < 11: 80 and above is rejected.
        if(NOT CUDA_ARCH_ELEM LESS 80)
            message("${MSG_CUDA_MAP}")
            message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
                                "Use CUDA v11.x minimum, Ampere (80) was added at v11.")
        endif()
    endif()

    if (CUDA_VERSION VERSION_LESS 10.0)
        # CUDA < 10: 75 and above is rejected.
        if(NOT CUDA_ARCH_ELEM LESS 75)
            message("${MSG_CUDA_MAP}")
            message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
                                "Use CUDA v10.x minimum, Turing (75) was added at v10.")
        endif()
    endif()

    if (NOT CUDA_VERSION VERSION_LESS 9.0)
        # CUDA >= 9: everything below 30 is rejected.
        if(CUDA_ARCH_ELEM LESS 30)
            message("${MSG_CUDA_MAP}")
            message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
                                "Use CUDA v8.x maximum, Fermi (20/21) was dropped at v9.")
        endif()
    else()
        # CUDA < 9: 70 and above is rejected.
        if(NOT CUDA_ARCH_ELEM LESS 70)
            message("${MSG_CUDA_MAP}")
            message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
                                "Use CUDA v9.x minimum, Volta (70/72) was added at v9.")
        endif()
    endif()
endforeach()
unset(MSG_CUDA_MAP)

list(SORT CUDA_ARCH)

option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)

# ---------------------------------------------------------------------------
# Driver API: locate the CUDA driver and NVRTC libraries and add them to LIBS.
# ---------------------------------------------------------------------------
if (WITH_DRIVER_API)
    # User-supplied directories are searched first.
    set(CUDA_LIB_HINTS "${LIBCUDA_LIBRARY_DIR}")
    set(CUDA_NVRTC_LIB_HINTS "${LIBNVRTC_LIBRARY_DIR}")

    if (XMRIG_OS_APPLE)
        list(APPEND CUDA_LIB_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib")
        list(APPEND CUDA_NVRTC_LIB_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib")
    else()
        set(LIB_HINTS
            "${CUDA_TOOLKIT_ROOT_DIR}/lib64"
            "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"
            "/usr/lib64"
            "/usr/local/cuda/lib64"
        )

        list(APPEND CUDA_LIB_HINTS ${LIB_HINTS})
        list(APPEND CUDA_NVRTC_LIB_HINTS ${LIB_HINTS})

        unset(LIB_HINTS)
    endif()

    find_library(CUDA_LIB libcuda cuda HINTS ${CUDA_LIB_HINTS})
    find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS ${CUDA_NVRTC_LIB_HINTS})

    unset(CUDA_LIB_HINTS)
    unset(CUDA_NVRTC_LIB_HINTS)

    list(APPEND LIBS ${CUDA_LIB} ${CUDA_NVRTC_LIB})
endif()

# ---------------------------------------------------------------------------
# Compiler specific flags.
# ---------------------------------------------------------------------------
if("${CUDA_COMPILER}" STREQUAL "clang")
    set(LIBS ${LIBS} cudart_static)

    # CLANG_BUILD_FLAGS is one space-separated string; it is applied to the
    # target as COMPILE_FLAGS further down.
    set(CLANG_BUILD_FLAGS "-O3 -x cuda --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
    # enable the use of FMA
    set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -ffp-contract=fast")

    if (CUDA_SHOW_REGISTER)
        set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -Xcuda-ptxas -v")
    endif()

    if (CUDA_KEEP_FILES)
        set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -save-temps=${PROJECT_BINARY_DIR}")
    endif()

    foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
        # set flags to create device code for the given architectures
        set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} --cuda-gpu-arch=sm_${CUDA_ARCH_ELEM}")
    endforeach()
elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
    # avoid that nvcc in CUDA < 8 tries to use libc `memcpy` within the kernel
    if (CUDA_VERSION VERSION_LESS 8.0)
        add_definitions(-D_FORCE_INLINES)
        add_definitions(-D_MWAITXINTRIN_H_INCLUDED)
    endif()

    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Wno-deprecated-gpu-targets")

    if (NOT CUDA_VERSION VERSION_LESS 11.3)
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "--threads 0")
    endif()

    foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
        # set flags to create device code for the given architecture
        if("${CUDA_ARCH_ELEM}" STREQUAL "21")
            # "2.1" actually does run faster when compiled as itself, versus in "2.0" compatible mode
            # strange virtual code type on top of compute_20, with no compute_21 (so the normal rule fails)
            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
                "--generate-code arch=compute_20,code=sm_21")
        else()
            # one entry with code=sm_XX and one with code=compute_XX per architecture
            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
                "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
        endif()
    endforeach()

    # give each thread an independent default stream
    set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --default-stream per-thread")

    option(CUDA_SHOW_CODELINES "Show kernel lines in cuda-gdb and cuda-memcheck" OFF)

    if (CUDA_SHOW_CODELINES)
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" --source-in-ptx -lineinfo)
        # Showing code lines force-enables CUDA_KEEP_FILES in the cache.
        set(CUDA_KEEP_FILES ON CACHE BOOL "activate keep files" FORCE)
    endif()

    if (CUDA_SHOW_REGISTER)
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" -Xptxas=-v)
    endif()

    if (CUDA_KEEP_FILES)
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" --keep --keep-dir "${PROJECT_BINARY_DIR}")
    endif()
else()
    message(FATAL_ERROR "selected CUDA compiler '${CUDA_COMPILER}' is not supported")
endif()

# ---------------------------------------------------------------------------
# Source lists.
# ---------------------------------------------------------------------------
if (WITH_RANDOMX)
    set(CUDA_RANDOMX_SOURCES
        src/RandomX/aes_cuda.hpp
        src/RandomX/arqma/configuration.h
        src/RandomX/arqma/randomx_arqma.cu
        src/RandomX/blake2b_cuda.hpp
        src/RandomX/common.hpp
        src/RandomX/graft/configuration.h
        src/RandomX/graft/randomx_graft.cu
        src/RandomX/hash.hpp
        src/RandomX/monero/configuration.h
        src/RandomX/monero/randomx_monero.cu
        src/RandomX/randomx_cuda.hpp
        src/RandomX/randomx.cu
        src/RandomX/wownero/configuration.h
        src/RandomX/wownero/randomx_wownero.cu
        src/RandomX/yada/configuration.h
        src/RandomX/yada/randomx_yada.cu
    )
else()
    set(CUDA_RANDOMX_SOURCES "")
endif()

# The KawPow sources are only built when the driver API is enabled as well.
if (WITH_KAWPOW AND WITH_DRIVER_API)
    set(CUDA_KAWPOW_SOURCES
        src/KawPow/raven/CudaKawPow_gen.cpp
        src/KawPow/raven/CudaKawPow_gen.h
        src/KawPow/raven/KawPow.cu
    )
else()
    set(CUDA_KAWPOW_SOURCES "")
endif()

set(CUDA_SOURCES
    src/cryptonight.h
    src/cuda_aes.hpp
    src/cuda_blake.hpp
    src/cuda_core.cu
    src/cuda_device.hpp
    src/cuda_extra.cu
    src/cuda_extra.h
    src/cuda_fast_int_math_v2.hpp
    src/cuda_groestl.hpp
    src/cuda_jh.hpp
    src/cuda_keccak.hpp
    src/cuda_skein.hpp
    ${CUDA_RANDOMX_SOURCES}
    ${CUDA_KAWPOW_SOURCES}
)

# ---------------------------------------------------------------------------
# Library target.
# ---------------------------------------------------------------------------
if("${CUDA_COMPILER}" STREQUAL "clang")
    # With clang the sources are marked as CXX and compiled with the
    # "-x cuda" flags collected in CLANG_BUILD_FLAGS.
    add_library(xmrig-cu STATIC ${CUDA_SOURCES})

    # Quoted so the whole flag string stays a single property value.
    set_target_properties(xmrig-cu PROPERTIES COMPILE_FLAGS "${CLANG_BUILD_FLAGS}")
    set_target_properties(xmrig-cu PROPERTIES LINKER_LANGUAGE CXX)
    set_source_files_properties(${CUDA_SOURCES} PROPERTIES LANGUAGE CXX)
else()
    cuda_add_library(xmrig-cu STATIC ${CUDA_SOURCES})
endif()