We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Related to #5491. The more CUDA gencodes (gencode: sm_xx) are specified, the slower NVCC compilation becomes.
The following code can detect the installed GPU architectures automatically.
# detect_installed_gpus(<out_variable>)
#
# Detects the compute capabilities of the GPUs visible on the build machine by
# compiling and running a tiny CUDA program with nvcc.
#
# Arguments:
#   out_variable - name of the variable, set in the caller's scope, that
#                  receives the result.
#
# Result:
#   On success, <out_variable> holds the space-separated "major.minor" values
#   printed by the probe program (one per device, "2.1" rewritten as
#   "2.1(2.0)"). On failure, it holds ${paddle_known_gpu_archs}.
#
# Reads:  CUDA_NVCC_EXECUTABLE, CUDA_HOST_COMPILER, PROJECT_BINARY_DIR,
#         paddle_known_gpu_archs.
# Caches: CUDA_gpu_detect_output (CACHE INTERNAL), so the probe is only run
#         again while no successful result has been cached.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    # Probe program: prints "major.minor " for every device whose properties
    # can be queried; exits non-zero when no device can be enumerated.
    file(WRITE ${cufile} ""
      "#include <cstdio>\n"
      "int main() {\n"
      " int count = 0;\n"
      " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      " if (count == 0) return -1;\n"
      " for (int device = 0; device < count; ++device) {\n"
      " cudaDeviceProp prop;\n"
      " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
      " }\n"
      " return 0;\n"
      "}\n")

    # Only forward the host compiler when one is actually configured; a bare
    # "-ccbin=" would make nvcc fail and defeat the detection.
    set(nvcc_args)
    if(CUDA_HOST_COMPILER)
      list(APPEND nvcc_args "-ccbin=${CUDA_HOST_COMPILER}")
    endif()
    list(APPEND nvcc_args "--run" "${cufile}")

    execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" ${nvcc_args}
                    WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
                    RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
                    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    # Require non-empty output: list(GET ...) on an empty list is a CMake
    # error, and an empty result means no architecture was reported anyway.
    if(nvcc_res EQUAL 0 AND NOT "${nvcc_out}" STREQUAL "")
      # Only keep the last line of nvcc_out. Existing semicolons are escaped
      # first so that splitting on newlines yields exactly one list element
      # per output line.
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
      list(GET nvcc_out -1 nvcc_out)
      # NOTE(review): presumably pairs sm_21 with the compute_20 virtual
      # architecture for later gencode generation - confirm against the caller.
      string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
      set(CUDA_gpu_detect_output ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from caffe_detect_gpus tool" FORCE)
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(STATUS "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE)
  else()
    set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
  endif()
endfunction()
Also related to #5413
The text was updated successfully, but these errors were encountered:
qingqing01
No branches or pull requests
Related to #5491. The more CUDA gencodes (gencode: sm_xx) are specified, the slower NVCC compilation becomes.
The following code can detect the installed GPU architectures automatically.
Also related to #5413
The text was updated successfully, but these errors were encountered: