GalSim-developers · rmjarvis · Aug 11, 2023 · Jul 7, 2023 · Jul 21, 2023 · Jul 31, 2023
diff --git a/include/galsim/Std.h b/include/galsim/Std.h
@@ -134,9 +134,10 @@ class Debugger // Use a Singleton model so it can be included multiple times.
 #define verbose_level Debugger::instance().get_level()
 #define xassert(x) assert(x)
 #else
-#define dbg if(false) (std::cerr)
-#define xdbg if(false) (std::cerr)
-#define xxdbg if(false) (std::cerr)
+extern std::ostream* dbgout; // target of the disabled macros below; NOTE(review): definition not visible here - confirm one exists
+#define dbg if(false) (*dbgout) // if(false) guard: anything streamed after dbg is compiled but never executed
+#define xdbg if(false) (*dbgout)
+#define xxdbg if(false) (*dbgout)
 #define set_dbgout(dbgout)
 #define set_verbose(level)
 #define xassert(x)

diff --git a/setup.py b/setup.py
@@ -72,7 +72,7 @@ def all_files_from(dir, ext=''):
 
 copt =  {
     'gcc' : ['-O2','-std=c++11','-fvisibility=hidden','-fopenmp'],
-    'gcc w/ GPU' : ['-O2','-std=c++11','-fvisibility=hidden','-fopenmp','-foffload=nvptx-none'],
+    'gcc w/ GPU' : ['-O2','-std=c++11','-fvisibility=hidden','-fopenmp','-foffload=nvptx-none','-DGALSIM_USE_GPU'],
     'icc' : ['-O2','-vec-report0','-std=c++11','-openmp'],
     'clang' : ['-O2','-std=c++11',
                '-Wno-shorten-64-to-32','-fvisibility=hidden','-stdlib=libc++'],
@@ -85,18 +85,20 @@ def all_files_from(dir, ext=''):
     'clang w/ GPU' : ['-O2','-msse2','-std=c++11','-fopenmp','-fopenmp-targets=nvptx64-nvidia-cuda',
                       '-Wno-openmp-mapping','-Wno-unknown-cuda-version',
                       '-Wno-shorten-64-to-32','-fvisibility=hidden', '-DGALSIM_USE_GPU'],
+    'nvc++' : ['-O2','-std=c++11','-mp=gpu','-DGALSIM_USE_GPU'],
     'unknown' : [],
 }
 lopt =  {
     'gcc' : ['-fopenmp'],
-    'gcc w/ GPU' : ['-fopenmp','-foffload=nvptx-none'],
+    'gcc w/ GPU' : ['-fopenmp','-foffload=nvptx-none', '-foffload=-lm'],
     'icc' : ['-openmp'],
     'clang' : ['-stdlib=libc++'],
     'clang w/ OpenMP' : ['-stdlib=libc++','-fopenmp'],
     'clang w/ Intel OpenMP' : ['-stdlib=libc++','-liomp5'],
     'clang w/ manual OpenMP' : ['-stdlib=libc++','-lomp'],
     'clang w/ GPU' : ['-fopenmp','-fopenmp-targets=nvptx64-nvidia-cuda',
                       '-Wno-openmp-mapping','-Wno-unknown-cuda-version'],
+    'nvc++' : ['-mp=gpu'],
     'unknown' : [],
 }
 
@@ -143,6 +145,9 @@ def get_compiler_type(compiler, check_unknown=True, output=False):
     line = lines[0].decode(encoding='UTF-8')
     if line.startswith('Configured'):
         line = lines[1].decode(encoding='UTF-8')
+    # nvc++ version info starts with a blank line, so use the next line -- but only if one exists
+    if line.strip() == "" and len(lines) > 1:
+        line = lines[1].decode(encoding='UTF-8')
 
     if 'clang' in line:
         # clang 3.7 is the first with openmp support.  But Apple lies about the version
@@ -180,6 +185,8 @@ def get_compiler_type(compiler, check_unknown=True, output=False):
                 print("Yay! This version of gcc supports GPU!")
             return 'gcc w/ GPU'
         return 'gcc'
+    elif 'nvc++' in line or 'nvcc' in line or 'NVIDIA' in line:
+        return 'nvc++'  # NOTE(review): nvcc / any 'NVIDIA' banner also gets the nvc++ flags (-mp=gpu) -- confirm intended
     elif 'clang' in cc:
         return 'clang'
     elif 'gcc' in cc or 'g++' in cc:
@@ -762,6 +769,12 @@ def fix_compiler(compiler, njobs):
     except (AttributeError, ValueError):
         pass
 
+    # nvc++ doesn't support -Wno-unused-result; the compiler type isn't known yet, so drop it for all
+    try:
+        compiler.compiler_so.remove("-Wno-unused-result")
+    except (AttributeError, ValueError):
+        pass
+
     # Figure out what compiler it will use
     comp_type = get_compiler_type(compiler, output=True)
     cc = compiler.compiler_so[0]