user-tools should add xms argument to java cmd
Signed-off-by: Ahmed Hussein <ahussein@nvidia.com>

Fixes NVIDIA#1382

Investigation revealed that the minimum heap size can significantly impact the runtime.
This change sets the `-Xms` java argument to 50% of the max heap size (a short
sketch of the sizing rule follows the list below).

- Pass `-Xms` to the java cmd.
- Update the runtime report to list JVM info along with the heap-related
  JVM arguments:
  - `runtime.jvm.*`
  - `runtime.jvm.arg*`
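For illustration, a minimal Python sketch of the sizing rule (the heap values and the helper name below are made up; user-tools derives the actual max heap from the host resources, as in adjust_tools_resources further down):

# Minimal sketch of the -Xms sizing rule: min heap is 50% of max heap,
# never below 1 GB. Values and helper name are illustrative only.
def build_heap_args(max_heap_gb: int) -> list:
    min_heap_gb = max(1, max_heap_gb // 2)
    return [f'-Xms{min_heap_gb}g', f'-Xmx{max_heap_gb}g']

print(build_heap_args(32))  # ['-Xms16g', '-Xmx32g']
print(build_heap_args(1))   # ['-Xms1g', '-Xmx1g']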
amahussein committed Oct 23, 2024
1 parent 88b37bf commit de00f69
Showing 3 changed files with 46 additions and 3 deletions.
@@ -17,6 +17,9 @@
package org.apache.spark.sql.rapids.tool.util

import java.io.{PrintWriter, StringWriter}
import java.lang.management.ManagementFactory

import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`

import com.nvidia.spark.rapids.tool.ToolTextFileWriter
import org.apache.hadoop.conf.Configuration
@@ -49,6 +52,12 @@ object RuntimeUtil extends Logging {
    // Add the Spark version used in runtime.
    // Note that it is different from the Spark version used in the build.
    buildProps.setProperty("runtime.spark.version", ToolUtils.sparkRuntimeVersion)
    // Add the JVM and OS information
    getJVMOSInfo.foreach {
      kv => buildProps.setProperty(s"runtime.${kv._1}", kv._2)
    }
    // get the JVM memory arguments
    getJVMHeapArguments.foreach(kv => buildProps.setProperty(s"runtime.${kv._1}", kv._2))
    val reportWriter = new ToolTextFileWriter(outputDir, REPORT_FILE_NAME, REPORT_LABEL, hadoopConf)
    try {
      reportWriter.writeProperties(buildProps, REPORT_LABEL)
@@ -73,6 +82,30 @@ object RuntimeUtil extends Logging {
"os.version" -> System.getProperty("os.version")
)
}
}

def getJVMHeapArguments: Map[String, String] = {
val gcMxBeans = ManagementFactory.getGarbageCollectorMXBeans
gcMxBeans.foreach(_.getName)

val jvmHeapGCArgs = ManagementFactory.getRuntimeMXBean.getInputArguments.filter(
p => p.startsWith("-Xmx") || p.startsWith("-Xms") || p.startsWith("-XX:")).map {
sizeArg =>
if (sizeArg.startsWith("-Xmx")) {
("jvm.arg.heap.max", sizeArg.drop(4))
} else if (sizeArg.startsWith("-Xms")) {
("jvm.arg.heap.min", sizeArg.drop(4))
} else { // this is heap argument
// drop the first "-XX:"
val dropSize = if (sizeArg.startsWith("-XX:+")) 5 else 4
val parts = sizeArg.drop(dropSize).split("=")
if (parts.length == 2) {
(s"jvm.arg.gc.${parts(0)}", parts(1))
} else {
(s"jvm.arg.gc.${parts(0)}", "")
}
}
}
// get remaining GC arguments
jvmHeapGCArgs.toMap
}
}
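For illustration, a rough Python mirror of the getJVMHeapArguments mapping above (the sample argument list is made up; the Scala code reads the real arguments from the running JVM via ManagementFactory):

# Rough Python mirror of the Scala mapping above, for illustration only.
def jvm_heap_arguments(input_args):
    props = {}
    for arg in input_args:
        if arg.startswith('-Xmx'):
            props['jvm.arg.heap.max'] = arg[4:]
        elif arg.startswith('-Xms'):
            props['jvm.arg.heap.min'] = arg[4:]
        elif arg.startswith('-XX:'):
            # drop "-XX:" (or "-XX:+" for boolean flags), then split key=value
            body = arg[5:] if arg.startswith('-XX:+') else arg[4:]
            key, _, value = body.partition('=')
            props[f'jvm.arg.gc.{key}'] = value
    return props

sample = ['-Xms8g', '-Xmx16g', '-XX:+UseG1GC', '-XX:MaxGCPauseMillis=200']
print(jvm_heap_arguments(sample))
# {'jvm.arg.heap.min': '8g', 'jvm.arg.heap.max': '16g',
#  'jvm.arg.gc.UseG1GC': '', 'jvm.arg.gc.MaxGCPauseMillis': '200'}

The Scala caller prefixes each key with `runtime.` before writing it to the runtime report.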
9 changes: 7 additions & 2 deletions user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py
@@ -878,11 +878,16 @@ def _re_evaluate_platform_args(self, tool_name: str) -> dict:
        job_args = self.ctxt.get_ctxt('jobArgs')
        result = copy.deepcopy(job_args)
        job_resources = self._get_job_submission_resources(tool_name)
        jvm_min_heap = job_resources['jvmMinHeapSize']
        jvm_max_heap = job_resources['jvmMaxHeapSize']
        jvm_heap_key = f'Xmx{jvm_max_heap}g'
        jvm_max_heap_key = f'Xmx{jvm_max_heap}g'
        jvm_min_heap_key = f'Xms{jvm_min_heap}g'
        # At this point, we need to set the heap argument for the JVM. Otherwise, the process uses
        # its default values.
        result['platformArgs']['jvmArgs'].update({jvm_heap_key: ''})
        result['platformArgs']['jvmArgs'].update({
            jvm_min_heap_key: '',
            jvm_max_heap_key: ''
        })
        return result

    @timeit('Building Job Arguments and Executing Job CMD')  # pylint: disable=too-many-function-args
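The empty-string values mark flag-style JVM arguments; presumably the downstream job builder turns each key into a `-`-prefixed flag when it assembles the java command. A minimal sketch under that assumption (the helper name and rendering rule are not taken from the repository):

# Hypothetical rendering of the jvmArgs dict into java flags, assuming the
# builder emits '-<key>' for empty values and '-<key>=<value>' otherwise.
def render_jvm_args(jvm_args: dict) -> list:
    return [f'-{k}={v}' if v else f'-{k}' for k, v in jvm_args.items()]

jvm_args = {'Xms8g': '', 'Xmx16g': ''}  # example keys, as built by _re_evaluate_platform_args
print(render_jvm_args(jvm_args))        # ['-Xms8g', '-Xmx16g']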
5 changes: 5 additions & 0 deletions user_tools/src/spark_rapids_tools/utils/util.py
@@ -291,13 +291,18 @@ def adjust_tools_resources(cls,
        else:
            prof_threads = max(1, jvm_threads - num_threads_unit) if concurrent_mode else jvm_threads

        # calculate the min heap size based on the max heap size
        min_heap = max(1, heap_unit // 2)

        return {
            'qualification': {
                'jvmMaxHeapSize': heap_unit,
                'jvmMinHeapSize': min_heap,
                'rapidsThreads': num_threads_unit
            },
            'profiling': {
                'jvmMaxHeapSize': prof_heap,
                'jvmMinHeapSize': min_heap,
                'rapidsThreads': prof_threads
            }
        }
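As an illustrative example of the new resource shape (the numbers are made up): with heap_unit=24, prof_heap=20, num_threads_unit=4, and prof_threads=3, both tools would now receive the shared minimum heap:

# Illustrative return value under the assumed inputs above:
# min_heap = max(1, 24 // 2) = 12 GB is shared by qualification and profiling.
expected = {
    'qualification': {'jvmMaxHeapSize': 24, 'jvmMinHeapSize': 12, 'rapidsThreads': 4},
    'profiling': {'jvmMaxHeapSize': 20, 'jvmMinHeapSize': 12, 'rapidsThreads': 3},
}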
