Profiling tool: Update comparison mode output format and add error handling #2762

Merged: 9 commits, Jun 23, 2021
81 changes: 44 additions & 37 deletions tools/README.md

@@ -246,29 +246,30 @@ We can input multiple Spark event logs and this tool can compare environments, e
### A. Compare Information Collected ###
Compare Application Information:

-+--------+-----------------------+-------------+-------------+--------+-----------+------------+-------+
-|appIndex|appId                  |startTime    |endTime      |duration|durationStr|sparkVersion|gpuMode|
-+--------+-----------------------+-------------+-------------+--------+-----------+------------+-------+
-|1       |app-20210329165943-0103|1617037182848|1617037490515|307667  |5.1 min    |3.0.1       |false  |
-|2       |app-20210329170243-0018|1617037362324|1617038578035|1215711 |20 min     |3.0.1       |true   |
-+--------+-----------------------+-------------+-------------+--------+-----------+------------+-------+
++--------+-----------+-----------------------+-------------+-------------+--------+-----------+------------+-------------+
+|appIndex|appName    |appId                  |startTime    |endTime      |duration|durationStr|sparkVersion|pluginEnabled|
++--------+-----------+-----------------------+-------------+-------------+--------+-----------+------------+-------------+
+|1       |Spark shell|app-20210329165943-0103|1617037182848|1617037490515|307667  |5.1 min    |3.0.1       |false        |
+|2       |Spark shell|app-20210329170243-0018|1617037362324|1617038578035|1215711 |20 min     |3.0.1       |true         |
++--------+-----------+-----------------------+-------------+-------------+--------+-----------+------------+-------------+
```

- Compare Executor information:
```
Compare Executor Information:
-+--------+----------+----------+-----------+------------+-------------+--------+--------+--------+------------+--------+--------+
-|appIndex|executorID|totalCores|maxMem     |maxOnHeapMem|maxOffHeapMem|exec_cpu|exec_mem|exec_gpu|exec_offheap|task_cpu|task_gpu|
-+--------+----------+----------+-----------+------------+-------------+--------+--------+--------+------------+--------+--------+
-|1       |0         |4         |13984648396|13984648396 |0            |null    |null    |null    |null        |null    |null    |
-|1       |1         |4         |13984648396|13984648396 |0            |null    |null    |null    |null        |null    |null    |
++--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
+|appIndex|resourceProfileId|numExecutors|executorCores|maxMem     |maxOnHeapMem|maxOffHeapMem|executorMemory|numGpusPerExecutor|executorOffHeap|taskCpu|taskGpu|
++--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
+|1       |0                |1           |4            |11264537395|11264537395 |0            |20480         |1                 |0              |1      |0.0    |
+|1       |1                |2           |2            |3247335014 |3247335014  |0            |6144          |2                 |0              |2      |2.0    |
++--------+-----------------+------------+-------------+-----------+------------+-------------+--------------+------------------+---------------+-------+-------+
```

- Compare Rapids related Spark properties side-by-side:
```
Compare Rapids Properties which are set explicitly:
+-------------------------------------------+----------+----------+
-|key                                        |value_app1|value_app2|
+|propertyName                               |appIndex_1|appIndex_2|
+-------------------------------------------+----------+----------+
|spark.rapids.memory.pinnedPool.size |null |2g |
|spark.rapids.sql.castFloatToDecimal.enabled|null |true |

@@ -402,7 +403,7 @@ SQL Duration and Executor CPU Time Percent
```
Shuffle Skew Check: (When task's Shuffle Read Size > 3 * Avg Stage-level size)
+--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
-|appIndex|stageId|stageAttemptId|taskId|attempt|taskDurationSec|avgDurationSec|taskShuffleReadMB|avgShuffleReadMB|taskPeakMemoryMB|successful|endReason_first100char                                                                      |
+|appIndex|stageId|stageAttemptId|taskId|attempt|taskDurationSec|avgDurationSec|taskShuffleReadMB|avgShuffleReadMB|taskPeakMemoryMB|successful|reason                                                                                              |
+--------+-------+--------------+------+-------+---------------+--------------+-----------------+----------------+----------------+----------+----------------------------------------------------------------------------------------------------+
|1 |2 |0 |2222 |0 |111.11 |7.7 |2222.22 |111.11 |0.01 |false |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /dddd/xxxxxxx/ccccc/bbbbbbbbb/aaaaaaa|
|1 |2 |0 |2224 |1 |222.22 |8.8 |3333.33 |111.11 |0.01 |false |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /dddd/xxxxxxx/ccccc/bbbbbbbbb/aaaaaaa|

@@ -416,46 +417,47 @@ Below are examples.
- Print failed tasks:
```
Failed tasks:
-+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
-|stageId|stageAttemptId|taskId|attempt|endReason_first100char                                                                              |
-+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
-|4      |0             |2842  |0      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
-|4      |0             |2858  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(22000),None,false,true,None)|
-|4      |0             |2884  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(21148),None,false,true,None)|
-|4      |0             |2908  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(20420),None,false,true,None)|
-|4      |0             |3410  |1      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
-+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
++--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
+|appIndex|stageId|stageAttemptId|taskId|attempt|failureReason                                                                                       |
++--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
+|3       |4      |0             |2842  |0      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
+|3       |4      |0             |2858  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(22000),None,false,true,None)|
+|3       |4      |0             |2884  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(21148),None,false,true,None)|
+|3       |4      |0             |2908  |0      |TaskKilled(another attempt succeeded,List(AccumulableInfo(453,None,Some(20420),None,false,true,None)|
+|3       |4      |0             |3410  |1      |ExceptionFailure(ai.rapids.cudf.CudfException,cuDF failure at: /home/jenkins/agent/workspace/jenkins|
+|4       |1      |0             |1948  |1      |TaskKilled(another attempt succeeded,List(AccumulableInfo(290,None,Some(1107),None,false,true,None),|
++--------+-------+--------------+------+-------+----------------------------------------------------------------------------------------------------+
```

- Print failed stages:
```
Failed stages:
-+-------+---------+-------------------------------------+--------+---------------------------------------------------+
-|stageId|attemptId|name                                 |numTasks|failureReason_first100char                         |
-+-------+---------+-------------------------------------+--------+---------------------------------------------------+
-|4      |0        |attachTree at Spark300Shims.scala:624|1000    |Job 0 cancelled as part of cancellation of all jobs|
-+-------+---------+-------------------------------------+--------+---------------------------------------------------+
++--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
+|appIndex|stageId|attemptId|name                                 |numTasks|failureReason                                      |
++--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
+|3       |4      |0        |attachTree at Spark300Shims.scala:624|1000    |Job 0 cancelled as part of cancellation of all jobs|
++--------+-------+---------+-------------------------------------+--------+---------------------------------------------------+
```

- Print failed jobs:
```
Failed jobs:
-+-----+---------+------------------------------------------------------------------------+
-|jobID|jobResult|failedReason_first100char                                               |
-+-----+---------+------------------------------------------------------------------------+
-|0    |JobFailed|java.lang.Exception: Job 0 cancelled as part of cancellation of all jobs|
-+-----+---------+------------------------------------------------------------------------+
++--------+-----+---------+------------------------------------------------------------------------+
+|appIndex|jobID|jobResult|failureReason                                                           |
++--------+-----+---------+------------------------------------------------------------------------+
+|3       |0    |JobFailed|java.lang.Exception: Job 0 cancelled as part of cancellation of all j...|
++--------+-----+---------+------------------------------------------------------------------------+
```

- SQL Plan HealthCheck:

Prints possibly unsupported query plan nodes. For example, the `$Lambda` keyword indicates that the Dataset API is being used; a minimal snippet that produces such a node is sketched after the table below.
```
-+-----+------+--------+---------------------------------------------------------------------------------------------------+
-|sqlID|nodeID|nodeName|nodeDesc_first100char                                                                              |
-+-----+------+--------+---------------------------------------------------------------------------------------------------+
-|1    |8     |Filter  |Filter $line21.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$Lambda$4578/0x00000008019f1840@4b63e04c.apply|
-+-----+------+--------+---------------------------------------------------------------------------------------------------+
++--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
+|appIndex|sqlID|nodeID|nodeName|nodeDescription                                                                                    |
++--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
+|3       |1    |8     |Filter  |Filter $line21.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$Lambda$4578/0x00000008019f1840@4b63e04c.apply|
++--------+-----+------+--------+---------------------------------------------------------------------------------------------------+
```
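
For reference, the kind of Dataset API code that produces a `$Lambda` node can be reproduced with a snippet like the one below (a minimal sketch, not part of this PR; names and values are illustrative):

```
import org.apache.spark.sql.SparkSession

object LambdaPlanExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("lambda-plan-demo")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val ds = spark.range(100).map(_ * 2L)

    // A typed filter compiles to an opaque JVM lambda, so the plan contains a
    // Filter node whose description includes "$Lambda", the pattern flagged above.
    ds.filter(x => x % 4 == 0).explain(true)

    // An equivalent Column-based filter keeps the predicate visible to Catalyst
    // (and therefore to the plugin):
    ds.toDF("value").filter($"value" % 4 === 0).explain(true)

    spark.stop()
  }
}
```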

### How to use this tool

@@ -529,6 +531,11 @@ default filesystem, it supports local filesystem or HDFS. There are separate fil
under the same sub-directory when using the options to generate query visualizations or print the SQL plans.

The output location can be changed using the `--output-directory` option. The default is the current directory.

Each output column is limited to 100 characters. If a column's value exceeds this limit, it is truncated and suffixed with `...`.
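
As a rough illustration only, the truncation behaves like this hypothetical helper (not the tool's actual code; exact cutoff details may differ):

```
// Hypothetical sketch of the 100-character output-column limit described above.
def truncateColumn(value: String, limit: Int = 100): String =
  if (value.length > limit) value.take(limit - 3) + "..." else value
```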

ResourceProfile IDs are parsed for event logs from Spark 3.1 or later, and a `resourceProfileId` column is added to the output table for those logs.
A ResourceProfile lets the user specify executor and task requirements for an RDD that are applied when a stage runs, so the resource requirements can change between stages.
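
For background, per-stage requirements like these are built with Spark's `org.apache.spark.resource` API (Spark 3.1+). The sketch below uses illustrative values and assumes an existing `SparkContext` named `sc`:

```
import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder, TaskResourceRequests}

// Executor-side requirements: cores, heap memory, and GPUs per executor.
val execReqs = new ExecutorResourceRequests()
  .cores(4)
  .memory("6g")
  .resource("gpu", 1)

// Task-side requirements: CPUs and GPUs per task.
val taskReqs = new TaskResourceRequests()
  .cpus(2)
  .resource("gpu", 1.0)

val profile = new ResourceProfileBuilder()
  .require(execReqs)
  .require(taskReqs)
  .build

// Attach the profile to an RDD; the requirements apply to the stages that run it.
val doubled = sc.parallelize(1 to 1000, 8)
  .withResources(profile)
  .map(_ * 2)
  .collect()
```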

Note: We suggest you also save the output of `spark-submit` or `spark-shell` to a log file for troubleshooting.


@@ -170,7 +170,8 @@ class Analysis(apps: Seq[ApplicationInfo], fileWriter: Option[ToolTextFileWriter
      }.map( app => "(" + app.profilingDurationSQL + ")")
        .mkString(" union ")
      if (query.nonEmpty) {
-       apps.head.runQuery(query, false, fileWriter, messageHeader)
+       apps.head.runQuery(query + "order by appIndex, sqlID, `SQL Duration`",
+         false, fileWriter, messageHeader)
      } else {
        apps.head.sparkSession.emptyDataFrame
      }

@@ -209,7 +210,7 @@ class Analysis(apps: Seq[ApplicationInfo], fileWriter: Option[ToolTextFileWriter
        |round(tmp.avgShuffleReadBytes/1024/1024,2) as avgShuffleReadMB,
        |round(t.peakExecutionMemory/1024/1024,2) as taskPeakMemoryMB,
        |t.successful,
-       |substr(t.endReason,0,100) endReason_first100char
+       |substr(t.endReason,0,100) reason
        |from tmp, taskDF_${app.index} t
        |where tmp.stageId=t.StageId
        |and tmp.stageAttemptId=t.stageAttemptId

@@ -22,8 +22,8 @@ package com.nvidia.spark.rapids.tool.profiling
*/

case class ResourceProfileCase(
-    id: Int, exec_cpu: Int, exec_mem: Long, exec_gpu: Int,
-    exec_offheap: Long, task_cpu: Int, task_gpu: Double)
+    id: Int, executorCores: Int, executorMemory: Long, numGpusPerExecutor: Int,
+    executorOffHeap: Long, taskCpu: Int, taskGpu: Double)

case class BlockManagerCase(
executorID: String, host: String, port: Int,

@@ -72,7 +72,7 @@ class CollectInformation(apps: Seq[ApplicationInfo],
    val messageHeader = "\nExecutor Information:\n"
    for (app <- apps) {
      if (app.allDataFrames.contains(s"executorsDF_${app.index}")) {
-       app.runQuery(query = app.generateExecutorInfo + " order by cast(executorID as long)",
+       app.runQuery(query = app.generateExecutorInfo + " order by cast(numExecutors as long)",
          fileWriter = fileWriter, messageHeader = messageHeader)
      } else {
        fileWriter.foreach(_.write("No Executor Information Found!\n"))

@@ -98,7 +98,7 @@
    val messageHeader = "\nSpark Rapids parameters set explicitly:\n"
    for (app <- apps) {
      if (app.allDataFrames.contains(s"propertiesDF_${app.index}")) {
-       app.runQuery(query = app.generateRapidsProperties + " order by key",
+       app.runQuery(query = app.generateRapidsProperties + " order by propertyName",
          fileWriter = fileWriter, messageHeader = messageHeader)
      } else {
        fileWriter.foreach(_.write("No Spark Rapids parameters Found!\n"))