diff --git a/docs/spark-profiling-tool.md b/docs/spark-profiling-tool.md
index 782e214d7f5..b5a4c5c21da 100644
--- a/docs/spark-profiling-tool.md
+++ b/docs/spark-profiling-tool.md
@@ -354,7 +354,7 @@ stage. Jobs and SQL are not color coordinated.
 #### B. Analysis
 - Job + Stage level aggregated task metrics
 - SQL level aggregated task metrics
-- SQL duration, application during, if it contains a Dataset operation, potential problems, executor CPU time percent
+- SQL duration, application duration, if it contains a Dataset or RDD operation, potential problems, executor CPU time percent
 - Shuffle Skew Check: (When task's Shuffle Read Size > 3 * Avg Stage-level size)
 
 Below we will aggregate the task level metrics at different levels
@@ -382,15 +382,15 @@ SQL level aggregated task metrics:
 |1 |application_1111111111111_0001|0 |show at :11|1111 |222222 |6666666 |55555555 |55.55 |0 |13333333 |111111 |999 |3333.3 |6666666 |55555 |66666 |11111111 |0 |111111111111 |11111111111 |111111 |0 |0 |0 |888888888 |8 |11111 |11111 |99999 |11111111111 |2222222 |222222222222 |0 |222222222222 |444444444444 |5555555 |444444 |
 ```
 
-- SQL duration, application during, if it contains a Dataset operation, potential problems, executor CPU time percent:
+- SQL duration, application duration, if it contains a Dataset or RDD operation, potential problems, executor CPU time percent:
 
 ```
 SQL Duration and Executor CPU Time Percent
-+--------+------------------------------+-----+------------+-------------------+------------+------------------+-------------------------+
-|appIndex|App ID                        |sqlID|SQL Duration|Contains Dataset Op|App Duration|Potential Problems|Executor CPU Time Percent|
-+--------+------------------------------+-----+------------+-------------------+------------+------------------+-------------------------+
-|1       |application_1603128018386_7759|0    |11042       |false              |119990      |null              |68.48                    |
-+--------+------------------------------+-----+------------+-------------------+------------+------------------+-------------------------+
++--------+------------------------------+-----+------------+--------------------------+------------+------------------+-------------------------+
+|appIndex|App ID                        |sqlID|SQL Duration|Contains Dataset or RDD Op|App Duration|Potential Problems|Executor CPU Time Percent|
++--------+------------------------------+-----+------------+--------------------------+------------+------------------+-------------------------+
+|1       |application_1603128018386_7759|0    |11042       |false                     |119990      |null              |68.48                    |
++--------+------------------------------+-----+------------+--------------------------+------------+------------------+-------------------------+
 ```
 
 - Shuffle Skew Check:
@@ -520,4 +520,4 @@ Usage: java -cp rapids-4-spark-tools_2.12-.jar:$SPARK_HOME/jars/*
 All the metrics definitions can be found in the
 [executor task metrics doc](https://spark.apache.org/docs/latest/monitoring.html#executor-task-metrics) /
 [executor metrics doc](https://spark.apache.org/docs/latest/monitoring.html#executor-metrics) or
-the [SPARK webUI doc](https://spark.apache.org/docs/latest/web-ui.html#content).
\ No newline at end of file
+the [SPARK webUI doc](https://spark.apache.org/docs/latest/web-ui.html#content).
diff --git a/docs/spark-qualification-tool.md b/docs/spark-qualification-tool.md
index a74116a1eb1..3d6fb60bffb 100644
--- a/docs/spark-qualification-tool.md
+++ b/docs/spark-qualification-tool.md
@@ -304,6 +304,9 @@ The score is based on the total time spent in tasks of SQL Dataframe operations.
 The tool also looks for read data formats and types that the plugin doesn't fully support and if it finds any,
 it will take away from the score. The parameter to control this negative impact of the score is
 `-r, --read-score-percent` with the default value as 20(percent).
+Each application (event log) can contain multiple SQL queries. If a SQL query's plan has a Dataset API or RDD
+call inside of it, that query is not categorized as a Dataframe SQL query. Because we cannot determine how much
+of such a query is made up of Dataset or RDD calls, the entire query's task time is excluded from the score.
 
 The idea behind this algorithm is that the longer the total task time doing SQL Dataframe operations
 the higher the score is and the more likely the plugin will be able to help accelerate that application.
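To make the exclusion described in the qualification doc change above concrete, here is a minimal sketch, with hypothetical SQL IDs and task times that are not taken from this patch, of how task time is summed when some queries are flagged as containing Dataset or RDD operations:

```scala
object ScoreExclusionSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical total task time (ms) per SQL query in one event log.
    val taskTimeBySqlId = Map(0L -> 125035L, 1L -> 4666L)
    // Hypothetical set of SQL IDs whose plans contained a Dataset API or RDD call.
    val sqlIdsWithDatasetOrRDD = Set(0L)

    // Only task time from queries that are pure SQL/DataFrame counts toward the score.
    val scoredTaskTime = taskTimeBySqlId
      .filterNot { case (sqlId, _) => sqlIdsWithDatasetOrRDD.contains(sqlId) }
      .values.sum

    // Query 0 is excluded entirely, so only query 1's 4666 ms is counted.
    println(s"Task time counted toward the score: $scoredTaskTime ms")
  }
}
```

The `filterNot` here mirrors the shape of the `sqlIDToDataSetOrRDDCase` filtering added to `QualAppInfo.scala` later in this diff.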
r.matches(".*SerializeFromObject.*") => true case _ => false } } diff --git a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/ApplicationInfo.scala b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/ApplicationInfo.scala index 8747b3306c3..103c6a35c5d 100644 --- a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/ApplicationInfo.scala +++ b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/ApplicationInfo.scala @@ -271,13 +271,13 @@ class ApplicationInfo( val allnodes = planGraph.allNodes for (node <- allnodes) { checkGraphNodeForBatchScan(sqlID, node) - if (isDataSetPlan(node.desc)) { + if (isDataSetOrRDDPlan(node.desc)) { sqlIdToInfo.get(sqlID).foreach { sql => - sql.hasDataset = true + sql.hasDatasetOrRDD = true } if (gpuMode) { val thisPlan = UnsupportedSQLPlan(sqlID, node.id, node.name, node.desc, - "Contains Dataset") + "Contains Dataset or RDD") unsupportedSQLplan += thisPlan } } diff --git a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/EventsProcessor.scala b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/EventsProcessor.scala index b5040c81485..e2e48575eac 100644 --- a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/EventsProcessor.scala +++ b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/profiling/EventsProcessor.scala @@ -332,7 +332,7 @@ class EventsProcessor() extends EventProcessorBase with Logging { event.time, None, None, - hasDataset = false, + hasDatasetOrRDD = false, "" ) app.sqlIdToInfo.put(event.executionId, sqlExecution) diff --git a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualAppInfo.scala b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualAppInfo.scala index 08efbbab9c7..5f2fbc608cc 100644 --- a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualAppInfo.scala +++ b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualAppInfo.scala @@ -58,8 +58,8 @@ class QualAppInfo( val sqlIDtoProblematic: HashMap[Long, Set[String]] = HashMap[Long, Set[String]]() - // SQL containing any Dataset operation - val sqlIDToDataSetCase: HashSet[Long] = HashSet[Long]() + // SQL containing any Dataset operation or RDD to DataSet/DataFrame operation + val sqlIDToDataSetOrRDDCase: HashSet[Long] = HashSet[Long]() val notSupportFormatAndTypes: HashMap[String, Set[String]] = HashMap[String, Set[String]]() @@ -115,19 +115,19 @@ class QualAppInfo( // for the SQL dataframe duration private def calculateSqlDataframeDuration: Long = { sqlDurationTime.filterNot { case (sqlID, dur) => - sqlIDToDataSetCase.contains(sqlID) || dur == -1 + sqlIDToDataSetOrRDDCase.contains(sqlID) || dur == -1 }.values.sum } private def probNotDataset: HashMap[Long, Set[String]] = { - sqlIDtoProblematic.filterNot { case (sqlID, _) => sqlIDToDataSetCase.contains(sqlID) } + sqlIDtoProblematic.filterNot { case (sqlID, _) => sqlIDToDataSetOrRDDCase.contains(sqlID) } } // The total task time for all tasks that ran during SQL dataframe // operations. if the SQL contains a dataset, it isn't counted. 
private def calculateTaskDataframeDuration: Long = { val validSums = sqlIDToTaskEndSum.filterNot { case (sqlID, _) => - sqlIDToDataSetCase.contains(sqlID) || sqlDurationTime.getOrElse(sqlID, -1) == -1 + sqlIDToDataSetOrRDDCase.contains(sqlID) || sqlDurationTime.getOrElse(sqlID, -1) == -1 } validSums.values.map(dur => dur.totalTaskDuration).sum } @@ -164,7 +164,7 @@ class QualAppInfo( private def calculateCpuTimePercent: Double = { val validSums = sqlIDToTaskEndSum.filterNot { case (sqlID, _) => - sqlIDToDataSetCase.contains(sqlID) || sqlDurationTime.getOrElse(sqlID, -1) == -1 + sqlIDToDataSetOrRDDCase.contains(sqlID) || sqlDurationTime.getOrElse(sqlID, -1) == -1 } val totalCpuTime = validSums.values.map { dur => dur.executorCPUTime @@ -252,8 +252,8 @@ class QualAppInfo( val allnodes = planGraph.allNodes for (node <- allnodes) { checkGraphNodeForBatchScan(sqlID, node) - if (isDataSetPlan(node.desc)) { - sqlIDToDataSetCase += sqlID + if (isDataSetOrRDDPlan(node.desc)) { + sqlIDToDataSetOrRDDCase += sqlID } val issues = findPotentialIssues(node.desc) if (issues.nonEmpty) { diff --git a/tools/src/test/resources/ProfilingExpectations/rapids_duration_and_cpu_expectation.csv b/tools/src/test/resources/ProfilingExpectations/rapids_duration_and_cpu_expectation.csv index af3bc5de080..640069b819a 100644 --- a/tools/src/test/resources/ProfilingExpectations/rapids_duration_and_cpu_expectation.csv +++ b/tools/src/test/resources/ProfilingExpectations/rapids_duration_and_cpu_expectation.csv @@ -1,2 +1,2 @@ -appIndex,App ID,sqlID,SQL Duration,Contains Dataset Op,App Duration,Potential Problems,Executor CPU Time Percent -1,application_1603128018386_7759,0,11042,false,119990,"",68.48 +appIndex,App ID,sqlID,SQL Duration,Contains Dataset or RDD Op,App Duration,Potential Problems,Executor CPU Time Percent +1,application_1603128018386_7759,0,11042,true,119990,"",68.48 diff --git a/tools/src/test/resources/ProfilingExpectations/unsupported_sql_eventlog_expectation.csv b/tools/src/test/resources/ProfilingExpectations/unsupported_sql_eventlog_expectation.csv index f0e015054d5..5ab0922c205 100644 --- a/tools/src/test/resources/ProfilingExpectations/unsupported_sql_eventlog_expectation.csv +++ b/tools/src/test/resources/ProfilingExpectations/unsupported_sql_eventlog_expectation.csv @@ -1,3 +1,5 @@ appIndex,sqlID,nodeID,nodeName,nodeDescription,reason -1,0,3,MapElements,MapElements com.nvidia.spark.rapids.tool.profiling.QualificationInfoSuite$$$Lambda$1571/993650587@7b,"Contains Dataset" -1,0,4,Filter,Filter com.nvidia.spark.rapids.tool.profiling.QualificationInfoSuite$$$Lambda$1569/1828787392@2eb6d3,"Contains Dataset" +1,0,2,SerializeFromObject,"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromSt","Contains Dataset or RDD" +1,0,3,MapElements,MapElements com.nvidia.spark.rapids.tool.profiling.QualificationInfoSuite$$$Lambda$1571/993650587@7b,"Contains Dataset or RDD" +1,0,4,Filter,Filter com.nvidia.spark.rapids.tool.profiling.QualificationInfoSuite$$$Lambda$1569/1828787392@2eb6d3,"Contains Dataset or RDD" +1,0,10,SerializeFromObject,"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromSt","Contains Dataset or RDD" \ No newline at end of file diff --git a/tools/src/test/resources/QualificationExpectations/db_sim_test_expectation.csv b/tools/src/test/resources/QualificationExpectations/db_sim_test_expectation.csv index 13e668b1d39..4fe573b3581 100644 --- 
--- a/tools/src/test/resources/QualificationExpectations/db_sim_test_expectation.csv
+++ b/tools/src/test/resources/QualificationExpectations/db_sim_test_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Score,Potential Problems,SQL Dataframe Duration,SQL Dataframe Task Duration,App Duration,Executor CPU Time Percent,App Duration Estimated,SQL Duration with Potential Problems,SQL Ids with Failures,Read Score Percent,ReadFileFormat Score,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Unsupported Nested Complex Types
-Spark shell,local-1623876083964,1417661.00,"",119903,1417661,133857,91.14,false,0,"",20,100.00,"","","",""
+Spark shell,local-1623876083964,0.0,"",0,0,133857,0.0,false,0,"",20,100.00,"","","",""
diff --git a/tools/src/test/resources/QualificationExpectations/directory_test_expectation.csv b/tools/src/test/resources/QualificationExpectations/directory_test_expectation.csv
index 13e668b1d39..28159dcac7b 100644
--- a/tools/src/test/resources/QualificationExpectations/directory_test_expectation.csv
+++ b/tools/src/test/resources/QualificationExpectations/directory_test_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Score,Potential Problems,SQL Dataframe Duration,SQL Dataframe Task Duration,App Duration,Executor CPU Time Percent,App Duration Estimated,SQL Duration with Potential Problems,SQL Ids with Failures,Read Score Percent,ReadFileFormat Score,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Unsupported Nested Complex Types
-Spark shell,local-1623876083964,1417661.00,"",119903,1417661,133857,91.14,false,0,"",20,100.00,"","","",""
+Spark shell,local-1623876083964,0.0,"",0,0,133857,0,false,0,"",20,100.00,"","","",""
diff --git a/tools/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv b/tools/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv
index a80c93cd5df..00c1a31ec83 100644
--- a/tools/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv
+++ b/tools/src/test/resources/QualificationExpectations/qual_test_simple_expectation.csv
@@ -1,5 +1,5 @@
 App Name,App ID,Score,Potential Problems,SQL Dataframe Duration,SQL Dataframe Task Duration,App Duration,Executor CPU Time Percent,App Duration Estimated,SQL Duration with Potential Problems,SQL Ids with Failures,Read Score Percent,ReadFileFormat Score,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Unsupported Nested Complex Types
-Rapids Spark Profiling Tool Unit Tests,local-1622043423018,125035.00,"",11128,125035,16319,37.81,false,0,"",20,100.00,"",JSON,"",""
 Rapids Spark Profiling Tool Unit Tests,local-1623281204390,3732.80,UDF,2032,4666,6240,46.27,false,577,"",20,0.00,"JSON[*]",JSON,"",""
+Rapids Spark Profiling Tool Unit Tests,local-1622043423018,0.0,"",0,0,16319,0.0,false,0,"",20,100.00,"",JSON,"",""
 Rapids Spark Profiling Tool Unit Tests,local-1621966649543,0.00,"",0,0,10650,0.0,false,0,"",20,100.00,"",JSON,"",""
 Rapids Spark Profiling Tool Unit Tests,local-1621955976602,0.00,"",0,0,10419,0.0,false,0,"",20,100.00,"",JSON,"",""
diff --git a/tools/src/test/resources/QualificationExpectations/spark2_expectation.csv b/tools/src/test/resources/QualificationExpectations/spark2_expectation.csv
index b308e944890..dd12452a147 100644
--- a/tools/src/test/resources/QualificationExpectations/spark2_expectation.csv
+++ b/tools/src/test/resources/QualificationExpectations/spark2_expectation.csv
@@ -1,2 +1,2 @@
 App Name,App ID,Score,Potential Problems,SQL Dataframe Duration,SQL Dataframe Task Duration,App Duration,Executor CPU Time Percent,App Duration Estimated,SQL Duration with Potential Problems,SQL Ids with Failures,Read Score Percent,ReadFileFormat Score,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Unsupported Nested Complex Types
-Spark shell,local-1624892957956,37581.00,"",3751,37581,17801,58.47,false,0,"",20,100.00,"","","",""
+Spark shell,local-1634253215009,335.07,"",1520,359,47063,67.64,false,0,"",20,66.67,Text[*],"","",""
diff --git a/tools/src/test/resources/spark-events-profiling/spark2-eventlog.zstd b/tools/src/test/resources/spark-events-profiling/spark2-eventlog.zstd
old mode 100755
new mode 100644
index 4cb3d9389ae..0d8ceb64655
Binary files a/tools/src/test/resources/spark-events-profiling/spark2-eventlog.zstd and b/tools/src/test/resources/spark-events-profiling/spark2-eventlog.zstd differ
diff --git a/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala b/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala
index 23b5cb06444..c46f83799ce 100644
--- a/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala
+++ b/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/ApplicationInfoSuite.scala
@@ -154,11 +154,11 @@ class ApplicationInfoSuite extends FunSuite with Logging {
     assert(apps.size == 1)
     assert(apps.head.sparkVersion.equals("2.2.3"))
     assert(apps.head.gpuMode.equals(false))
-    assert(apps.head.jobIdToInfo.keys.toSeq.size == 1)
+    assert(apps.head.jobIdToInfo.keys.toSeq.size == 6)
     assert(apps.head.jobIdToInfo.keys.toSeq.contains(0))
     val stage0 = apps.head.stageIdToInfo.get((0, 0))
     assert(stage0.isDefined)
-    assert(stage0.get.info.numTasks.equals(6))
+    assert(stage0.get.info.numTasks.equals(1))
   }
 
   test("test no sql eventlog") {
@@ -516,7 +516,7 @@ class ApplicationInfoSuite extends FunSuite with Logging {
     val execInfo = collect.getExecutorInfo
     assert(execInfo.size == 1)
     assert(execInfo.head.numExecutors === 1)
-    assert(execInfo.head.maxMem === 16991335219L)
+    assert(execInfo.head.maxMem === 384093388L)
   }
 
   test("test executor info cluster mode") {
@@ -612,7 +612,7 @@ class ApplicationInfoSuite extends FunSuite with Logging {
       val dotDirs = ToolTestUtils.listFilesMatching(tempSubDir, { f =>
        f.endsWith(".csv")
       })
-      assert(dotDirs.length === 11)
+      assert(dotDirs.length === 12)
       for (file <- dotDirs) {
        assert(file.getAbsolutePath.endsWith(".csv"))
        // just load each one to make sure formatted properly
@@ -642,7 +642,7 @@ class ApplicationInfoSuite extends FunSuite with Logging {
       val dotDirs = ToolTestUtils.listFilesMatching(tempSubDir, { f =>
        f.endsWith(".csv")
       })
-      assert(dotDirs.length === 9)
+      assert(dotDirs.length === 10)
       for (file <- dotDirs) {
        assert(file.getAbsolutePath.endsWith(".csv"))
        // just load each one to make sure formatted properly
diff --git a/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/CompareSuite.scala b/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/CompareSuite.scala
index 4581be8ed7b..deaed41d16d 100644
--- a/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/CompareSuite.scala
+++ b/tools/src/test/scala/com/nvidia/spark/rapids/tool/profiling/CompareSuite.scala
@@ -45,10 +45,10 @@ class CompareSuite extends FunSuite {
     val compare = new CompareApplications(apps)
     val (matchingSqlIdsRet, matchingStageIdsRet) = compare.findMatchingStages()
     // none match
-    assert(matchingSqlIdsRet.size === 29)
+    assert(matchingSqlIdsRet.size === 32)
     assert(matchingSqlIdsRet.head.outputHeaders.size == 2)
     assert(matchingSqlIdsRet.head.rows.size == 2)
-    assert(matchingStageIdsRet.size === 73)
+    assert(matchingStageIdsRet.size === 75)
     assert(matchingStageIdsRet.head.outputHeaders.size == 2)
     assert(matchingStageIdsRet.head.rows.size == 2)
   }
 }
diff --git a/tools/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala b/tools/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
index e8f7c6cf89e..88b44051a3f 100644
--- a/tools/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
+++ b/tools/src/test/scala/com/nvidia/spark/rapids/tool/qualification/QualificationSuite.scala
@@ -117,7 +117,7 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
     val (exit, appSum) = QualificationMain.mainInternal(appArgs)
     assert(exit == 0)
     assert(appSum.size == 4)
-    assert(appSum.head.appId.equals("local-1622043423018"))
+    assert(appSum.head.appId.equals("local-1623281204390"))
 
     val filename = s"$outpath/rapids_4_spark_qualification_output/" +
       s"rapids_4_spark_qualification_output.log"
@@ -153,7 +153,7 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
     val (exit, appSum) = QualificationMain.mainInternal(appArgs)
     assert(exit == 0)
     assert(appSum.size == 4)
-    assert(appSum.head.appId.equals("local-1622043423018"))
+    assert(appSum.head.appId.equals("local-1623281204390"))
 
     val filename = s"$outpath/rapids_4_spark_qualification_output/" +
       s"rapids_4_spark_qualification_output.log"
@@ -164,7 +164,7 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
     assert(lines.size == (4 + 4))
     // skip the 3 header lines
     val firstRow = lines(3)
-    assert(firstRow.contains("local-1622043423018"))
+    assert(firstRow.contains("local-1623281204390"))
   } finally {
     inputSource.close()
   }
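As a rough usage illustration of the plan-description heuristic this change extends, the sketch below reuses the same three pattern checks as `isDataSetOrRDDPlan` in `AppBase.scala` and applies them to a few hypothetical node descriptions modeled loosely on the expectation file above; everything outside the `match` expression is illustrative only:

```scala
object PlanCheckSketch {
  // Same pattern checks as the isDataSetOrRDDPlan heuristic in AppBase.scala: a node
  // description is treated as Dataset/RDD related if it mentions a lambda, ends in
  // ".apply", or (newly added in this change) contains SerializeFromObject.
  def isDataSetOrRDDPlan(desc: String): Boolean = desc match {
    case l if l.matches(".*\\$Lambda\\$.*") => true
    case a if a.endsWith(".apply") => true
    case r if r.matches(".*SerializeFromObject.*") => true
    case _ => false
  }

  def main(args: Array[String]): Unit = {
    // Hypothetical plan node descriptions.
    val descriptions = Seq(
      "SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, ...)]",
      "MapElements com.example.MySuite$$Lambda$1571/993650587@7b",
      "Filter (value#12 > 100)")

    // Prints true, true, false: only the first two look like Dataset or RDD operations.
    descriptions.foreach(d => println(s"${isDataSetOrRDDPlan(d)} <- $d"))
  }
}
```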