From 114164bd3f6bfb67e58f8ead93c5bba0d08b8a0a Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 31 May 2024 16:45:07 -0700 Subject: [PATCH] [SPARK-48490][CORE] Unescapes any literals for message of MessageWithContext ### What changes were proposed in this pull request? The pr aims to `unescapes` any literals for `message` of `MessageWithContext` ### Why are the changes needed? - For example, before this PR ``` logInfo("This is a log message\nThis is a new line \t other msg") ``` It will output: ``` 24/05/31 22:53:27 INFO PatternLoggingSuite: This is a log message This is a new line other msg ``` But: ``` logInfo(log"This is a log message\nThis is a new line \t other msg") ``` It will output: ``` 24/05/31 22:53:59 ERROR PatternLoggingSuite: This is a log message\nThis is a new line \t other msg ``` Obviously, the latter is not the result we `expected`. ### Does this PR introduce _any_ user-facing change? Yes, fix bug. ### How was this patch tested? - Add new UT. - Pass GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46824 from panbingkun/SPARK-48490. Authored-by: panbingkun Signed-off-by: Gengliang Wang --- .../org/apache/spark/internal/Logging.scala | 3 +- .../spark/util/PatternSparkLoggerSuite.java | 6 ++ .../spark/util/SparkLoggerSuiteBase.java | 53 +++++++++++------ .../util/StructuredSparkLoggerSuite.java | 11 ++++ .../spark/util/PatternLoggingSuite.scala | 8 +++ .../spark/util/StructuredLoggingSuite.scala | 57 +++++++++++++++++++ 6 files changed, 121 insertions(+), 17 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 72c7cdfa62362..9500ed4b174e5 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -19,6 +19,7 @@ package org.apache.spark.internal import scala.jdk.CollectionConverters._ +import org.apache.commons.text.StringEscapeUtils import org.apache.logging.log4j.{CloseableThreadContext, Level, LogManager} import org.apache.logging.log4j.core.{Filter, LifeCycle, LogEvent, Logger => Log4jLogger, LoggerContext} import org.apache.logging.log4j.core.appender.ConsoleAppender @@ -99,7 +100,7 @@ case class MessageWithContext(message: String, context: java.util.HashMap[String * Companion class for lazy evaluation of the MessageWithContext instance. */ class LogEntry(messageWithContext: => MessageWithContext) { - def message: String = messageWithContext.message + def message: String = StringEscapeUtils.unescapeJava(messageWithContext.message) def context: java.util.HashMap[String, String] = messageWithContext.context } diff --git a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java index 1d2e6d76a7590..6bfe595def1d4 100644 --- a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java +++ b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java @@ -53,6 +53,12 @@ String expectedPatternForBasicMsg(Level level) { return toRegexPattern(level, ".* : This is a log message\n"); } + @Override + String expectedPatternForBasicMsgWithEscapeChar(Level level) { + return toRegexPattern(level, + ".* : This is a log message\\nThis is a new line \\t other msg\\n"); + } + @Override String expectedPatternForBasicMsgWithException(Level level) { return toRegexPattern(level, """ diff --git a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java index 079899581dc08..37b5372885b8d 100644 --- a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java +++ b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java @@ -68,16 +68,17 @@ private void checkLogOutput(Level level, Runnable func, ExpectedResult result) { } } - private String basicMsg() { - return "This is a log message"; - } + private final String basicMsg = "This is a log message"; + + private final String basicMsgWithEscapeChar = + "This is a log message\nThis is a new line \t other msg"; private final MDC executorIDMDC = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"); private final String msgWithMDC = "Lost executor {}."; private final MDC[] mdcs = new MDC[] { - MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"), - MDC.of(LogKeys.REASON$.MODULE$, "the shuffle data is too large")}; + MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"), + MDC.of(LogKeys.REASON$.MODULE$, "the shuffle data is too large")}; private final String msgWithMDCs = "Lost executor {}, reason: {}"; private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null); @@ -91,6 +92,9 @@ private String basicMsg() { // test for basic message (without any mdc) abstract String expectedPatternForBasicMsg(Level level); + // test for basic message (with escape char) + abstract String expectedPatternForBasicMsgWithEscapeChar(Level level); + // test for basic message and exception abstract String expectedPatternForBasicMsgWithException(Level level); @@ -113,12 +117,12 @@ private String basicMsg() { abstract String expectedPatternForJavaCustomLogKey(Level level); @Test - public void testBasicMsgLogger() { - Runnable errorFn = () -> logger().error(basicMsg()); - Runnable warnFn = () -> logger().warn(basicMsg()); - Runnable infoFn = () -> logger().info(basicMsg()); - Runnable debugFn = () -> logger().debug(basicMsg()); - Runnable traceFn = () -> logger().trace(basicMsg()); + public void testBasicMsg() { + Runnable errorFn = () -> logger().error(basicMsg); + Runnable warnFn = () -> logger().warn(basicMsg); + Runnable infoFn = () -> logger().info(basicMsg); + Runnable debugFn = () -> logger().debug(basicMsg); + Runnable traceFn = () -> logger().trace(basicMsg); List.of( Pair.of(Level.ERROR, errorFn), Pair.of(Level.WARN, warnFn), @@ -128,14 +132,31 @@ public void testBasicMsgLogger() { checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForBasicMsg)); } + @Test + public void testBasicMsgWithEscapeChar() { + Runnable errorFn = () -> logger().error(basicMsgWithEscapeChar); + Runnable warnFn = () -> logger().warn(basicMsgWithEscapeChar); + Runnable infoFn = () -> logger().info(basicMsgWithEscapeChar); + Runnable debugFn = () -> logger().debug(basicMsgWithEscapeChar); + Runnable traceFn = () -> logger().trace(basicMsgWithEscapeChar); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn), + Pair.of(Level.DEBUG, debugFn), + Pair.of(Level.TRACE, traceFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), + this::expectedPatternForBasicMsgWithEscapeChar)); + } + @Test public void testBasicLoggerWithException() { Throwable exception = new RuntimeException("OOM"); - Runnable errorFn = () -> logger().error(basicMsg(), exception); - Runnable warnFn = () -> logger().warn(basicMsg(), exception); - Runnable infoFn = () -> logger().info(basicMsg(), exception); - Runnable debugFn = () -> logger().debug(basicMsg(), exception); - Runnable traceFn = () -> logger().trace(basicMsg(), exception); + Runnable errorFn = () -> logger().error(basicMsg, exception); + Runnable warnFn = () -> logger().warn(basicMsg, exception); + Runnable infoFn = () -> logger().info(basicMsg, exception); + Runnable debugFn = () -> logger().debug(basicMsg, exception); + Runnable traceFn = () -> logger().trace(basicMsg, exception); List.of( Pair.of(Level.ERROR, errorFn), Pair.of(Level.WARN, warnFn), diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java index ec19014e117ce..6959fe11820ff 100644 --- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java +++ b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java @@ -69,6 +69,17 @@ String expectedPatternForBasicMsg(Level level) { }"""); } + @Override + String expectedPatternForBasicMsgWithEscapeChar(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "This is a log message\\\\nThis is a new line \\\\t other msg", + "logger": "" + }"""); + } + @Override String expectedPatternForBasicMsgWithException(Level level) { return compactAndToRegexPattern(level, """ diff --git a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala index ab9803d83bf62..6af5f2d4f19f9 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala @@ -34,6 +34,14 @@ class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll { s""".*$level $className: This is a log message\n""" } + override def expectedPatternForBasicMsgWithEscapeChar(level: Level): String = { + s""".*$level $className: This is a log message\nThis is a new line \t other msg\n""" + } + + override def expectedPatternForBasicMsgWithEscapeCharMDC(level: Level): String = { + s""".*$level $className: This is a log message\nThis is a new line \t other msg\n""" + } + override def expectedPatternForBasicMsgWithException(level: Level): String = { s""".*$level $className: This is a log message\n[\\s\\S]*""" } diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala index 694f06706421a..598ae90402a8e 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala @@ -53,6 +53,11 @@ trait LoggingSuiteBase def basicMsg: String = "This is a log message" + def basicMsgWithEscapeChar: String = "This is a log message\nThis is a new line \t other msg" + + def basicMsgWithEscapeCharMDC: LogEntry = + log"This is a log message\nThis is a new line \t other msg" + def msgWithMDC: LogEntry = log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, "1")}." def msgWithMDCValueIsNull: LogEntry = log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, null)}." @@ -66,6 +71,12 @@ trait LoggingSuiteBase // test for basic message (without any mdc) def expectedPatternForBasicMsg(level: Level): String + // test for basic message (with escape char) + def expectedPatternForBasicMsgWithEscapeChar(level: Level): String + + // test for basic message (with escape char mdc) + def expectedPatternForBasicMsgWithEscapeCharMDC(level: Level): String + // test for basic message and exception def expectedPatternForBasicMsgWithException(level: Level): String @@ -95,6 +106,30 @@ trait LoggingSuiteBase } } + test("Basic logging with escape char") { + Seq( + (Level.ERROR, () => logError(basicMsgWithEscapeChar)), + (Level.WARN, () => logWarning(basicMsgWithEscapeChar)), + (Level.INFO, () => logInfo(basicMsgWithEscapeChar)), + (Level.DEBUG, () => logDebug(basicMsgWithEscapeChar)), + (Level.TRACE, () => logTrace(basicMsgWithEscapeChar))).foreach { case (level, logFunc) => + val logOutput = captureLogOutput(logFunc) + assert(expectedPatternForBasicMsgWithEscapeChar(level).r.matches(logOutput)) + } + } + + test("Basic logging with escape char MDC") { + Seq( + (Level.ERROR, () => logError(basicMsgWithEscapeCharMDC)), + (Level.WARN, () => logWarning(basicMsgWithEscapeCharMDC)), + (Level.INFO, () => logInfo(basicMsgWithEscapeCharMDC)), + (Level.DEBUG, () => logDebug(basicMsgWithEscapeCharMDC)), + (Level.TRACE, () => logTrace(basicMsgWithEscapeCharMDC))).foreach { case (level, logFunc) => + val logOutput = captureLogOutput(logFunc) + assert(expectedPatternForBasicMsgWithEscapeCharMDC(level).r.matches(logOutput)) + } + } + test("Basic logging with Exception") { val exception = new RuntimeException("OOM") Seq( @@ -197,6 +232,28 @@ class StructuredLoggingSuite extends LoggingSuiteBase { }""") } + override def expectedPatternForBasicMsgWithEscapeChar(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "$level", + "msg": "This is a log message\\\\nThis is a new line \\\\t other msg", + "logger": "$className" + }""") + } + + override def expectedPatternForBasicMsgWithEscapeCharMDC(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "$level", + "msg": "This is a log message\\\\nThis is a new line \\\\t other msg", + "logger": "$className" + }""") + } + override def expectedPatternForBasicMsgWithException(level: Level): String = { compactAndToRegexPattern( s"""