diff --git a/airflow/providers/apache/spark/hooks/spark_submit.py b/airflow/providers/apache/spark/hooks/spark_submit.py index d94be2cc023cc..dffaf8b17eb1a 100644 --- a/airflow/providers/apache/spark/hooks/spark_submit.py +++ b/airflow/providers/apache/spark/hooks/spark_submit.py @@ -237,8 +237,23 @@ def _mask_cmd(self, connection_cmd): # Mask any password related fields in application args with key value pair # where key contains password (case insensitive), e.g. HivePassword='abc' connection_cmd_masked = re.sub( - r"(\S*?(?:secret|password)\S*?\s*=\s*')[^']*(?=')", - r'\1******', ' '.join(connection_cmd), flags=re.I) + r"(" + r"\S*?" # Match all non-whitespace characters before... + r"(?:secret|password)" # ...literally a "secret" or "password" + # word (not capturing them). + r"\S*?" # All non-whitespace characters before either... + r"(?:=|\s+)" # ...an equal sign or whitespace characters + # (not capturing them). + r"(['\"]?)" # An optional single or double quote. + r")" # This is the end of the first capturing group. + r"(?:(?!\2\s).)*" # All characters between optional quotes + # (matched above); if the value is quoted, + # it may contain whitespace. + r"(\2)", # Optional matching quote. + r'\1******\3', + ' '.join(connection_cmd), + flags=re.I, + ) return connection_cmd_masked diff --git a/tests/providers/apache/spark/hooks/test_spark_submit.py b/tests/providers/apache/spark/hooks/test_spark_submit.py index 17e43a91ca248..a56b0163f0a8f 100644 --- a/tests/providers/apache/spark/hooks/test_spark_submit.py +++ b/tests/providers/apache/spark/hooks/test_spark_submit.py @@ -754,42 +754,38 @@ def test_k8s_process_on_kill(self, mock_popen, mock_client_method): @parameterized.expand( ( ( - ("spark-submit", "foo", "--bar", "baz", "--password='secret'"), - "spark-submit foo --bar baz --password='******'", - ), - ( - ("spark-submit", "foo", "--bar", "baz", "--secret='secret'"), - "spark-submit foo --bar baz --secret='******'", + ("spark-submit", "foo", "--bar", "baz", "--password='secret'", "--foo", "bar"), + "spark-submit foo --bar baz --password='******' --foo bar", ), ( - ("spark-submit", "foo", "--bar", "baz", "--foo.password='secret'"), - "spark-submit foo --bar baz --foo.password='******'", + ("spark-submit", "foo", "--bar", "baz", "--password='secret'"), + "spark-submit foo --bar baz --password='******'", ), ( - ("spark-submit", "foo", "--bar", "baz", "--foo.password='secret'"), - "spark-submit foo --bar baz --foo.password='******'", + ("spark-submit", "foo", "--bar", "baz", '--password="secret"'), + 'spark-submit foo --bar baz --password="******"', ), ( - ("spark-submit", "foo", "--bar", "baz", "--password='secret'", "--foo", "bar"), - "spark-submit foo --bar baz --password='******' --foo bar", + ("spark-submit", "foo", "--bar", "baz", '--password=secret'), + 'spark-submit foo --bar baz --password=******', ), ( - ("spark-submit", "foo", "--bar", "baz", "--password='secret'", "--foo=bar"), - "spark-submit foo --bar baz --password='******' --foo=bar", + ("spark-submit", "foo", "--bar", "baz", "--password 'secret'"), + "spark-submit foo --bar baz --password '******'", ), ( - ("spark-submit", "foo", "--bar", "baz", "--password='secret'", "--foo='bar'"), - "spark-submit foo --bar baz --password='******' --foo='bar'", + ("spark-submit", "foo", "--bar", "baz", "--password='sec\"ret'"), + "spark-submit foo --bar baz --password='******'", ), ( - ("spark-submit", "foo", "--bar", "baz", "--password='secret'", "bar"), - "spark-submit foo --bar baz --password='******' bar", + ("spark-submit", "foo", "--bar", "baz", '--password="sec\'ret"'), + 'spark-submit foo --bar baz --password="******"', ), ( ("spark-submit",), "spark-submit", ), - ), + ) ) def test_masks_passwords(self, command: str, expected: str) -> None: # Given