diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index eb4bcfd8c76fc..fb9feba41c636 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -741,10 +741,10 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f
         end
         os_version = nothing
         if os == "macos"
-            os_version = extract_os_version("macos", r".*darwin([\d\.]+)")
+            os_version = extract_os_version("macos", r".*darwin([\d\.]+)"sa)
         end
         if os == "freebsd"
-            os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)")
+            os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa)
         end
         tags["os_version"] = os_version
 
@@ -798,13 +798,13 @@ function parse_dl_name_version(path::String, os::String)
     local dlregex
     if os == "windows"
         # On Windows, libraries look like `libnettle-6.dll`
-        dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"
+        dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"sa
     elseif os == "macos"
         # On OSX, libraries look like `libnettle.6.3.dylib`
-        dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"
+        dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"sa
     else
         # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0`
-        dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"
+        dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"sa
     end
 
     m = match(dlregex, basename(path))
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index f4d240f423e89..0d17746c6d928 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -796,7 +796,7 @@ function inline_linfo_printer(code::IRCode)
     end
 end
 
-_strip_color(s::String) = replace(s, r"\e\[\d+m" => "")
+_strip_color(s::String) = replace(s, r"\e\[\d+m"a => "")
 
 function statementidx_lineinfo_printer(f, code::IRCode)
     printer = f(code.linetable)
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 6d1e4283c814d..1b661716cc2d9 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -48,7 +48,7 @@ arguments of type `Any`.
 
 To restrict deprecation to a specific signature, annotate the
 arguments of `old`. For example,
-```jldoctest; filter = r"@ .*"
+```jldoctest; filter = r"@ .*"a
 julia> new(x::Int) = x;
 
 julia> new(x::Float64) = 2x;
diff --git a/base/libc.jl b/base/libc.jl
index 0a542ecbd1a82..5b508e00bf3e0 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -225,7 +225,7 @@ function strptime(fmt::AbstractString, timestr::AbstractString)
     @static if Sys.isapple()
         # if we didn't explicitly parse the weekday or year day, use mktime
         # to fill them in automatically.
-        if !occursin(r"([^%]|^)%(a|A|j|w|Ow)", fmt)
+        if !occursin(r"([^%]|^)%(a|A|j|w|Ow)"a, fmt)
             ccall(:mktime, Int, (Ref{TmStruct},), tm)
         end
     end
diff --git a/base/methodshow.jl b/base/methodshow.jl
index d3a40db665d1c..a45b89c6ccf63 100644
--- a/base/methodshow.jl
+++ b/base/methodshow.jl
@@ -7,7 +7,7 @@ function strip_gensym(sym)
     if sym === :var"#self#" || sym === :var"#unused#"
         return empty_sym
     end
-    return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$" => s"\1"))
+    return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$"sa => s"\1"))
 end
 
 function argtype_decl(env, n, @nospecialize(sig::DataType), i::Int, nargs, isva::Bool) # -> (argname, argtype)
@@ -364,7 +364,7 @@ function url(m::Method)
     (m.file === :null || m.file === :string) && return ""
     file = string(m.file)
     line = m.line
-    line <= 0 || occursin(r"In\[[0-9]+\]", file) && return ""
+    line <= 0 || occursin(r"In\[[0-9]+\]"a, file) && return ""
     Sys.iswindows() && (file = replace(file, '\\' => '/'))
     libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2")
     if inbase(M)
diff --git a/base/path.jl b/base/path.jl
index 1fac47432cda3..c439a2800acce 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -20,22 +20,22 @@ export
 
 if Sys.isunix()
     const path_separator    = "/"
-    const path_separator_re = r"/+"
-    const path_directory_re = r"(?:^|/)\.{0,2}$"
-    const path_dir_splitter = r"^(.*?)(/+)([^/]*)$"
-    const path_ext_splitter = r"^((?:.*/)?(?:\.|[^/\.])[^/]*?)(\.[^/\.]*|)$"
+    const path_separator_re = r"/+"sa
+    const path_directory_re = r"(?:^|/)\.{0,2}$"sa
+    const path_dir_splitter = r"^(.*?)(/+)([^/]*)$"sa
+    const path_ext_splitter = r"^((?:.*/)?(?:\.|[^/\.])[^/]*?)(\.[^/\.]*|)$"sa
 
     splitdrive(path::String) = ("",path)
 elseif Sys.iswindows()
     const path_separator    = "\\"
-    const path_separator_re = r"[/\\]+"
-    const path_absolute_re  = r"^(?:[A-Za-z]+:)?[/\\]"
-    const path_directory_re = r"(?:^|[/\\])\.{0,2}$"
-    const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"
-    const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"
+    const path_separator_re = r"[/\\]+"sa
+    const path_absolute_re  = r"^(?:[A-Za-z]+:)?[/\\]"sa
+    const path_directory_re = r"(?:^|[/\\])\.{0,2}$"sa
+    const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"sa
+    const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"sa
 
     function splitdrive(path::String)
-        m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"s, path)::AbstractMatch
+        m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"sa, path)::AbstractMatch
         String(something(m.captures[1])), String(something(m.captures[2]))
     end
 else
diff --git a/base/regex.jl b/base/regex.jl
index d1ef3c9d13d48..400784e1b27d7 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -46,19 +46,24 @@ mutable struct Regex <: AbstractPattern
 end
 
 function Regex(pattern::AbstractString, flags::AbstractString)
-    options = DEFAULT_COMPILER_OPTS
+    compile_options = DEFAULT_COMPILER_OPTS
+    match_options = DEFAULT_MATCH_OPTS  # tracked separately: the 'a' flag adjusts both option sets
     for f in flags
         if f == 'a'
-            options &= ~PCRE.UCP
+            # instruct pcre2 to treat the strings as simple bytes (aka "ASCII"), not char encodings
+            compile_options &= ~PCRE.UCP  # user can re-enable with (*UCP)
+            compile_options &= ~PCRE.UTF # user can re-enable with (*UTF)
+            compile_options &= ~PCRE.MATCH_INVALID_UTF # this would force on UTF
+            match_options &= ~PCRE.NO_UTF_CHECK # if the user did force on UTF, we should check it for safety
         else
-            options |= f=='i' ? PCRE.CASELESS  :
-                       f=='m' ? PCRE.MULTILINE :
-                       f=='s' ? PCRE.DOTALL    :
-                       f=='x' ? PCRE.EXTENDED  :
-                       throw(ArgumentError("unknown regex flag: $f"))
+            compile_options |= f=='i' ? PCRE.CASELESS  :  # i/m/s/x each set one compile option
+                               f=='m' ? PCRE.MULTILINE :
+                               f=='s' ? PCRE.DOTALL    :
+                               f=='x' ? PCRE.EXTENDED  :
+                               throw(ArgumentError("unknown regex flag: $f"))  # any other flag character is rejected
         end
     end
-    Regex(pattern, options, DEFAULT_MATCH_OPTS)
+    Regex(pattern, compile_options, match_options)  # hand both option sets to the three-argument constructor
 end
 Regex(pattern::AbstractString) = Regex(pattern, DEFAULT_COMPILER_OPTS, DEFAULT_MATCH_OPTS)
 
@@ -96,9 +101,15 @@ listed after the ending quote, to change its behaviour:
 - `s` allows the `.` modifier to match newlines.
 - `x` enables "comment mode": whitespace is enabled except when escaped with `\\`, and `#`
   is treated as starting a comment.
-- `a` disables `UCP` mode (enables ASCII mode). By default `\\B`, `\\b`, `\\D`, `\\d`, `\\S`,
-  `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With this option,
-  these sequences only match ASCII characters.
+- `a` enables ASCII mode (disables `UTF` and `UCP` modes). By default `\\B`, `\\b`, `\\D`,
+  `\\d`, `\\S`, `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With
+  this option, these sequences only match ASCII characters. This also affects `\\u`, which
+  will emit the specified character value directly as a single byte, and not attempt to
+  encode it into UTF-8. Importantly, this option allows matching against invalid UTF-8
+  strings, by treating both matcher and target as simple bytes (as if they were ISO/IEC
+  8859-1 / Latin-1 bytes) instead of as character encodings. In this case, this option is
+  often combined with `s`. This option can be further refined by starting the pattern with
+  `(*UCP)` or `(*UTF)`.
 
 See [`Regex`](@ref) if interpolation is needed.
 
@@ -112,23 +123,38 @@ This regex has the first three flags enabled.
 macro r_str(pattern, flags...) Regex(pattern, flags...) end
 
 function show(io::IO, re::Regex)
-    imsxa = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED|PCRE.UCP
+    imsx = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED  # options printed as flag letters
+    ac = PCRE.UTF|PCRE.MATCH_INVALID_UTF|PCRE.UCP  # compile options the 'a' flag clears
+    am = PCRE.NO_UTF_CHECK  # match option the 'a' flag clears
     opts = re.compile_options
-    if (opts & ~imsxa) == (DEFAULT_COMPILER_OPTS & ~imsxa)
+    mopts = re.match_options
+    default = ((opts & ~imsx) | ac) == DEFAULT_COMPILER_OPTS  # defaults once imsx/'a' bits are normalized?
+    if default
+        if (opts & ac) == ac  # no 'a' flag: match options must be the defaults
+            default = mopts == DEFAULT_MATCH_OPTS
+        elseif (opts & ac) == 0  # 'a' flag: match options must be the defaults minus `am`
+            default = mopts == (DEFAULT_MATCH_OPTS & ~am)
+        else  # only some of the `ac` bits set: no flag letter can express this
+            default = false
+        end
+    end
+    if default
         print(io, "r\"")
         escape_raw_string(io, re.pattern)
         print(io, "\"")
-        if (opts & PCRE.CASELESS ) != 0; print(io, 'i'); end
-        if (opts & PCRE.MULTILINE) != 0; print(io, 'm'); end
-        if (opts & PCRE.DOTALL   ) != 0; print(io, 's'); end
-        if (opts & PCRE.EXTENDED ) != 0; print(io, 'x'); end
-        if (opts & PCRE.UCP      ) == 0; print(io, 'a'); end
+        if (opts & PCRE.CASELESS ) != 0; print(io, "i"); end
+        if (opts & PCRE.MULTILINE) != 0; print(io, "m"); end
+        if (opts & PCRE.DOTALL   ) != 0; print(io, "s"); end
+        if (opts & PCRE.EXTENDED ) != 0; print(io, "x"); end
+        if (opts & ac            ) == 0; print(io, "a"); end
     else
         print(io, "Regex(")
         show(io, re.pattern)
-        print(io, ',')
+        print(io, ", ")
         show(io, opts)
-        print(io, ')')
+        print(io, ", ")
+        show(io, mopts)
+        print(io, ")")
     end
 end
 
diff --git a/base/set.jl b/base/set.jl
index 5be7eaf004352..a91bf328bd911 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -13,7 +13,7 @@ See also: [`AbstractSet`](@ref), [`BitSet`](@ref), [`Dict`](@ref),
 [`push!`](@ref), [`empty!`](@ref), [`union!`](@ref), [`in`](@ref), [`isequal`](@ref)
 
 # Examples
-```jldoctest filter = r"^\\S.+"
+```jldoctest; filter = r"^  '.'"ma
 julia> s = Set("aaBca")
 Set{Char} with 3 elements:
   'a'
@@ -23,9 +23,9 @@ Set{Char} with 3 elements:
 julia> push!(s, 'b')
 Set{Char} with 4 elements:
   'a'
-  'c'
   'b'
   'B'
+  'c'
 
 julia> s = Set([NaN, 0.0, 1.0, 2.0]);
 
diff --git a/base/shell.jl b/base/shell.jl
index f443a1f9c094a..7c973ab289c7f 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -292,9 +292,9 @@ function shell_escape_csh(io::IO, args::AbstractString...)
         first = false
         i = 1
         while true
-            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z" => "",
-                         r"^[^']*\z" => "'", r"^[^\$\`\"]*\z" => "\"",
-                         r"^[^']+"  => "'", r"^[^\$\`\"]+"  => "\"")
+            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z"sa => "",
+                         r"^[^']*\z"sa => "'", r"^[^\$\`\"]*\z"sa => "\"",
+                         r"^[^']+"sa  => "'", r"^[^\$\`\"]+"sa  => "\"")
                 if ((m = match(r, SubString(arg, i))) !== nothing)
                     write(io, e)
                     write(io, replace(m.match, '\n' => "\\\n"))
@@ -391,7 +391,7 @@ julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"")
 """
 function shell_escape_wincmd(io::IO, s::AbstractString)
     # https://stackoverflow.com/a/4095133/1990689
-    occursin(r"[\r\n\0]", s) &&
+    occursin(r"[\r\n\0]"sa, s) &&
         throw(ArgumentError("control character unsupported by CMD.EXE"))
     i = 1
     len = ncodeunits(s)
@@ -446,7 +446,7 @@ function escape_microsoft_c_args(io::IO, args::AbstractString...)
         else
             write(io, ' ')  # separator
         end
-        if isempty(arg) || occursin(r"[ \t\"]", arg)
+        if isempty(arg) || occursin(r"[ \t\"]"sa, arg)
             # Julia raw strings happen to use the same escaping convention
             # as the argv[] parser in Microsoft's C runtime library.
             write(io, '"')
diff --git a/test/path.jl b/test/path.jl
index 4a4caa6b0b115..2f4f2d0983a58 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -171,6 +171,9 @@
         @test string(splitdrive(S(homedir()))...) == homedir()
         @test splitdrive("a\nb") == ("", "a\nb")
 
+        @test splitdir("a/\xfe/\n/b/c.ext") == ("a/\xfe/\n/b", "c.ext")
+        @test splitext("a/\xfe/\n/b/c.ext") == ("a/\xfe/\n/b/c", ".ext")
+
         if Sys.iswindows()
             @test splitdrive(S("\\\\servername\\hello.world\\filename.ext")) ==
                 ("\\\\servername\\hello.world","\\filename.ext")
diff --git a/test/regex.jl b/test/regex.jl
index 70f620cad7141..e5f1428527512 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -59,6 +59,11 @@
     @test repr(r"\\\"") == raw"r\"\\\\\\\"\""
     @test repr(s"\\\"\\") == raw"s\"\\\\\\\"\\\\\""
 
+    @test repr(r""a) == "r\"\"a"
+    @test repr(r""imsxa) == "r\"\"imsxa"
+    @test repr(Regex("", Base.DEFAULT_COMPILER_OPTS, UInt32(0))) == """Regex("", $(repr(Base.DEFAULT_COMPILER_OPTS)), $(repr(UInt32(0))))"""
+    @test repr(Regex("", UInt32(0), Base.DEFAULT_MATCH_OPTS)) == """Regex("", $(repr(UInt32(0))), $(repr(Base.DEFAULT_MATCH_OPTS)))"""
+
     # findall
     @test findall(r"\w+", "foo bar") == [1:3, 5:7]
     @test findall(r"\w+", "foo bar", overlap=true) == [1:3, 2:3, 3:3, 5:7, 6:7, 7:7]
@@ -122,18 +127,24 @@
 
     # Backcapture reference in substitution string
     @test replace("abcde", r"(..)(?P<byname>d)" => s"\g<byname>xy\\\1") == "adxy\\bce"
-    @test_throws ErrorException replace("a", r"(?P<x>)" => s"\g<y>")
+    @test_throws(ErrorException("Bad replacement string: Group y not found in regex r\"(?P<x>)\""),
+        replace("a", r"(?P<x>)" => s"\g<y>"))
     # test replace with invalid substitution group pattern
-    @test_throws ErrorException replace("s", r"(?<g1>.)" => s"\gg1>")
+    @test_throws(ErrorException("Bad replacement string: \\gg1>"),
+        replace("s", r"(?<g1>.)" => s"\gg1>"))
     # test replace with 2-digit substitution group
     @test replace(("0" ^ 9) * "1", Regex(("(0)" ^ 9) * "(1)") => s"10th group: \10") == "10th group: 1"
 
     # Proper unicode handling
     @test  match(r"∀∀", "∀x∀∀∀").match == "∀∀"
 
-    # 'a' flag to disable UCP
+    # 'a' flag to disable UCP and UTF
     @test match(r"\w+", "Düsseldorf").match == "Düsseldorf"
     @test match(r"\w+"a, "Düsseldorf").match == "D"
+    @test match(r".+"a, "Düsseldorf").match == "Düsseldorf"
+    @test match(r".+"a, "Dü\xefsseldorf").match == "Dü\xefsseldorf"
+    @test_throws(ErrorException("PCRE.exec error: $(Base.PCRE.err_message(Base.PCRE.ERROR_UTF8_ERR6))"),
+        match(r"(*UTF).+"a, "Dü\xefsseldorf"))
 
     # Regex behaves like a scalar in broadcasting
     @test occursin.(r"Hello", ["Hello", "World"]) == [true, false]
@@ -211,8 +222,7 @@
     end
 
     # Test that PCRE throws the correct kind of error
-    # TODO: Uncomment this once the corresponding change has propagated to CI
-    #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
+    @test_throws ErrorException("PCRE error: NULL regex object") Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
 
     # test that we can get the error message of negative error codes
     @test Base.PCRE.err_message(Base.PCRE.ERROR_NOMEMORY) isa String