diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 3c555f006d082..3af8ba86153a1 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -5,3 +5,5 @@
 e66bfa5dd32f93e76068c00ad882c1fc839c5af8
 # whitespace: replace non-breaking space => space
 100a741e7ab38c91d48cc929bb001afc8e09261f
+# whitespace: replace tabs => space
+b03e8ab9c7bd3e001add519571858fa04d6a249b
diff --git a/HISTORY.md b/HISTORY.md
index 0db48d5f960e3..935b203ffaa97 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -135,8 +135,9 @@ Standard library changes
 * The contextual module which is active in the REPL can be changed (it is `Main` by default),
   via the `REPL.activate(::Module)` function or via typing the module in the REPL and pressing
   the keybinding Alt-m ([#33872]).
-* An "IPython mode" which mimics the behaviour of the prompts and storing the evaluated result in `Out` can be
-  activated with `REPL.ipython_mode!()`. See the manual for how to enable this at startup ([#46474]).
+* A "numbered prompt" mode which prints numbers for each input and output and stores evaluated results in `Out` can be
+  activated with `REPL.numbered_prompt!()`. See the manual for how to enable this at startup ([#46474]).
+* Tab completion displays available keyword arguments ([#43536])
 
 #### SuiteSparse
 
@@ -176,7 +177,7 @@ Standard library changes
 
 #### DelimitedFiles
 
-* DelimitedFiles has been moved out as a separate package. It now has to be explicitly installed to be used.
+* DelimitedFiles has been moved out as a separate package.
 
 Deprecated or removed
 ---------------------
@@ -5563,18 +5564,18 @@ Deprecated or removed
 
   * several syntax whitespace insensitivities have been deprecated ([#11891]).
     ```julia
-     # function call
-     f (x)
-
-     # getindex
-     x [17]
-     rand(2) [1]
-
-     # function definition
-     f (x) = x^2
-     function foo (x)
-	x^2
-     end
+    # function call
+    f (x)
+
+    # getindex
+    x [17]
+    rand(2) [1]
+
+    # function definition
+    f (x) = x^2
+    function foo (x)
+        x^2
+    end
     ```
 
   * indexing with `Real`s that are not subtypes of `Integer` (`Rational`, `AbstractFloat`, etc.) has been deprecated ([#10458]).
diff --git a/Make.inc b/Make.inc
index 7c1ca6a5db7a8..65b1468781632 100644
--- a/Make.inc
+++ b/Make.inc
@@ -92,6 +92,16 @@ WITH_MMTK ?= 0
 # Enable DTrace support
 WITH_DTRACE := 0
 
+# Enable ITTAPI integration
+WITH_ITTAPI := 0
+
+# Enable Tracy support
+WITH_TRACY := 0
+WITH_TRACY_CALLSTACKS := 0
+
+# Enable Timing Counts support
+WITH_TIMING_COUNTS := 0
+
 # Prevent picking up $ARCH from the environment variables
 ARCH:=
 
@@ -178,7 +188,7 @@ endif
 JULIA_VERSION := $(shell cat $(JULIAHOME)/VERSION)
 JULIA_MAJOR_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'.' -f 1)
 JULIA_MINOR_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'.' -f 2)
-JULIA_PATCH_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'.' -f 3)
+JULIA_PATCH_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'+' -f 1 | cut -d'.' -f 3)
 
 # libjulia's SONAME will follow the format libjulia.so.$(SOMAJOR). Before v1.0.0,
 # SOMAJOR will be a two-decimal value, e.g. libjulia.so.0.5, whereas at and beyond
@@ -289,6 +299,9 @@ private_libdir := $(libdir)/julia
 endif
 build_private_libdir := $(build_libdir)/julia
 
+private_libexecdir := $(libexecdir)/julia
+build_private_libexecdir := $(build_libexecdir)/julia
+
 # A helper functions for dealing with lazily-evaluated, expensive operations..  Spinning
 # up a python process to, for exaxmple, parse a TOML file is expensive, and we must wait
 # until the TOML files are on-disk before we can parse them.  This means that we cannot
@@ -313,7 +326,7 @@ define cache_rel_path
 $(1)_rel_eval = $(call rel_path,$(2),$($(1)))
 $(1)_rel = $$(call hit_cache,$(1)_rel_eval)
 endef
-$(foreach D,libdir private_libdir datarootdir libexecdir docdir sysconfdir includedir,$(eval $(call cache_rel_path,$(D),$(bindir))))
+$(foreach D,libdir private_libdir datarootdir libexecdir private_libexecdir docdir sysconfdir includedir,$(eval $(call cache_rel_path,$(D),$(bindir))))
 $(foreach D,build_libdir build_private_libdir,$(eval $(call cache_rel_path,$(D),$(build_bindir))))
 
 # Save a special one: reverse_private_libdir_rel: usually just `../`, but good to be general:
@@ -449,7 +462,13 @@ endif
 
 # Compiler specific stuff
 
-CC_VERSION_STRING = $(shell $(CC) --version)
+ifeq (default,$(origin CC))
+CC := $(CROSS_COMPILE)$(CC) # attempt to add cross-compiler prefix, if the user
+                            # is not overriding the default, to form target-triple-cc (which
+                            # may not exist), and use that to decide what compiler the user
+                            # is using for the target build (or default to gcc)
+endif
+CC_VERSION_STRING = $(shell $(CC) --version 2>/dev/null)
 ifneq (,$(findstring clang,$(CC_VERSION_STRING)))
 USECLANG := 1
 USEGCC := 0
@@ -464,7 +483,7 @@ FC := $(CROSS_COMPILE)gfortran
 ifeq ($(OS), Darwin)
 APPLE_ARCH := $(shell uname -m)
 ifneq ($(APPLE_ARCH),arm64)
-MACOSX_VERSION_MIN := 10.10
+MACOSX_VERSION_MIN := 10.14
 else
 MACOSX_VERSION_MIN := 11.0
 endif
@@ -536,8 +555,8 @@ CC_BASE := $(shell echo $(CC) | cut -d' ' -f1)
 CC_ARG := $(shell echo $(CC) | cut -s -d' ' -f2-)
 CXX_BASE := $(shell echo $(CXX) | cut -d' ' -f1)
 CXX_ARG := $(shell echo $(CXX) | cut -s -d' ' -f2-)
-FC_BASE := $(shell echo $(FC) | cut -d' ' -f1)
-FC_ARG := $(shell echo $(FC) | cut -s -d' ' -f2-)
+FC_BASE := $(shell echo $(FC) 2>/dev/null | cut -d' ' -f1)
+FC_ARG := $(shell echo $(FC) 2>/dev/null | cut -s -d' ' -f2-)
 endif
 
 JFFLAGS := -O2 $(fPIC)
@@ -677,7 +696,10 @@ SANITIZE_LDFLAGS :=
 ifeq ($(SANITIZE_MEMORY),1)
 SANITIZE_OPTS += -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer
 SANITIZE_LDFLAGS += $(SANITIZE_OPTS)
-endif
+ifneq ($(findstring $(OS),Linux FreeBSD),)
+SANITIZE_LDFLAGS += -Wl,--warn-unresolved-symbols
+endif # OS Linux or FreeBSD
+endif # SANITIZE_MEMORY=1
 ifeq ($(SANITIZE_ADDRESS),1)
 SANITIZE_OPTS += -fsanitize=address
 SANITIZE_LDFLAGS += -fsanitize=address
@@ -739,7 +761,28 @@ ifeq ($(WITH_DTRACE), 1)
 JCXXFLAGS += -DUSE_DTRACE
 JCFLAGS += -DUSE_DTRACE
 DTRACE := dtrace
-else
+endif
+
+ifeq ($(WITH_ITTAPI), 1)
+JCXXFLAGS += -DUSE_ITTAPI
+JCFLAGS += -DUSE_ITTAPI
+LIBITTAPI:=-littnotify
+endif
+
+ifeq ($(WITH_TRACY), 1)
+JCXXFLAGS += -DUSE_TRACY -DTRACY_ENABLE -DTRACY_FIBERS
+JCFLAGS += -DUSE_TRACY -DTRACY_ENABLE -DTRACY_FIBERS
+LIBTRACYCLIENT:=-lTracyClient
+endif
+ifeq ($(WITH_TRACY_CALLSTACKS), 1)
+JCXXFLAGS += -DTRACY_CALLSTACK=32
+JCFLAGS += -DTRACY_CALLSTACK=32
+LIBTRACYCLIENT:=-lTracyClient
+endif
+
+ifeq ($(WITH_TIMING_COUNTS), 1)
+JCXXFLAGS += -DUSE_TIMING_COUNTS
+JCFLAGS += -DUSE_TIMING_COUNTS
 endif
 
 # ===========================================================================
@@ -857,7 +900,6 @@ endif
 # If we are running on powerpc64le or ppc64le, set certain options automatically
 ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
 JCFLAGS += -fsigned-char
-OPENBLAS_DYNAMIC_ARCH:=0
 OPENBLAS_TARGET_ARCH:=POWER8
 BINARY:=64
 # GCC doesn't do -march= on ppc64le
@@ -1045,6 +1087,10 @@ PATCHELF := patchelf
 else
 PATCHELF := $(build_depsbindir)/patchelf
 endif
+# In the standard build system we want to patch files with `--set-rpath`, but downstream
+# packagers like Spack may want to use `--add-rpath` instead, leave them the possibility to
+# choose the command.
+PATCHELF_SET_RPATH_ARG := --set-rpath
 
 ifeq ($(USE_SYSTEM_LIBWHICH), 1)
 LIBWHICH := libwhich
@@ -1190,7 +1236,7 @@ CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.31|GLIBCXX_3\.5\.|GLIBCXX_4\.
 # Note: we explicitly _do not_ define `CSL` here, since it requires some more
 # advanced techniques to decide whether it should be installed from a BB source
 # or not.  See `deps/csl.mk` for more detail.
-BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
diff --git a/Makefile b/Makefile
index bc24f9272b060..3e4dbef73bb52 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@ all: debug release
 DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir))
 ifneq ($(BUILDROOT),$(JULIAHOME))
 BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/gcext test/llvmpasses)
-BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk
+BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk $(BUILDROOT)/pkgimage.mk
 DIRS += $(BUILDDIRS)
 $(BUILDDIRMAKE): | $(BUILDDIRS)
 	@# add Makefiles to the build directories for convenience (pointing back to the source location of each)
@@ -104,7 +104,10 @@ julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-sysimg-ji julia
 
 julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest julia-libllvmcalltest julia-base-cache
 
-debug release : % : julia-%
+stdlibs-cache-release stdlibs-cache-debug : stdlibs-cache-% : julia-%
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk all-$*
+
+debug release : % : julia-% stdlibs-cache-%
 
 docs: julia-sysimg-$(JULIA_BUILD_MODE)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no'
@@ -116,7 +119,7 @@ check-whitespace:
 ifneq ($(NO_GIT), 1)
 	@# Append the directory containing the julia we just built to the end of `PATH`,
 	@# to give us the best chance of being able to run this check.
-	@PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIAHOME)/contrib/check-whitespace.jl
+	@PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl)
 else
 	$(warn "Skipping whitespace check because git is unavailable")
 endif
@@ -152,7 +155,7 @@ release-candidate: release testall
 	@echo 10. Follow packaging instructions in doc/build/distributing.md to create binary packages for all platforms
 	@echo 11. Upload to AWS, update https://julialang.org/downloads and http://status.julialang.org/stable links
 	@echo 12. Update checksums on AWS for tarball and packaged binaries
-	@echo 13. Update versions.json
+	@echo 13. Update versions.json. Wait at least 60 minutes before proceeding to step 14.
 	@echo 14. Push to Juliaup (https://github.com/JuliaLang/juliaup/wiki/Adding-a-Julia-version)
 	@echo 15. Announce on mailing lists
 	@echo 16. Change master to release-0.X in base/version.jl and base/version_git.sh as in 4cb1e20
@@ -234,6 +237,9 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libwinpthread
 else
 JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread
 endif
+ifeq ($(WITH_TRACY),1)
+JL_PRIVATE_LIBS-0 += libTracyClient
+endif
 
 
 ifeq ($(OS),Darwin)
@@ -253,7 +259,7 @@ endef
 
 install: $(build_depsbindir)/stringreplace docs
 	@$(MAKE) $(QUIET_MAKE) $(JULIA_BUILD_MODE)
-	@for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(libexecdir); do \
+	@for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(private_libexecdir); do \
 		mkdir -p $(DESTDIR)$$subdir; \
 	done
 
@@ -268,8 +274,8 @@ else ifeq ($(JULIA_BUILD_MODE),debug)
 	-$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/
 endif
 
-	# We have a single exception; we want 7z.dll to live in libexec, not bin, so that 7z.exe can find it.
-	-mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(libexecdir)/
+	# We have a single exception; we want 7z.dll to live in private_libexecdir, not bindir, so that 7z.exe can find it.
+	-mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(private_libexecdir)/
 	-$(INSTALL_M) $(build_bindir)/libopenlibm.dll.a $(DESTDIR)$(libdir)/
 	-$(INSTALL_M) $(build_libdir)/libssp.dll.a $(DESTDIR)$(libdir)/
 	# The rest are compiler dependencies, as an example memcpy is exported by msvcrt
@@ -331,11 +337,14 @@ endif
 		done \
 	done
 endif
-	# Install `7z` into libexec/
-	$(INSTALL_M) $(build_bindir)/7z$(EXE) $(DESTDIR)$(libexecdir)/
+	# Install `7z` into private_libexecdir
+	$(INSTALL_M) $(build_bindir)/7z$(EXE) $(DESTDIR)$(private_libexecdir)/
+
+	# Install `lld` into private_libexecdir
+	$(INSTALL_M) $(build_depsbindir)/lld$(EXE) $(DESTDIR)$(private_libexecdir)/
 
-	# Install `lld` into libexec/
-	$(INSTALL_M) $(build_depsbindir)/lld$(EXE) $(DESTDIR)$(libexecdir)/
+	# Install `dsymutil` into private_libexecdir/
+	$(INSTALL_M) $(build_depsbindir)/dsymutil$(EXE) $(DESTDIR)$(private_libexecdir)/
 
 	# Copy public headers
 	cp -R -L $(build_includedir)/julia/* $(DESTDIR)$(includedir)/julia
@@ -346,11 +355,6 @@ else ifeq ($(JULIA_BUILD_MODE),debug)
 	$(INSTALL_M) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) $(DESTDIR)$(private_libdir)
 endif
 
-	# Cache stdlibs
-	@$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no $(JULIAHOME)/contrib/cache_stdlibs.jl)
-	# CI uses `--check-bounds=yes` which impacts the cache flags
-	@$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes $(JULIAHOME)/contrib/cache_stdlibs.jl)
-
 	# Copy in all .jl sources as well
 	mkdir -p $(DESTDIR)$(datarootdir)/julia/base $(DESTDIR)$(datarootdir)/julia/test
 	cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base
@@ -385,7 +389,7 @@ ifneq ($(DARWIN_FRAMEWORK),1)
 endif
 else ifneq (,$(findstring $(OS),Linux FreeBSD))
 	for j in $(JL_TARGETS) ; do \
-		$(PATCHELF) --set-rpath '$$ORIGIN/$(private_libdir_rel):$$ORIGIN/$(libdir_rel)' $(DESTDIR)$(bindir)/$$j; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(private_libdir_rel):$$ORIGIN/$(libdir_rel)' $(DESTDIR)$(bindir)/$$j; \
 	done
 endif
 
@@ -417,17 +421,17 @@ endif
 endif
 else ifneq (,$(findstring $(OS),Linux FreeBSD))
 ifeq ($(JULIA_BUILD_MODE),release)
-	$(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT)
-	$(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT)
+	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT)
+	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT)
 else ifeq ($(JULIA_BUILD_MODE),debug)
-	$(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT)
-	$(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT)
+	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT)
+	$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT)
 endif
 endif
 
 	# Fix rpaths for dependencies. This should be fixed in BinaryBuilder later.
 ifeq ($(OS), Linux)
-	-$(PATCHELF) --set-rpath '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT)
+	-$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT)
 endif
 
 ifneq ($(LOADER_BUILD_DEP_LIBS),$(LOADER_INSTALL_DEP_LIBS))
@@ -437,14 +441,6 @@ ifeq ($(JULIA_BUILD_MODE),release)
 else ifeq ($(JULIA_BUILD_MODE),debug)
 	$(call stringreplace,$(DESTDIR)$(shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT),$(LOADER_DEBUG_BUILD_DEP_LIBS)$$,$(LOADER_DEBUG_INSTALL_DEP_LIBS))
 endif
-ifeq ($(OS),Darwin)
-	# Codesign the libjulia we just modified
-ifeq ($(JULIA_BUILD_MODE),release)
-	$(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(DESTDIR)$(shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT)"
-else ifeq ($(JULIA_BUILD_MODE),debug)
-	$(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(DESTDIR)$(shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)"
-endif
-endif
 endif
 
 ifeq ($(OS),FreeBSD)
@@ -457,7 +453,7 @@ ifeq ($(OS),FreeBSD)
 	# don't set libgfortran's RPATH, it won't be able to find its friends on systems
 	# that don't have the exact GCC port installed used for the build.
 	for lib in $(DESTDIR)$(private_libdir)/libgfortran*$(SHLIB_EXT)*; do \
-		$(PATCHELF) --set-rpath '$$ORIGIN' $$lib; \
+		$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $$lib; \
 	done
 endif
 
@@ -500,10 +496,6 @@ ifeq ($(OS), Linux)
 endif
 ifeq ($(OS), WINNT)
 	cd $(BUILDROOT)/julia-$(JULIA_COMMIT)/bin && rm -f llvm* llc.exe lli.exe opt.exe LTO.dll bugpoint.exe macho-dump.exe
-endif
-ifeq ($(OS),Darwin)
-	# If we're on macOS, and we have a codesigning identity, then codesign the binary-dist tarball!
-	$(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(BUILDROOT)/julia-$(JULIA_COMMIT)"
 endif
 	cd $(BUILDROOT) && $(TAR) zcvf $(JULIA_BINARYDIST_FILENAME).tar.gz julia-$(JULIA_COMMIT)
 
diff --git a/NEWS.md b/NEWS.md
index fc90e9a0746ea..5c42c469e4051 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,17 +4,32 @@ Julia v1.10 Release Notes
 New language features
 ---------------------
 
+* `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`)
+  may now be used as binary operators with arrow precedence. ([#45962])
+
 Language changes
 ----------------
 
+* When a task forks a child, the parent task's task-local RNG (random number generator) is no longer affected. The seeding of child based on the parent task also takes a more disciplined approach to collision resistance, using a design based on the SplitMix and DotMix splittable RNG schemes ([#49110]).
+* A new more-specific rule for methods resolves ambiguities containing Union{} in favor of
+  the method defined explicitly to handle the Union{} argument. This makes it possible to
+  define methods to explicitly handle Union{} without the ambiguities that commonly would
+  result previously. This also lets the runtime optimize certain method lookups in a way
+  that significantly improves load and inference times for heavily overloaded methods that
+  dispatch on Types (such as traits and constructors).
+* The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127).
 
 Compiler/Runtime improvements
 -----------------------------
 
+* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]).
+* The mark phase of the Garbage Collector is now multi-threaded ([#48600]).
 
 Command-line option changes
 ---------------------------
 
+* New option `--gcthreads` to set how many threads will be used by the Garbage Collector ([#48600]).
+  The default is set to `N/2` where `N` is the amount of worker threads (`--threads`) used by Julia.
 
 Multi-threading changes
 -----------------------
@@ -26,7 +41,9 @@ Build system changes
 
 New library functions
 ---------------------
-
+* `tanpi` is now defined. It computes tan(πx) more accurately than `tan(pi*x)` ([#48575]).
+* `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`.
+   It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot<tab>` ([#48899]).
 
 New library features
 --------------------
@@ -34,22 +51,40 @@ New library features
   is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]).
 * `binomial(x, k)` now supports non-integer `x` ([#48124]).
 * A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]).
+* `printstyled` now supports italic output ([#45164]).
 
 Standard library changes
 ------------------------
 
+* `startswith` now supports seekable `IO` streams ([#43055])
+* printing integral `Rational`s will skip the denominator in `Rational`-typed IO context (e.g. in `Arrays`) ([#45396])
 
 #### Package Manager
 
-- "Package Extensions": support for loading a piece of code based on other
-  packages being loaded in the Julia session.
-  This has similar applications as the Requires.jl package but also
-  supports precompilation and setting compatibility.
+* `Pkg.precompile` now accepts `timing` as a keyword argument which displays per package timing information for precompilation (e.g. `Pkg.precompile(timing=true)`)
+
 #### LinearAlgebra
 
+* `AbstractQ` no longer subtypes to `AbstractMatrix`. Moreover, `adjoint(Q::AbstractQ)`
+  no longer wraps `Q` in an `Adjoint` type, but instead in an `AdjointQ`, that itself
+  subtypes `AbstractQ`. This change accounts for the fact that typically `AbstractQ`
+  instances behave like function-based, matrix-backed linear operators, and hence don't
+  allow for efficient indexing. Also, many `AbstractQ` types can act on vectors/matrices
+  of different size, acting like a matrix with context-dependent size. With this change,
+  `AbstractQ` has a well-defined API that is described in detail in the
+  [Julia documentation](https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/#man-linalg-abstractq)
+  ([#46196]).
+* Adjoints and transposes of `Factorization` objects are no longer wrapped in `Adjoint`
+  and `Transpose` wrappers, respectively. Instead, they are wrapped in
+  `AdjointFactorization` and `TranposeFactorization` types, which themselves subtype
+  `Factorization` ([#46874]).
+* New functions `hermitianpart` and `hermitianpart!` for extracting the Hermitian
+  (real symmetric) part of a matrix ([#31836]).
+* The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the
+  parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]).
 
 #### Printf
-
+* Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]).
 
 #### Profile
 
@@ -71,6 +106,7 @@ Standard library changes
 
 * The `@test_broken` macro (or `@test` with `broken=true`) now complains if the test expression returns a
   non-boolean value in the same way as a non-broken test. ([#47804])
+* When a call to `@test` fails or errors inside a function, a larger stacktrace is now printed such that the location of the test within a `@testset` can be retrieved ([#49451])
 
 #### Dates
 
@@ -87,6 +123,7 @@ Standard library changes
 #### InteractiveUtils
 
  * `code_native` and `@code_native` now default to intel syntax instead of AT&T.
+ * `@time_imports` now shows the timing of any module `__init__()`s that are run ([#49529])
 
 Deprecated or removed
 ---------------------
diff --git a/README.md b/README.md
index 4c8d93f70bf72..26fbb21a8a6a7 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
-<a name="logo"/>
 <div align="center">
-<a href="https://julialang.org/" target="_blank">
-<img src="doc/src/assets/logo.svg" alt="Julia Logo" width="210" height="142"></img>
-</a>
+    <a href="https://julialang.org/" target="_blank">
+        <picture>
+          <source media="(prefers-color-scheme: dark)" srcset="doc/src/assets/julialogoheaderimage_dark.svg">
+          <img alt="The Julia logo" src="doc/src/assets/julialogoheaderimage_light.svg">
+        </picture>
+    </a>
 </div>
 
 <table>
@@ -26,7 +28,8 @@
     <tr>
         <td>Code coverage</td>
         <td>
-            <a href="https://coveralls.io/r/JuliaLang/julia?branch=master"><img src='https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls'/></a> <a href="https://codecov.io/github/JuliaLang/julia?branch=master"><img src='https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov'/></a>
+            <a href='https://coveralls.io/github/JuliaLang/julia?branch=master'><img src='https://coveralls.io/repos/github/JuliaLang/julia/badge.svg?branch=master' alt='Coverage Status'/></a>
+            <a href="https://codecov.io/gh/JuliaLang/julia"><img src="https://codecov.io/gh/JuliaLang/julia/branch/master/graph/badge.svg?token=TckCRxc7HS"/></a>
         </td>
     </tr>
 </table>
@@ -92,7 +95,7 @@ and then use the command prompt to change into the resulting julia directory. By
 Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by running:
 
-    git checkout v1.8.4
+    git checkout v1.9.0
 
 To build the `julia` executable, run `make` from within the julia directory.
 
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
index 4a35bbdb1b7ce..51950d9e2c6a1 100644
--- a/THIRDPARTY.md
+++ b/THIRDPARTY.md
@@ -24,6 +24,10 @@ own licenses:
 - [LLVM](https://releases.llvm.org/12.0.1/LICENSE.TXT) [APACHE 2.0 with LLVM Exception]
 - [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
 
+and optionally:
+
+- [ITTAPI](https://github.com/intel/ittapi/blob/master/LICENSES/BSD-3-Clause.txt) [BSD-3]
+
 Julia's `stdlib` uses the following external libraries, which have their own licenses:
 
 - [DSFMT](https://github.com/MersenneTwister-Lab/dSFMT/blob/master/LICENSE.txt) [BSD-3]
diff --git a/base/Base.jl b/base/Base.jl
index 20e729256664f..06df2edb276fd 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -35,14 +35,19 @@ getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f))
 setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error
 
 getproperty(x, f::Symbol) = (@inline; getfield(x, f))
-setproperty!(x, f::Symbol, v) = setfield!(x, f, convert(fieldtype(typeof(x), f), v))
+function setproperty!(x, f::Symbol, v)
+    ty = fieldtype(typeof(x), f)
+    val = v isa ty ? v : convert(ty, v)
+    return setfield!(x, f, val)
+end
 
 dotgetproperty(x, f) = getproperty(x, f)
 
 getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order))
 function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic)
     @inline
-    val::Core.get_binding_type(x, f) = v
+    ty = Core.get_binding_type(x, f)
+    val = v isa ty ? v : convert(ty, v)
     return setglobal!(x, f, val, order)
 end
 getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
@@ -51,14 +56,29 @@ getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order))
 setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
 
 getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
-setproperty!(x, f::Symbol, v, order::Symbol) = (@inline; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+function setproperty!(x, f::Symbol, v, order::Symbol)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = v isa ty ? v : convert(ty, v)
+    return setfield!(x, f, val, order)
+end
 
-swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic) =
-    (@inline; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
-modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic) =
-    (@inline; Core.modifyfield!(x, f, op, v, order))
-replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) =
-    (@inline; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order))
+function swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = v isa ty ? v : convert(ty, v)
+    return Core.swapfield!(x, f, val, order)
+end
+function modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic)
+    @inline
+    return Core.modifyfield!(x, f, op, v, order)
+end
+function replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+    @inline
+    ty = fieldtype(typeof(x), f)
+    val = desired isa ty ? desired : convert(ty, desired)
+    return Core.replacefield!(x, f, expected, val, success_order, fail_order)
+end
 
 convert(::Type{Any}, Core.@nospecialize x) = x
 convert(::Type{T}, x::T) where {T} = x
@@ -105,15 +125,27 @@ include("options.jl")
 
 # define invoke(f, T, args...; kwargs...), without kwargs wrapping
 # to forward to invoke
-function Core.kwcall(kwargs, ::typeof(invoke), f, T, args...)
+function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke), f, T, args...)
     @inline
     # prepend kwargs and f to the invoked from the user
-    T = rewrap_unionall(Tuple{Any, Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T)
+    T = rewrap_unionall(Tuple{Core.Typeof(kwargs), Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T)
     return invoke(Core.kwcall, T, kwargs, f, args...)
 end
 # invoke does not have its own call cache, but kwcall for invoke does
 setfield!(typeof(invoke).name.mt, :max_args, 3, :monotonic) # invoke, f, T, args...
 
+# define applicable(f, T, args...; kwargs...), without kwargs wrapping
+# to forward to applicable
+function Core.kwcall(kwargs::NamedTuple, ::typeof(applicable), @nospecialize(args...))
+    @inline
+    return applicable(Core.kwcall, kwargs, args...)
+end
+function Core._hasmethod(@nospecialize(f), @nospecialize(t)) # this function has a special tfunc (TODO: make this a Builtin instead like applicable)
+    tt = rewrap_unionall(Tuple{Core.Typeof(f), (unwrap_unionall(t)::DataType).parameters...}, t)
+    return Core._hasmethod(tt)
+end
+
+
 # core operations & types
 include("promotion.jl")
 include("tuple.jl")
@@ -137,7 +169,7 @@ include("refpointer.jl")
 delete_method(which(Pair{Any,Any}, (Any, Any)))
 @eval function (P::Type{Pair{A, B}})(@nospecialize(a), @nospecialize(b)) where {A, B}
     @inline
-    return $(Expr(:new, :P, :(convert(A, a)), :(convert(B, b))))
+    return $(Expr(:new, :P, :(a isa A ? a : convert(A, a)), :(b isa B ? b : convert(B, b))))
 end
 
 # The REPL stdlib hooks into Base using this Ref
@@ -186,10 +218,6 @@ end
 include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
 include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
 
-# These used to be in build_h.jl and are retained for backwards compatibility
-const libblas_name = "libblastrampoline"
-const liblapack_name = "libblastrampoline"
-
 # numeric operations
 include("hashing.jl")
 include("rounding.jl")
@@ -290,6 +318,11 @@ include("sysinfo.jl")
 include("libc.jl")
 using .Libc: getpid, gethostname, time
 
+# These used to be in build_h.jl and are retained for backwards compatibility.
+# NOTE: keep in sync with `libblastrampoline_jll.libblastrampoline`.
+const libblas_name = "libblastrampoline" * (Sys.iswindows() ? "-5" : "")
+const liblapack_name = libblas_name
+
 # Logging
 include("logging.jl")
 using .CoreLogging
@@ -329,6 +362,7 @@ include("math.jl")
 using .Math
 const (√)=sqrt
 const (∛)=cbrt
+const (∜)=fourthroot
 
 # now switch to a simple, race-y TLS, relative include for the rest of Base
 delete_method(which(include, (Module, String)))
@@ -424,17 +458,15 @@ include("loading.jl")
 # misc useful functions & macros
 include("timing.jl")
 include("util.jl")
-
+include("client.jl")
 include("asyncmap.jl")
 
 # deprecated functions
 include("deprecated.jl")
-
-# Some basic documentation
+#
+# Some additional basic documentation
 include("docs/basedocs.jl")
 
-include("client.jl")
-
 # Documentation -- should always be included last in sysimg.
 include("docs/Docs.jl")
 using .Docs
@@ -467,7 +499,7 @@ in_sysimage(pkgid::PkgId) = pkgid in _sysimage_modules
 for match = _methods(+, (Int, Int), -1, get_world_counter())
     m = match.method
     delete!(push!(Set{Method}(), m), m)
-    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match)))
+    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt)))
 
     empty!(Set())
     push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
@@ -517,6 +549,32 @@ for match = _methods(+, (Int, Int), -1, get_world_counter())
 end
 
 if is_primary_base_module
+
+# Profiling helper
+# triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request
+# Needs to be in Base because Profile is no longer loaded on boot
+const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}()
+function profile_printing_listener()
+    profile = nothing
+    try
+        while true
+            wait(PROFILE_PRINT_COND[])
+            profile = @something(profile, require(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))
+
+            invokelatest(profile.peek_report[])
+            if Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true
+                println(stderr, "Saving heap snapshot...")
+                fname = invokelatest(profile.take_heap_snapshot)
+                println(stderr, "Heap snapshot saved to `$(fname)`")
+            end
+        end
+    catch ex
+        if !isa(ex, InterruptException)
+            @error "Profile printing listener crashed" exception=ex,catch_backtrace()
+        end
+    end
+end
+
 function __init__()
     # Base library init
     reinit_stdio()
@@ -529,6 +587,17 @@ function __init__()
     if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES")
         MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"])
     end
+    # Profiling helper
+    @static if !Sys.iswindows()
+        # triggering a profile via signals is not implemented on windows
+        cond = Base.AsyncCondition()
+        Base.uv_unref(cond.handle)
+        PROFILE_PRINT_COND[] = cond
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle)
+        errormonitor(Threads.@spawn(profile_printing_listener()))
+    end
+    # Prevent spawned Julia process from getting stuck waiting on Tracy to connect.
+    delete!(ENV, "JULIA_WAIT_FOR_TRACY")
     nothing
 end
 
diff --git a/base/Makefile b/base/Makefile
index d92302b766988..493302af78b02 100644
--- a/base/Makefile
+++ b/base/Makefile
@@ -3,6 +3,9 @@ BUILDDIR := .
 JULIAHOME := $(abspath $(SRCDIR)/..)
 include $(JULIAHOME)/Make.inc
 
+# import LLVM_SHARED_LIB_NAME
+include $(JULIAHOME)/deps/llvm-ver.make
+
 TAGGED_RELEASE_BANNER := ""
 
 all: $(addprefix $(BUILDDIR)/,pcre_h.jl errno_h.jl build_h.jl.phony features_h.jl file_constants.jl uv_constants.jl version_git.jl.phony)
@@ -57,6 +60,7 @@ else
 	@echo "const USE_GPL_LIBS = false" >> $@
 endif
 	@echo "const libllvm_version_string = \"$$($(LLVM_CONFIG_HOST) --version)\"" >> $@
+	@echo "const libllvm_name = \"$(LLVM_SHARED_LIB_NAME)\"" >> $@
 	@echo "const VERSION_STRING = \"$(JULIA_VERSION)\"" >> $@
 	@echo "const TAGGED_RELEASE_BANNER = \"$(TAGGED_RELEASE_BANNER)\"" >> $@
 ifeq ($(OS),WINNT)
@@ -66,6 +70,7 @@ ifeq ($(OS),WINNT)
 	@printf 'const LIBDIR = "%s"\n' '$(subst /,\\,$(libdir_rel))' >> $@
 	@printf 'const LIBEXECDIR = "%s"\n' '$(subst /,\\,$(libexecdir_rel))' >> $@
 	@printf 'const PRIVATE_LIBDIR = "%s"\n' '$(subst /,\\,$(private_libdir_rel))' >> $@
+	@printf 'const PRIVATE_LIBEXECDIR = "%s"\n' '$(subst /,\\,$(private_libexecdir_rel))' >> $@
 	@printf 'const INCLUDEDIR = "%s"\n' '$(subst /,\\,$(includedir_rel))' >> $@
 else
 	@echo "const SYSCONFDIR = \"$(sysconfdir_rel)\"" >> $@
@@ -74,6 +79,7 @@ else
 	@echo "const LIBDIR = \"$(libdir_rel)\"" >> $@
 	@echo "const LIBEXECDIR = \"$(libexecdir_rel)\"" >> $@
 	@echo "const PRIVATE_LIBDIR = \"$(private_libdir_rel)\"" >> $@
+	@echo "const PRIVATE_LIBEXECDIR = \"$(private_libexecdir_rel)\"" >> $@
 	@echo "const INCLUDEDIR = \"$(includedir_rel)\"" >> $@
 endif
 ifeq ($(DARWIN_FRAMEWORK), 1)
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 256c5262b9bcd..cb3956eb7c6d4 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -113,6 +113,7 @@ has_offset_axes(A::AbstractVector) = Int(firstindex(A))::Int != 1 # improve perf
 # note: this could call `any` directly if the compiler can infer it
 has_offset_axes(As...) = _any_tuple(has_offset_axes, false, As...)
 has_offset_axes(::Colon) = false
+has_offset_axes(::Array) = false
 
 """
     require_one_based_indexing(A::AbstractArray)
@@ -182,11 +183,13 @@ CartesianIndex{2}
      For arrays, this function requires at least Julia 1.2.
 """
 keytype(a::AbstractArray) = keytype(typeof(a))
+keytype(::Type{Union{}}, slurp...) = eltype(Union{})
 
 keytype(A::Type{<:AbstractArray}) = CartesianIndex{ndims(A)}
 keytype(A::Type{<:AbstractVector}) = Int
 
 valtype(a::AbstractArray) = valtype(typeof(a))
+valtype(::Type{Union{}}, slurp...) = eltype(Union{})
 
 """
     valtype(T::Type{<:AbstractArray})
@@ -231,7 +234,7 @@ UInt8
 ```
 """
 eltype(::Type) = Any
-eltype(::Type{Bottom}) = throw(ArgumentError("Union{} does not have elements"))
+eltype(::Type{Bottom}, slurp...) = throw(ArgumentError("Union{} does not have elements"))
 eltype(x) = eltype(typeof(x))
 eltype(::Type{<:AbstractArray{E}}) where {E} = @isdefined(E) ? E : Any
 
@@ -267,6 +270,7 @@ julia> ndims(A)
 """
 ndims(::AbstractArray{T,N}) where {T,N} = N
 ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N
+ndims(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements"))
 
 """
     length(collection) -> Integer
@@ -766,6 +770,8 @@ false
 checkindex(::Type{Bool}, inds::AbstractUnitRange, i) =
     throw(ArgumentError("unable to check bounds for indices of type $(typeof(i))"))
 checkindex(::Type{Bool}, inds::AbstractUnitRange, i::Real) = (first(inds) <= i) & (i <= last(inds))
+checkindex(::Type{Bool}, inds::IdentityUnitRange, i::Real) = checkindex(Bool, inds.indices, i)
+checkindex(::Type{Bool}, inds::OneTo{T}, i::T) where {T<:BitInteger} = unsigned(i - one(i)) < unsigned(last(inds))
 checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Colon) = true
 checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Slice) = true
 function checkindex(::Type{Bool}, inds::AbstractUnitRange, r::AbstractRange)
@@ -1982,12 +1988,16 @@ julia> cat(1, [2], [3;;]; dims=Val(2))
 
 # The specializations for 1 and 2 inputs are important
 # especially when running with --inline=no, see #11158
+# The specializations for Union{AbstractVecOrMat,Number} are necessary
+# to have more specialized methods here than in LinearAlgebra/uniformscaling.jl
 vcat(A::AbstractArray) = cat(A; dims=Val(1))
 vcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(1))
 vcat(A::AbstractArray...) = cat(A...; dims=Val(1))
+vcat(A::Union{AbstractVecOrMat,Number}...) = cat(A...; dims=Val(1))
 hcat(A::AbstractArray) = cat(A; dims=Val(2))
 hcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(2))
 hcat(A::AbstractArray...) = cat(A...; dims=Val(2))
+hcat(A::Union{AbstractVecOrMat,Number}...) = cat(A...; dims=Val(2))
 
 typed_vcat(T::Type, A::AbstractArray) = _cat_t(Val(1), T, A)
 typed_vcat(T::Type, A::AbstractArray, B::AbstractArray) = _cat_t(Val(1), T, A, B)
@@ -2137,6 +2147,8 @@ end
 
 hvcat(rows::Tuple{Vararg{Int}}, xs::Number...) = typed_hvcat(promote_typeof(xs...), rows, xs...)
 hvcat(rows::Tuple{Vararg{Int}}, xs...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...)
+# the following method is needed to provide a more specific one compared to LinearAlgebra/uniformscaling.jl
+hvcat(rows::Tuple{Vararg{Int}}, xs::Union{AbstractVecOrMat,Number}...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...)
 
 function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, xs::Number...) where T
     nr = length(rows)
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index c523a25cecd3f..9dba5369a2a66 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -536,12 +536,12 @@ function hash(a::AbstractDict, h::UInt)
     hash(hv, h)
 end
 
-function getindex(t::AbstractDict, key)
+function getindex(t::AbstractDict{<:Any,V}, key) where V
     v = get(t, key, secret_table_token)
     if v === secret_table_token
         throw(KeyError(key))
     end
-    return v
+    return v::V
 end
 
 # t[k1,k2,ks...] is syntactic sugar for t[(k1,k2,ks...)].  (Note
@@ -560,8 +560,6 @@ function get!(default::Callable, t::AbstractDict{K,V}, key) where K where V
 end
 
 push!(t::AbstractDict, p::Pair) = setindex!(t, p.second, p.first)
-push!(t::AbstractDict, p::Pair, q::Pair) = push!(push!(t, p), q)
-push!(t::AbstractDict, p::Pair, q::Pair, r::Pair...) = push!(push!(push!(t, p), q), r...)
 
 # AbstractDicts are convertible
 convert(::Type{T}, x::T) where {T<:AbstractDict} = x
diff --git a/base/array.jl b/base/array.jl
index 694a3913cacf4..68e3e38992731 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -122,8 +122,50 @@ const DenseVecOrMat{T} = Union{DenseVector{T}, DenseMatrix{T}}
 
 using Core: arraysize, arrayset, const_arrayref
 
+"""
+    @_safeindex
+
+This internal macro converts:
+- `getindex(xs::Tuple, )` -> `__inbounds_getindex(args...)`
+- `setindex!(xs::Vector, args...)` -> `__inbounds_setindex!(xs, args...)`
+to tell the compiler that indexing operations within the applied expression are always
+inbounds and do not need to taint `:consistent` and `:nothrow`.
+"""
+macro _safeindex(ex)
+    return esc(_safeindex(__module__, ex))
+end
+function _safeindex(__module__, ex)
+    isa(ex, Expr) || return ex
+    if ex.head === :(=)
+        lhs = arrayref(true, ex.args, 1)
+        if isa(lhs, Expr) && lhs.head === :ref # xs[i] = x
+            rhs = arrayref(true, ex.args, 2)
+            xs = arrayref(true, lhs.args, 1)
+            args = Vector{Any}(undef, length(lhs.args)-1)
+            for i = 2:length(lhs.args)
+                arrayset(true, args, _safeindex(__module__, arrayref(true, lhs.args, i)), i-1)
+            end
+            return Expr(:call, GlobalRef(__module__, :__inbounds_setindex!), xs, _safeindex(__module__, rhs), args...)
+        end
+    elseif ex.head === :ref # xs[i]
+        return Expr(:call, GlobalRef(__module__, :__inbounds_getindex), ex.args...)
+    end
+    args = Vector{Any}(undef, length(ex.args))
+    for i = 1:length(ex.args)
+        arrayset(true, args, _safeindex(__module__, arrayref(true, ex.args, i)), i)
+    end
+    return Expr(ex.head, args...)
+end
+
 vect() = Vector{Any}()
-vect(X::T...) where {T} = T[ X[i] for i = 1:length(X) ]
+function vect(X::T...) where T
+    @_terminates_locally_meta
+    vec = Vector{T}(undef, length(X))
+    @_safeindex for i = 1:length(X)
+        vec[i] = X[i]
+    end
+    return vec
+end
 
 """
     vect(X...)
@@ -145,7 +187,7 @@ function vect(X...)
     return T[X...]
 end
 
-size(a::Array, d::Integer) = arraysize(a, convert(Int, d))
+size(a::Array, d::Integer) = arraysize(a, d isa Int ? d : convert(Int, d))
 size(a::Vector) = (arraysize(a,1),)
 size(a::Matrix) = (arraysize(a,1), arraysize(a,2))
 size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims)
@@ -177,11 +219,11 @@ function _unsetindex!(A::Array{T}, i::Int) where {T}
     t = @_gc_preserve_begin A
     p = Ptr{Ptr{Cvoid}}(pointer(A, i))
     if !allocatedinline(T)
-        unsafe_store!(p, C_NULL)
+        Intrinsics.atomic_pointerset(p, C_NULL, :monotonic)
     elseif T isa DataType
         if !datatype_pointerfree(T)
-            for j = 1:(Core.sizeof(T) ÷ Core.sizeof(Ptr{Cvoid}))
-                unsafe_store!(p, C_NULL, j)
+            for j = 1:Core.sizeof(Ptr{Cvoid}):Core.sizeof(T)
+                Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic)
             end
         end
     end
@@ -210,7 +252,10 @@ function bitsunionsize(u::Union)
     return sz
 end
 
+# Deprecate this, as it seems to have no documented meaning and is unused here,
+# but is frequently accessed in packages
 elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T)
+elsize(::Type{Union{}}, slurp...) = 0
 sizeof(a::Array) = Core.sizeof(a)
 
 function isassigned(a::Array, i::Int...)
@@ -267,7 +312,7 @@ end
 """
     unsafe_copyto!(dest::Array, do, src::Array, so, N)
 
-Copy `N` elements from a source array to a destination, starting at offset `so` in the
+Copy `N` elements from a source array to a destination, starting at the linear index `so` in the
 source and `do` in the destination (1-indexed).
 
 The `unsafe` prefix on this function indicates that no validation is performed to ensure
@@ -307,8 +352,8 @@ unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) =
 """
     copyto!(dest, do, src, so, N)
 
-Copy `N` elements from collection `src` starting at offset `so`, to array `dest` starting at
-offset `do`. Return `dest`.
+Copy `N` elements from collection `src` starting at the linear index `so`, to array `dest` starting at
+the index `do`. Return `dest`.
 """
 function copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer)
     return _copyto_impl!(dest, doffs, src, soffs, n)
@@ -321,7 +366,7 @@ end
 
 function _copyto_impl!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer)
     n == 0 && return dest
-    n > 0 || _throw_argerror()
+    n > 0 || _throw_argerror("Number of elements to copy must be nonnegative.")
     @boundscheck checkbounds(dest, doffs:doffs+n-1)
     @boundscheck checkbounds(src, soffs:soffs+n-1)
     unsafe_copyto!(dest, doffs, src, soffs, n)
@@ -331,10 +376,7 @@ end
 # Outlining this because otherwise a catastrophic inference slowdown
 # occurs, see discussion in #27874.
 # It is also mitigated by using a constant string.
-function _throw_argerror()
-    @noinline
-    throw(ArgumentError("Number of elements to copy must be nonnegative."))
-end
+_throw_argerror(s) = (@noinline; throw(ArgumentError(s)))
 
 copyto!(dest::Array, src::Array) = copyto!(dest, 1, src, 1, length(src))
 
@@ -344,7 +386,7 @@ copyto!(dest::Array{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, leng
 # N.B: The generic definition in multidimensional.jl covers, this, this is just here
 # for bootstrapping purposes.
 function fill!(dest::Array{T}, x) where T
-    xT = convert(T, x)
+    xT = x isa T ? x : convert(T, x)::T
     for i in eachindex(dest)
         @inbounds dest[i] = xT
     end
@@ -397,9 +439,11 @@ julia> getindex(Int8, 1, 2, 3)
 ```
 """
 function getindex(::Type{T}, vals...) where T
+    @inline
+    @_effect_free_terminates_locally_meta
     a = Vector{T}(undef, length(vals))
     if vals isa NTuple
-        @inbounds for i in 1:length(vals)
+        @_safeindex for i in 1:length(vals)
             a[i] = vals[i]
         end
     else
@@ -413,8 +457,9 @@ function getindex(::Type{T}, vals...) where T
 end
 
 function getindex(::Type{Any}, @nospecialize vals...)
+    @_effect_free_terminates_locally_meta
     a = Vector{Any}(undef, length(vals))
-    @inbounds for i = 1:length(vals)
+    @_safeindex for i = 1:length(vals)
         a[i] = vals[i]
     end
     return a
@@ -422,7 +467,7 @@ end
 getindex(::Type{Any}) = Vector{Any}()
 
 function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer)
-    ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), a, convert(eltype(a), x), length(a))
+    ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), a, x isa eltype(a) ? x : convert(eltype(a), x), length(a))
     return a
 end
 
@@ -966,9 +1011,15 @@ Dict{String, Int64} with 2 entries:
 """
 function setindex! end
 
-@eval setindex!(A::Array{T}, x, i1::Int) where {T} = arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1)
+@eval setindex!(A::Array{T}, x, i1::Int) where {T} =
+    arrayset($(Expr(:boundscheck)), A, x isa T ? x : convert(T,x)::T, i1)
 @eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
-    (@inline; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
+    (@inline; arrayset($(Expr(:boundscheck)), A, x isa T ? x : convert(T,x)::T, i1, i2, I...))
+
+__inbounds_setindex!(A::Array{T}, x, i1::Int) where {T} =
+    arrayset(false, A, convert(T,x)::T, i1)
+__inbounds_setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
+    (@inline; arrayset(false, A, convert(T,x)::T, i1, i2, I...))
 
 # This is redundant with the abstract fallbacks but needed and helpful for bootstrap
 function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
@@ -1055,26 +1106,27 @@ See also [`pushfirst!`](@ref).
 """
 function push! end
 
-function push!(a::Array{T,1}, item) where T
+function push!(a::Vector{T}, item) where T
     # convert first so we don't grow the array if the assignment won't work
-    itemT = convert(T, item)
+    itemT = item isa T ? item : convert(T, item)::T
     _growend!(a, 1)
-    @inbounds a[end] = itemT
+    @_safeindex a[length(a)] = itemT
     return a
 end
 
 # specialize and optimize the single argument case
 function push!(a::Vector{Any}, @nospecialize x)
     _growend!(a, 1)
-    arrayset(true, a, x, length(a))
+    @_safeindex a[length(a)] = x
     return a
 end
 function push!(a::Vector{Any}, @nospecialize x...)
+    @_terminates_locally_meta
     na = length(a)
     nx = length(x)
     _growend!(a, nx)
-    for i = 1:nx
-        arrayset(true, a, x[i], na+i)
+    @_safeindex for i = 1:nx
+        a[na+i] = x[i]
     end
     return a
 end
@@ -1115,6 +1167,8 @@ See [`sizehint!`](@ref) for notes about the performance model.
 See also [`vcat`](@ref) for vectors, [`union!`](@ref) for sets,
 and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order.
 """
+function append! end
+
 function append!(a::Vector, items::AbstractVector)
     itemindices = eachindex(items)
     n = length(itemindices)
@@ -1128,17 +1182,21 @@ push!(a::AbstractVector, iter...) = append!(a, iter)
 
 append!(a::AbstractVector, iter...) = foldl(append!, iter, init=a)
 
-function _append!(a, ::Union{HasLength,HasShape}, iter)
+function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter)
+    @_terminates_locally_meta
     n = length(a)
     i = lastindex(a)
     resize!(a, n+Int(length(iter))::Int)
-    @inbounds for (i, item) in zip(i+1:lastindex(a), iter)
-        a[i] = item
+    for (i, item) in zip(i+1:lastindex(a), iter)
+        if isa(a, Vector) # give better effects for builtin vectors
+            @_safeindex a[i] = item
+        else
+            a[i] = item
+        end
     end
     a
 end
-
-function _append!(a, ::IteratorSize, iter)
+function _append!(a::AbstractVector, ::IteratorSize, iter)
     for item in iter
         push!(a, item)
     end
@@ -1193,17 +1251,18 @@ pushfirst!(a::Vector, iter...) = prepend!(a, iter)
 
 prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, init=a)
 
-function _prepend!(a, ::Union{HasLength,HasShape}, iter)
+function _prepend!(a::Vector, ::Union{HasLength,HasShape}, iter)
+    @_terminates_locally_meta
     require_one_based_indexing(a)
     n = length(iter)
     _growbeg!(a, n)
     i = 0
     for item in iter
-        @inbounds a[i += 1] = item
+        @_safeindex a[i += 1] = item
     end
     a
 end
-function _prepend!(a, ::IteratorSize, iter)
+function _prepend!(a::Vector, ::IteratorSize, iter)
     n = 0
     for item in iter
         n += 1
@@ -1249,7 +1308,7 @@ function resize!(a::Vector, nl::Integer)
         _growend!(a, nl-l)
     elseif nl != l
         if nl < 0
-            throw(ArgumentError("new length must be ≥ 0"))
+            _throw_argerror("new length must be ≥ 0")
         end
         _deleteend!(a, l-nl)
     end
@@ -1329,7 +1388,7 @@ julia> pop!(Dict(1=>2))
 """
 function pop!(a::Vector)
     if isempty(a)
-        throw(ArgumentError("array must be non-empty"))
+        _throw_argerror("array must be non-empty")
     end
     item = a[end]
     _deleteend!(a, 1)
@@ -1403,24 +1462,25 @@ julia> pushfirst!([1, 2, 3, 4], 5, 6)
  4
 ```
 """
-function pushfirst!(a::Array{T,1}, item) where T
-    item = convert(T, item)
+function pushfirst!(a::Vector{T}, item) where T
+    item = item isa T ? item : convert(T, item)::T
     _growbeg!(a, 1)
-    a[1] = item
+    @_safeindex a[1] = item
     return a
 end
 
 # specialize and optimize the single argument case
 function pushfirst!(a::Vector{Any}, @nospecialize x)
     _growbeg!(a, 1)
-    a[1] = x
+    @_safeindex a[1] = x
     return a
 end
 function pushfirst!(a::Vector{Any}, @nospecialize x...)
+    @_terminates_locally_meta
     na = length(a)
     nx = length(x)
     _growbeg!(a, nx)
-    for i = 1:nx
+    @_safeindex for i = 1:nx
         a[i] = x[i]
     end
     return a
@@ -1460,7 +1520,7 @@ julia> A
 """
 function popfirst!(a::Vector)
     if isempty(a)
-        throw(ArgumentError("array must be non-empty"))
+        _throw_argerror("array must be non-empty")
     end
     item = a[1]
     _deletebeg!(a, 1)
@@ -1490,7 +1550,7 @@ julia> insert!(Any[1:6;], 3, "here")
 """
 function insert!(a::Array{T,1}, i::Integer, item) where T
     # Throw convert error before changing the shape of the array
-    _item = convert(T, item)
+    _item = item isa T ? item : convert(T, item)::T
     _growat!(a, i, 1)
     # _growat! already did bound check
     @inbounds a[i] = _item
@@ -1600,7 +1660,7 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
         (i,s) = y
         if !(q <= i <= n)
             if i < q
-                throw(ArgumentError("indices must be unique and sorted"))
+                _throw_argerror("indices must be unique and sorted")
             else
                 throw(BoundsError())
             end
@@ -1856,7 +1916,7 @@ for (f,_f) in ((:reverse,:_reverse), (:reverse!,:_reverse!))
         $_f(A::AbstractVector, ::Colon) = $f(A, firstindex(A), lastindex(A))
         $_f(A::AbstractVector, dim::Tuple{Integer}) = $_f(A, first(dim))
         function $_f(A::AbstractVector, dim::Integer)
-            dim == 1 || throw(ArgumentError("invalid dimension $dim ≠ 1"))
+            dim == 1 || _throw_argerror(LazyString("invalid dimension ", dim, " ≠ 1"))
             return $_f(A, :)
         end
     end
@@ -1916,7 +1976,7 @@ function reverse!(v::AbstractVector, start::Integer, stop::Integer=lastindex(v))
     return v
 end
 
-# concatenations of homogeneous combinations of vectors, horizontal and vertical
+# concatenations of (in)homogeneous combinations of vectors, horizontal and vertical
 
 vcat() = Vector{Any}()
 hcat() = Vector{Any}()
@@ -1930,6 +1990,7 @@ function hcat(V::Vector{T}...) where T
     end
     return [ V[j][i]::T for i=1:length(V[1]), j=1:length(V) ]
 end
+hcat(A::Vector...) = cat(A...; dims=Val(2)) # more special than SparseArrays's hcat
 
 function vcat(arrays::Vector{T}...) where T
     n = 0
@@ -1946,6 +2007,19 @@ function vcat(arrays::Vector{T}...) where T
     end
     return arr
 end
+vcat(A::Vector...) = cat(A...; dims=Val(1)) # more special than SparseArrays's vcat
+
+# disambiguation with LinAlg/special.jl
+# Union{Number,Vector,Matrix} is for LinearAlgebra._DenseConcatGroup
+# VecOrMat{T} is for LinearAlgebra._TypedDenseConcatGroup
+hcat(A::Union{Number,Vector,Matrix}...) = cat(A...; dims=Val(2))
+hcat(A::VecOrMat{T}...) where {T} = typed_hcat(T, A...)
+vcat(A::Union{Number,Vector,Matrix}...) = cat(A...; dims=Val(1))
+vcat(A::VecOrMat{T}...) where {T} = typed_vcat(T, A...)
+hvcat(rows::Tuple{Vararg{Int}}, xs::Union{Number,Vector,Matrix}...) =
+    typed_hvcat(promote_eltypeof(xs...), rows, xs...)
+hvcat(rows::Tuple{Vararg{Int}}, xs::VecOrMat{T}...) where {T} =
+    typed_hvcat(T, rows, xs...)
 
 _cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(Returns(1), n-1)..., length(x)))
 
@@ -2117,7 +2191,7 @@ findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitR
 function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::StepRange{T,S}) where {T,S}
     isempty(r) && return nothing
     minimum(r) <= p.x <= maximum(r) || return nothing
-    d = convert(S, p.x - first(r))
+    d = convert(S, p.x - first(r))::S
     iszero(d % step(r)) || return nothing
     return d ÷ step(r) + 1
 end
@@ -2338,7 +2412,11 @@ julia> findall(x -> x >= 0, d)
 
 ```
 """
-findall(testf::Function, A) = collect(first(p) for p in pairs(A) if testf(last(p)))
+function findall(testf::Function, A)
+    T = eltype(keys(A))
+    gen = (first(p) for p in pairs(A) if testf(last(p)))
+    isconcretetype(T) ? collect(T, gen) : collect(gen)
+end
 
 # Broadcasting is much faster for small testf, and computing
 # integer indices from logical index using findall has a negligible cost
diff --git a/base/arrayshow.jl b/base/arrayshow.jl
index 7d63375ab3549..a05a8d4dac51c 100644
--- a/base/arrayshow.jl
+++ b/base/arrayshow.jl
@@ -202,7 +202,7 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
     if n > maxpossiblecols
         colsA = [colsA[(0:maxpossiblecols-1) .+ firstindex(colsA)]; colsA[(end-maxpossiblecols+1):end]]
     else
-	    colsA = [colsA;]
+        colsA = [colsA;]
     end
     A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize, ncols)
     # Nine-slicing is accomplished using print_matrix_row repeatedly
@@ -462,8 +462,10 @@ function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String
 end
 
 
-_show_nonempty(io::IO, X::AbstractArray, prefix::String) =
+function _show_nonempty(io::IO, X::AbstractArray, prefix::String)
+    print(io, prefix)
     show_nd(io, X, (io, slice) -> _show_nonempty(io, inferencebarrier(slice), prefix, true, axes(slice)), false)
+end
 
 # a specific call path is used to show vectors (show_vector)
 _show_nonempty(::IO, ::AbstractVector, ::String) =
@@ -538,10 +540,12 @@ end
 # returning Any, as this would cause incorrect printing in e.g. `Vector[Any[1]]`,
 # because eltype(Vector) == Any so `Any` wouldn't be printed in `Any[1]`)
 typeinfo_eltype(typeinfo) = nothing # element type not precisely known
+typeinfo_eltype(typeinfo::Type{Union{}}, slurp...) = nothing
 typeinfo_eltype(typeinfo::Type{<:AbstractArray{T}}) where {T} = eltype(typeinfo)
 typeinfo_eltype(typeinfo::Type{<:AbstractDict{K,V}}) where {K,V} = eltype(typeinfo)
 typeinfo_eltype(typeinfo::Type{<:AbstractSet{T}}) where {T} = eltype(typeinfo)
 
+
 # types that can be parsed back accurately from their un-decorated representations
 function typeinfo_implicit(@nospecialize(T))
     if T === Float64 || T === Int || T === Char || T === String || T === Symbol ||
diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index eb4bcfd8c76fc..a4935d060b74a 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -741,10 +741,10 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f
         end
         os_version = nothing
         if os == "macos"
-            os_version = extract_os_version("macos", r".*darwin([\d\.]+)")
+            os_version = extract_os_version("macos", r".*darwin([\d\.]+)"sa)
         end
         if os == "freebsd"
-            os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)")
+            os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa)
         end
         tags["os_version"] = os_version
 
@@ -798,13 +798,13 @@ function parse_dl_name_version(path::String, os::String)
     local dlregex
     if os == "windows"
         # On Windows, libraries look like `libnettle-6.dll`
-        dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"
+        dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"sa
     elseif os == "macos"
         # On OSX, libraries look like `libnettle.6.3.dylib`
-        dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"
+        dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"sa
     else
         # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0`
-        dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"
+        dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"sa
     end
 
     m = match(dlregex, basename(path))
@@ -904,7 +904,7 @@ function detect_cxxstring_abi()
     end
 
     function open_libllvm(f::Function)
-        for lib_name in ("libLLVM-14jl", "libLLVM", "LLVM", "libLLVMSupport")
+        for lib_name in (Base.libllvm_name, "libLLVM", "LLVM", "libLLVMSupport")
             hdl = Libdl.dlopen_e(lib_name)
             if hdl != C_NULL
                 try
@@ -1016,19 +1016,19 @@ function platforms_match(a::AbstractPlatform, b::AbstractPlatform)
 
         # Throw an error if `a` and `b` have both set non-default comparison strategies for `k`
         # and they're not the same strategy.
-        if a_comp != compare_default && b_comp != compare_default && a_comp != b_comp
+        if a_comp !== compare_default && b_comp !== compare_default && a_comp !== b_comp
             throw(ArgumentError("Cannot compare Platform objects with two different non-default comparison strategies for the same key \"$(k)\""))
         end
 
         # Select the custom comparator, if we have one.
         comparator = a_comp
-        if b_comp != compare_default
+        if b_comp !== compare_default
             comparator = b_comp
         end
 
         # Call the comparator, passing in which objects requested this comparison (one, the other, or both)
         # For some comparators this doesn't matter, but for non-symmetrical comparisons, it does.
-        if !(comparator(ak, bk, a_comp == comparator, b_comp == comparator)::Bool)
+        if !(comparator(ak, bk, a_comp === comparator, b_comp === comparator)::Bool)
             return false
         end
     end
@@ -1067,14 +1067,30 @@ function select_platform(download_info::Dict, platform::AbstractPlatform = HostP
         return nothing
     end
 
-    # At this point, we may have multiple possibilities.  E.g. if, in the future,
-    # Julia can be built without a direct dependency on libgfortran, we may match
-    # multiple tarballs that vary only within their libgfortran ABI.  To narrow it
-    # down, we just sort by triplet, then pick the last one.  This has the effect
-    # of generally choosing the latest release (e.g. a `libgfortran5` tarball
-    # rather than a `libgfortran3` tarball)
-    p = last(sort(ps, by = p -> triplet(p)))
-    return download_info[p]
+    # At this point, we may have multiple possibilities.  We now engage a multi-
+    # stage selection algorithm, where we first sort the matches by how complete
+    # the match is, e.g. preferring matches where the intersection of tags is
+    # equal to the union of the tags:
+    function match_loss(a, b)
+        a_tags = Set(keys(tags(a)))
+        b_tags = Set(keys(tags(b)))
+        return length(union(a_tags, b_tags)) - length(intersect(a_tags, b_tags))
+    end
+
+    # We prefer these better matches, and secondarily reverse-sort by triplet so
+    # as to generally choose the latest release (e.g. a `libgfortran5` tarball
+    # over a `libgfortran3` tarball).
+    ps = sort(ps, lt = (a, b) -> begin
+        loss_a = match_loss(a, platform)
+        loss_b = match_loss(b, platform)
+        if loss_a != loss_b
+            return loss_a < loss_b
+        end
+        return triplet(a) > triplet(b)
+    end)
+
+    # @invokelatest here to not get invalidated by new defs of `==(::Function, ::Function)`
+    return @invokelatest getindex(download_info, first(ps))
 end
 
 # precompiles to reduce latency (see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1025692379)
diff --git a/base/bitset.jl b/base/bitset.jl
index 8727b857bd36b..5ce07389c771e 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -15,7 +15,7 @@ mutable struct BitSet <: AbstractSet{Int}
     # 1st stored Int equals 64*offset
     offset::Int
 
-    BitSet() = new(sizehint!(zeros(UInt64, 0), 4), NO_OFFSET)
+    BitSet() = new(resize!(Vector{UInt64}(undef, 4), 0), NO_OFFSET)
 end
 
 """
@@ -38,8 +38,6 @@ end
 
 @inline intoffset(s::BitSet) = s.offset << 6
 
-eltype(::Type{BitSet}) = Int
-
 empty(s::BitSet, ::Type{Int}=Int) = BitSet()
 emptymutable(s::BitSet, ::Type{Int}=Int) = BitSet()
 
@@ -125,7 +123,7 @@ end
 
 function union!(s::BitSet, r::AbstractUnitRange{<:Integer})
     isempty(r) && return s
-    a, b = _check_bitset_bounds(first(r)), _check_bitset_bounds(last(r))
+    a, b = Int(first(r)), Int(last(r))
     cidxa = _div64(a)
     cidxb = _div64(b)
     if s.offset == NO_OFFSET
@@ -247,20 +245,7 @@ function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
     b1 # the new offset
 end
 
-
-@noinline _throw_bitset_bounds_err() =
-    throw(ArgumentError("elements of BitSet must be between typemin(Int) and typemax(Int)"))
-
-@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int)
-
-@inline _check_bitset_bounds(n) =
-    _is_convertible_Int(n) ? Int(n) : _throw_bitset_bounds_err()
-
-@inline _check_bitset_bounds(n::Int) = n
-
-@noinline _throw_keyerror(n) = throw(KeyError(n))
-
-@inline push!(s::BitSet, n::Integer) = _setint!(s, _check_bitset_bounds(n), true)
+@inline push!(s::BitSet, n::Integer) = _setint!(s, Int(n), true)
 
 push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s)
 
@@ -271,7 +256,7 @@ push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s)
         delete!(s, n)
         n
     else
-        _throw_keyerror(n)
+        throw(KeyError(n))
     end
 end
 
@@ -284,6 +269,7 @@ end
     end
 end
 
+@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int)
 @inline delete!(s::BitSet, n::Int) = _setint!(s, n, false)
 @inline delete!(s::BitSet, n::Integer) = _is_convertible_Int(n) ? delete!(s, Int(n)) : s
 
@@ -324,7 +310,7 @@ function symdiff!(s::BitSet, ns::AbstractSet)
 end
 
 function int_symdiff!(s::BitSet, n::Integer)
-    n0 = _check_bitset_bounds(n)
+    n0 = Int(n)
     val = !(n0 in s)
     _setint!(s, n0, val)
     s
diff --git a/base/boot.jl b/base/boot.jl
index 8e2ecd02bdf3c..43ced22c043d5 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -109,7 +109,7 @@
 
 #struct LineInfoNode
 #    module::Module
-#    method::Symbol
+#    method::Any (Union{Symbol, Method, MethodInstance})
 #    file::Symbol
 #    line::Int32
 #    inlined_at::Int32
@@ -245,7 +245,6 @@ ccall(:jl_toplevel_eval_in, Any, (Any, Any),
       (f::typeof(Typeof))(x) = ($(_expr(:meta,:nospecialize,:x)); isa(x,Type) ? Type{x} : typeof(x))
       end)
 
-
 macro nospecialize(x)
     _expr(:meta, :nospecialize, x)
 end
@@ -256,11 +255,25 @@ TypeVar(n::Symbol, @nospecialize(lb), @nospecialize(ub)) = _typevar(n, lb, ub)
 
 UnionAll(v::TypeVar, @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v, t)
 
-const Vararg = ccall(:jl_toplevel_eval_in, Any, (Any, Any), Core, _expr(:new, TypeofVararg))
+# simple convert for use by constructors of types in Core
+# note that there is no actual conversion defined here,
+# so the methods and ccall's in Core aren't permitted to use convert
+convert(::Type{Any}, @nospecialize(x)) = x
+convert(::Type{T}, x::T) where {T} = x
+cconvert(::Type{T}, x) where {T} = convert(T, x)
+unsafe_convert(::Type{T}, x::T) where {T} = x
 
-# let the compiler assume that calling Union{} as a constructor does not need
-# to be considered ever (which comes up often as Type{<:T})
-Union{}(a...) = throw(MethodError(Union{}, a))
+# dispatch token indicating a kwarg (keyword sorter) call
+function kwcall end
+# deprecated internal functions:
+kwfunc(@nospecialize(f)) = kwcall
+kwftype(@nospecialize(t)) = typeof(kwcall)
+
+# Let the compiler assume that calling Union{} as a constructor does not need
+# to be considered ever (which comes up often as Type{<:T} inference, and
+# occasionally in user code from eltype).
+Union{}(a...) = throw(ArgumentError("cannot construct a value of type Union{} for return result"))
+kwcall(kwargs, ::Type{Union{}}, a...) = Union{}(a...)
 
 Expr(@nospecialize args...) = _expr(args...)
 
@@ -369,12 +382,6 @@ include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname)
 
 eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e)
 
-# dispatch token indicating a kwarg (keyword sorter) call
-function kwcall end
-# deprecated internal functions:
-kwfunc(@nospecialize(f)) = kwcall
-kwftype(@nospecialize(t)) = typeof(kwcall)
-
 mutable struct Box
     contents::Any
     Box(@nospecialize(x)) = new(x)
@@ -415,7 +422,6 @@ eval(Core, quote
     LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) =
         $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
     SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
-    TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t))
     PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
     PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
     PhiCNode(values::Array{Any, 1}) = $(Expr(:new, :PhiCNode, :values))
@@ -447,14 +453,6 @@ function _Task(@nospecialize(f), reserved_stack::Int, completion_future)
     return ccall(:jl_new_task, Ref{Task}, (Any, Any, Int), f, completion_future, reserved_stack)
 end
 
-# simple convert for use by constructors of types in Core
-# note that there is no actual conversion defined here,
-# so the methods and ccall's in Core aren't permitted to use convert
-convert(::Type{Any}, @nospecialize(x)) = x
-convert(::Type{T}, x::T) where {T} = x
-cconvert(::Type{T}, x) where {T} = convert(T, x)
-unsafe_convert(::Type{T}, x::T) where {T} = x
-
 _is_internal(__module__) = __module__ === Core
 # can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping)
 macro _foldable_meta()
@@ -521,17 +519,18 @@ Symbol(s::Symbol) = s
 
 # module providing the IR object model
 module IR
+
 export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
-    NewvarNode, SSAValue, Slot, SlotNumber, TypedSlot, Argument,
+    NewvarNode, SSAValue, SlotNumber, Argument,
     PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
-    Const, PartialStruct
+    Const, PartialStruct, InterConditional
 
 import Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
-    NewvarNode, SSAValue, Slot, SlotNumber, TypedSlot, Argument,
+    NewvarNode, SSAValue, SlotNumber, Argument,
     PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
-    Const, PartialStruct
+    Const, PartialStruct, InterConditional
 
-end
+end # module IR
 
 # docsystem basics
 const unescape = Symbol("hygienic-scope")
@@ -590,28 +589,25 @@ println(@nospecialize a...) = println(stdout, a...)
 
 struct GeneratedFunctionStub
     gen
-    argnames::Array{Any,1}
-    spnames::Union{Nothing, Array{Any,1}}
-    line::Int
-    file::Symbol
-    expand_early::Bool
+    argnames::SimpleVector
+    spnames::SimpleVector
 end
 
-# invoke and wrap the results of @generated
-function (g::GeneratedFunctionStub)(@nospecialize args...)
+# invoke and wrap the results of @generated expression
+function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospecialize args...)
+    # args is (spvals..., argtypes...)
     body = g.gen(args...)
-    if body isa CodeInfo
-        return body
-    end
-    lam = Expr(:lambda, g.argnames,
-               Expr(Symbol("scope-block"),
+    file = source.file
+    file isa Symbol || (file = :none)
+    lam = Expr(:lambda, Expr(:argnames, g.argnames...).args,
+               Expr(:var"scope-block",
                     Expr(:block,
-                         LineNumberNode(g.line, g.file),
-                         Expr(:meta, :push_loc, g.file, Symbol("@generated body")),
+                         source,
+                         Expr(:meta, :push_loc, file, :var"@generated body"),
                          Expr(:return, body),
                          Expr(:meta, :pop_loc))))
     spnames = g.spnames
-    if spnames === nothing
+    if spnames === svec()
         return lam
     else
         return Expr(Symbol("with-static-parameters"), lam, spnames...)
@@ -850,4 +846,9 @@ struct Pair{A, B}
     end
 end
 
+function _hasmethod(@nospecialize(tt)) # this function has a special tfunc
+    world = ccall(:jl_get_tls_world_age, UInt, ())
+    return Intrinsics.not_int(ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world) === nothing)
+end
+
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 0478b1074c505..1e057789509ed 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -34,6 +34,9 @@ that you may be able to leverage; see the
 """
 abstract type BroadcastStyle end
 
+struct Unknown <: BroadcastStyle end
+BroadcastStyle(::Type{Union{}}, slurp...) = Unknown()  # ambiguity resolution
+
 """
 `Broadcast.Style{C}()` defines a [`BroadcastStyle`](@ref) signaling through the type
 parameter `C`. You can use this as an alternative to creating custom subtypes of `BroadcastStyle`,
@@ -45,9 +48,6 @@ struct Style{T} <: BroadcastStyle end
 
 BroadcastStyle(::Type{<:Tuple}) = Style{Tuple}()
 
-struct Unknown <: BroadcastStyle end
-BroadcastStyle(::Type{Union{}}) = Unknown()  # ambiguity resolution
-
 """
 `Broadcast.AbstractArrayStyle{N} <: BroadcastStyle` is the abstract supertype for any style
 associated with an `AbstractArray` type.
@@ -167,16 +167,28 @@ BroadcastStyle(a::AbstractArrayStyle{M}, ::DefaultArrayStyle{N}) where {M,N} =
 #    copyto!(dest::AbstractArray, bc::Broadcasted{MyStyle})
 
 struct Broadcasted{Style<:Union{Nothing,BroadcastStyle}, Axes, F, Args<:Tuple} <: Base.AbstractBroadcasted
+    style::Style
     f::F
     args::Args
     axes::Axes          # the axes of the resulting object (may be bigger than implied by `args` if this is nested inside a larger `Broadcasted`)
-end
 
-Broadcasted(f::F, args::Args, axes=nothing) where {F, Args<:Tuple} =
-    Broadcasted{typeof(combine_styles(args...))}(f, args, axes)
-function Broadcasted{Style}(f::F, args::Args, axes=nothing) where {Style, F, Args<:Tuple}
-    # using Core.Typeof rather than F preserves inferrability when f is a type
-    Broadcasted{Style, typeof(axes), Core.Typeof(f), Args}(f, args, axes)
+    Broadcasted(style::Union{Nothing,BroadcastStyle}, f::Tuple, args::Tuple) = error() # disambiguation: tuple is not callable
+    function Broadcasted(style::Union{Nothing,BroadcastStyle}, f::F, args::Tuple, axes=nothing) where {F}
+        # using Core.Typeof rather than F preserves inferrability when f is a type
+        return new{typeof(style), typeof(axes), Core.Typeof(f), typeof(args)}(style, f, args, axes)
+    end
+
+    function Broadcasted(f::F, args::Tuple, axes=nothing) where {F}
+        Broadcasted(combine_styles(args...)::BroadcastStyle, f, args, axes)
+    end
+
+    function Broadcasted{Style}(f::F, args, axes=nothing) where {Style, F}
+        return new{Style, typeof(axes), Core.Typeof(f), typeof(args)}(Style()::Style, f, args, axes)
+    end
+
+    function Broadcasted{Style,Axes,F,Args}(f, args, axes) where {Style,Axes,F,Args}
+        return new{Style, Axes, F, Args}(Style()::Style, f, args, axes)
+    end
 end
 
 struct AndAnd end
@@ -194,7 +206,7 @@ function broadcasted(::OrOr, a, bc::Broadcasted)
     broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...)
 end
 
-Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{Style,Axes,F,Args}) where {NewStyle,Style,Axes,F,Args} =
+Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{<:Any,Axes,F,Args}) where {NewStyle,Axes,F,Args} =
     Broadcasted{NewStyle,Axes,F,Args}(bc.f, bc.args, bc.axes)::Broadcasted{NewStyle,Axes,F,Args}
 
 function Base.show(io::IO, bc::Broadcasted{Style}) where {Style}
@@ -202,8 +214,8 @@ function Base.show(io::IO, bc::Broadcasted{Style}) where {Style}
     # Only show the style parameter if we have a set of axes — representing an instantiated
     # "outermost" Broadcasted. The styles of nested Broadcasteds represent an intermediate
     # computation that is not relevant for dispatch, confusing, and just extra line noise.
-    bc.axes isa Tuple && print(io, '{', Style, '}')
-    print(io, '(', bc.f, ", ", bc.args, ')')
+    bc.axes isa Tuple && print(io, "{", Style, "}")
+    print(io, "(", bc.f, ", ", bc.args, ")")
     nothing
 end
 
@@ -231,7 +243,7 @@ BroadcastStyle(::Type{<:Broadcasted{Style}}) where {Style} = Style()
 BroadcastStyle(::Type{<:Broadcasted{S}}) where {S<:Union{Nothing,Unknown}} =
     throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned"))
 
-argtype(::Type{Broadcasted{Style,Axes,F,Args}}) where {Style,Axes,F,Args} = Args
+argtype(::Type{BC}) where {BC<:Broadcasted} = fieldtype(BC, :args)
 argtype(bc::Broadcasted) = argtype(typeof(bc))
 
 @inline Base.eachindex(bc::Broadcasted) = _eachindex(axes(bc))
@@ -262,7 +274,7 @@ Base.@propagate_inbounds function Base.iterate(bc::Broadcasted, s)
 end
 
 Base.IteratorSize(::Type{T}) where {T<:Broadcasted} = Base.HasShape{ndims(T)}()
-Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(fieldtype(BC, 2))
+Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(fieldtype(BC, :args))
 Base.ndims(::Type{<:Broadcasted{<:AbstractArrayStyle{N},Nothing}}) where {N<:Integer} = N
 
 _maxndims(T::Type{<:Tuple}) = reduce(max, (ntuple(n -> _ndims(fieldtype(T, n)), Base._counttuple(T))))
@@ -289,14 +301,14 @@ Custom [`BroadcastStyle`](@ref)s may override this default in cases where it is
 to compute and verify the resulting `axes` on-demand, leaving the `axis` field
 of the `Broadcasted` object empty (populated with [`nothing`](@ref)).
 """
-@inline function instantiate(bc::Broadcasted{Style}) where {Style}
+@inline function instantiate(bc::Broadcasted)
     if bc.axes isa Nothing # Not done via dispatch to make it easier to extend instantiate(::Broadcasted{Style})
         axes = combine_axes(bc.args...)
     else
         axes = bc.axes
         check_broadcast_axes(axes, bc.args...)
     end
-    return Broadcasted{Style}(bc.f, bc.args, axes)
+    return Broadcasted(bc.style, bc.f, bc.args, axes)
 end
 instantiate(bc::Broadcasted{<:AbstractArrayStyle{0}}) = bc
 # Tuples don't need axes, but when they have axes (for .= assignment), we need to check them (#33020)
@@ -325,7 +337,7 @@ becomes
 This is an optional operation that may make custom implementation of broadcasting easier in
 some cases.
 """
-function flatten(bc::Broadcasted{Style}) where {Style}
+function flatten(bc::Broadcasted)
     isflat(bc) && return bc
     # concatenate the nested arguments into {a, b, c, d}
     args = cat_nested(bc)
@@ -341,7 +353,7 @@ function flatten(bc::Broadcasted{Style}) where {Style}
         newf = @inline function(args::Vararg{Any,N}) where N
             f(makeargs(args...)...)
         end
-        return Broadcasted{Style}(newf, args, bc.axes)
+        return Broadcasted(bc.style, newf, args, bc.axes)
     end
 end
 
@@ -732,17 +744,21 @@ broadcastable(x) = collect(x)
 broadcastable(::Union{AbstractDict, NamedTuple}) = throw(ArgumentError("broadcasting over dictionaries and `NamedTuple`s is reserved"))
 
 ## Computation of inferred result type, for empty and concretely inferred cases only
-_broadcast_getindex_eltype(bc::Broadcasted) = Base._return_type(bc.f, eltypes(bc.args))
+_broadcast_getindex_eltype(bc::Broadcasted) = combine_eltypes(bc.f, bc.args)
 _broadcast_getindex_eltype(A) = eltype(A)  # Tuple, Array, etc.
 
 eltypes(::Tuple{}) = Tuple{}
-eltypes(t::Tuple{Any}) = Tuple{_broadcast_getindex_eltype(t[1])}
-eltypes(t::Tuple{Any,Any}) = Tuple{_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2])}
-eltypes(t::Tuple) = Tuple{_broadcast_getindex_eltype(t[1]), eltypes(tail(t)).types...}
+eltypes(t::Tuple{Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]))
+eltypes(t::Tuple{Any,Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2]))
+# eltypes(t::Tuple) = (TT = eltypes(tail(t)); TT === Union{} ? Union{} : Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), TT.parameters...))
+eltypes(t::Tuple) = Iterators.TupleOrBottom(ntuple(i -> _broadcast_getindex_eltype(t[i]), Val(length(t)))...)
 
 # Inferred eltype of result of broadcast(f, args...)
-combine_eltypes(f, args::Tuple) =
-    promote_typejoin_union(Base._return_type(f, eltypes(args)))
+function combine_eltypes(f, args::Tuple)
+    argT = eltypes(args)
+    argT === Union{} && return Union{}
+    return promote_typejoin_union(Base._return_type(f, argT))
+end
 
 ## Broadcasting core
 
@@ -891,11 +907,11 @@ materialize(x) = x
     return materialize!(dest, instantiate(Broadcasted(identity, (x,), axes(dest))))
 end
 
-@inline function materialize!(dest, bc::Broadcasted{Style}) where {Style}
+@inline function materialize!(dest, bc::Broadcasted{<:Any})
     return materialize!(combine_styles(dest, bc), dest, bc)
 end
-@inline function materialize!(::BroadcastStyle, dest, bc::Broadcasted{Style}) where {Style}
-    return copyto!(dest, instantiate(Broadcasted{Style}(bc.f, bc.args, axes(dest))))
+@inline function materialize!(::BroadcastStyle, dest, bc::Broadcasted{<:Any})
+    return copyto!(dest, instantiate(Broadcasted(bc.style, bc.f, bc.args, axes(dest))))
 end
 
 ## general `copy` methods
@@ -905,7 +921,7 @@ copy(bc::Broadcasted{<:Union{Nothing,Unknown}}) =
 
 const NonleafHandlingStyles = Union{DefaultArrayStyle,ArrayConflict}
 
-@inline function copy(bc::Broadcasted{Style}) where {Style}
+@inline function copy(bc::Broadcasted)
     ElType = combine_eltypes(bc.f, bc.args)
     if Base.isconcretetype(ElType)
         # We can trust it and defer to the simpler `copyto!`
@@ -964,7 +980,7 @@ broadcast_unalias(::Nothing, src) = src
 # Preprocessing a `Broadcasted` does two things:
 # * unaliases any arguments from `dest`
 # * "extrudes" the arguments where it is advantageous to pre-compute the broadcasted indices
-@inline preprocess(dest, bc::Broadcasted{Style}) where {Style} = Broadcasted{Style}(bc.f, preprocess_args(dest, bc.args), bc.axes)
+@inline preprocess(dest, bc::Broadcasted) = Broadcasted(bc.style, bc.f, preprocess_args(dest, bc.args), bc.axes)
 preprocess(dest, x) = extrude(broadcast_unalias(dest, x))
 
 @inline preprocess_args(dest, args::Tuple) = (preprocess(dest, args[1]), preprocess_args(dest, tail(args))...)
@@ -1034,11 +1050,11 @@ ischunkedbroadcast(R, args::Tuple{<:BroadcastedChunkableOp,Vararg{Any}}) = ischu
 ischunkedbroadcast(R, args::Tuple{}) = true
 
 # Convert compatible functions to chunkable ones. They must also be green-lighted as ChunkableOps
-liftfuncs(bc::Broadcasted{Style}) where {Style} = Broadcasted{Style}(bc.f, map(liftfuncs, bc.args), bc.axes)
-liftfuncs(bc::Broadcasted{Style,<:Any,typeof(sign)}) where {Style} = Broadcasted{Style}(identity, map(liftfuncs, bc.args), bc.axes)
-liftfuncs(bc::Broadcasted{Style,<:Any,typeof(!)}) where {Style} = Broadcasted{Style}(~, map(liftfuncs, bc.args), bc.axes)
-liftfuncs(bc::Broadcasted{Style,<:Any,typeof(*)}) where {Style} = Broadcasted{Style}(&, map(liftfuncs, bc.args), bc.axes)
-liftfuncs(bc::Broadcasted{Style,<:Any,typeof(==)}) where {Style} = Broadcasted{Style}((~)∘(xor), map(liftfuncs, bc.args), bc.axes)
+liftfuncs(bc::Broadcasted{<:Any,<:Any,<:Any}) = Broadcasted(bc.style, bc.f, map(liftfuncs, bc.args), bc.axes)
+liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(sign)}) = Broadcasted(bc.style, identity, map(liftfuncs, bc.args), bc.axes)
+liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(!)}) = Broadcasted(bc.style, ~, map(liftfuncs, bc.args), bc.axes)
+liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(*)}) = Broadcasted(bc.style, &, map(liftfuncs, bc.args), bc.axes)
+liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(==)}) = Broadcasted(bc.style, (~)∘(xor), map(liftfuncs, bc.args), bc.axes)
 liftfuncs(x) = x
 
 liftchunks(::Tuple{}) = ()
@@ -1311,7 +1327,7 @@ end
         return broadcasted((args...) -> f(args...; kwargs...), args...)
     end
 end
-@inline function broadcasted(f, args...)
+@inline function broadcasted(f::F, args...) where {F}
     args′ = map(broadcastable, args)
     broadcasted(combine_styles(args′...), f, args′...)
 end
@@ -1319,18 +1335,18 @@ end
 # the totally generic varargs broadcasted(f, args...) method above loses Type{T}s in
 # mapping broadcastable across the args. These additional methods with explicit
 # arguments ensure we preserve Type{T}s in the first or second argument position.
-@inline function broadcasted(f, arg1, args...)
+@inline function broadcasted(f::F, arg1, args...) where {F}
     arg1′ = broadcastable(arg1)
     args′ = map(broadcastable, args)
     broadcasted(combine_styles(arg1′, args′...), f, arg1′, args′...)
 end
-@inline function broadcasted(f, arg1, arg2, args...)
+@inline function broadcasted(f::F, arg1, arg2, args...) where {F}
     arg1′ = broadcastable(arg1)
     arg2′ = broadcastable(arg2)
     args′ = map(broadcastable, args)
     broadcasted(combine_styles(arg1′, arg2′, args′...), f, arg1′, arg2′, args′...)
 end
-@inline broadcasted(::S, f, args...) where S<:BroadcastStyle = Broadcasted{S}(f, args)
+@inline broadcasted(style::BroadcastStyle, f::F, args...) where {F} = Broadcasted(style, f, args)
 
 """
     BroadcastFunction{F} <: Function
diff --git a/base/c.jl b/base/c.jl
index cfff070973f25..d94447650b9fb 100644
--- a/base/c.jl
+++ b/base/c.jl
@@ -565,9 +565,9 @@ end
 """
     ccall_macro_parse(expression)
 
-`ccall_macro_parse` is an implementation detail of `@ccall
+`ccall_macro_parse` is an implementation detail of `@ccall`.
 
-it takes an expression like `:(printf("%d"::Cstring, value::Cuint)::Cvoid)`
+It takes an expression like `:(printf("%d"::Cstring, value::Cuint)::Cvoid)`
 returns: a tuple of `(function_name, return_type, arg_types, args)`
 
 The above input outputs this:
diff --git a/base/channels.jl b/base/channels.jl
index aa4d913dcdadd..1b5b427f92671 100644
--- a/base/channels.jl
+++ b/base/channels.jl
@@ -183,7 +183,8 @@ Close a channel. An exception (optionally given by `excp`), is thrown by:
 * [`put!`](@ref) on a closed channel.
 * [`take!`](@ref) and [`fetch`](@ref) on an empty, closed channel.
 """
-function close(c::Channel, excp::Exception=closed_exception())
+close(c::Channel) = close(c, closed_exception()) # nospecialize on default arg seems to confuse makedocs
+function close(c::Channel, @nospecialize(excp::Exception))
     lock(c)
     try
         c.excp = excp
@@ -252,6 +253,7 @@ Stacktrace:
 """
 function bind(c::Channel, task::Task)
     T = Task(() -> close_chnl_on_taskdone(task, c))
+    T.sticky = false
     _wait2(task, T)
     return c
 end
diff --git a/base/client.jl b/base/client.jl
index 7cf6dc334b240..dd529dad5281e 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -132,14 +132,14 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
             end
             if lasterr !== nothing
                 lasterr = scrub_repl_backtrace(lasterr)
-                istrivialerror(lasterr) || setglobal!(Main, :err, lasterr)
+                istrivialerror(lasterr) || setglobal!(Base.MainInclude, :err, lasterr)
                 invokelatest(display_error, errio, lasterr)
                 errcount = 0
                 lasterr = nothing
             else
                 ast = Meta.lower(Main, ast)
                 value = Core.eval(Main, ast)
-                setglobal!(Main, :ans, value)
+                setglobal!(Base.MainInclude, :ans, value)
                 if !(value === nothing) && show_value
                     if have_color
                         print(answer_color())
@@ -159,7 +159,7 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
             end
             errcount += 1
             lasterr = scrub_repl_backtrace(current_exceptions())
-            setglobal!(Main, :err, lasterr)
+            setglobal!(Base.MainInclude, :err, lasterr)
             if errcount > 2
                 @error "It is likely that something important is broken, and Julia will not be able to continue normally" errcount
                 break
@@ -478,6 +478,25 @@ function include(fname::AbstractString)
     Base._include(identity, Main, fname)
 end
 eval(x) = Core.eval(Main, x)
+
+"""
+    ans
+
+A variable referring to the last computed value, automatically imported to the interactive prompt.
+"""
+global ans = nothing
+
+"""
+    err
+
+A variable referring to the last thrown errors, automatically imported to the interactive prompt.
+The thrown errors are collected in a stack of exceptions.
+"""
+global err = nothing
+
+# weakly exposes ans and err variables to Main
+export ans, err
+
 end
 
 """
diff --git a/base/cmd.jl b/base/cmd.jl
index e6691835e80c9..9e274b61b5e9e 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -462,7 +462,7 @@ function cmd_gen(parsed)
         (ignorestatus, flags, env, dir) = (cmd.ignorestatus, cmd.flags, cmd.env, cmd.dir)
         append!(args, cmd.exec)
         for arg in tail(parsed)
-            append!(args, arg_gen(arg...)::Vector{String})
+            append!(args, Base.invokelatest(arg_gen, arg...)::Vector{String})
         end
         return Cmd(Cmd(args), ignorestatus, flags, env, dir)
     else
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
index 9c753560e3f82..d09a5b6c0ce83 100644
--- a/base/combinatorics.jl
+++ b/base/combinatorics.jl
@@ -164,7 +164,10 @@ end
 Permute vector `v` in-place, according to permutation `p`. No checking is done
 to verify that `p` is a permutation.
 
-To return a new permutation, use `v[p]`. Note that this is faster than `permute!(v, p)`.
+To return a new permutation, use `v[p]`. This is generally faster than `permute!(v, p)`;
+it is even faster to write into a pre-allocated output array with `u .= @view v[p]`.
+(Even though `permute!` overwrites `v` in-place, it internally requires some allocation
+to keep track of which elements have been moved.)
 
 See also [`invpermute!`](@ref).
 
@@ -215,6 +218,10 @@ end
 
 Like [`permute!`](@ref), but the inverse of the given permutation is applied.
 
+Note that if you have a pre-allocated output array (e.g. `u = similar(v)`),
+it is quicker to instead employ `u[p] = v`.  (`invpermute!` internally
+allocates a copy of the data.)
+
 # Examples
 ```jldoctest
 julia> A = [1, 1, 3, 4];
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 7761ef1ce6f90..097eb7a5d098e 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1,81 +1,27 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#############
-# constants #
-#############
-
-const _REF_NAME = Ref.body.name
-
-#########
-# logic #
-#########
-
 # See if the inference result of the current statement's result value might affect
 # the final answer for the method (aside from optimization potential and exceptions).
 # To do that, we need to check both for slot assignment and SSA usage.
-call_result_unused(frame::InferenceState, currpc::Int) =
-    isexpr(frame.src.code[currpc], :call) && isempty(frame.ssavalue_uses[currpc])
+call_result_unused(sv::InferenceState, currpc::Int) =
+    isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc])
 call_result_unused(si::StmtInfo) = !si.used
 
-function get_max_methods(mod::Module, interp::AbstractInterpreter)
-    max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int
-    max_methods < 0 ? InferenceParams(interp).max_methods : max_methods
-end
-
-function get_max_methods(@nospecialize(f), mod::Module, interp::AbstractInterpreter)
-    if f !== nothing
-        fmm = typeof(f).name.max_methods
-        fmm !== UInt8(0) && return Int(fmm)
-    end
-    return get_max_methods(mod, interp)
-end
-
-function should_infer_this_call(sv::InferenceState)
-    if sv.params.unoptimize_throw_blocks
-        # Disable inference of calls in throw blocks, since we're unlikely to
-        # need their types. There is one exception however: If up until now, the
-        # function has not seen any side effects, we would like to make sure there
-        # aren't any in the throw block either to enable other optimizations.
-        if is_stmt_throw_block(get_curr_ssaflag(sv))
-            should_infer_for_effects(sv) || return false
-        end
-    end
-    return true
-end
-
-function should_infer_for_effects(sv::InferenceState)
-    effects = Effects(sv)
-    return is_terminates(effects) && is_effect_free(effects)
-end
-
 function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                                   arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
-                                  sv::InferenceState, max_methods::Int)
+                                  sv::AbsIntState, max_methods::Int)
     ⊑ₚ = ⊑(ipo_lattice(interp))
-    if !should_infer_this_call(sv)
+    if !should_infer_this_call(interp, sv)
         add_remark!(interp, sv, "Skipped call in throw block")
-        nonoverlayed = false
-        if isoverlayed(method_table(interp)) && is_nonoverlayed(sv.ipo_effects)
-            # as we may want to concrete-evaluate this frame in cases when there are
-            # no overlayed calls, try an additional effort now to check if this call
-            # isn't overlayed rather than just handling it conservatively
-            matches = find_matching_methods(arginfo.argtypes, atype, method_table(interp),
-                InferenceParams(interp).max_union_splitting, max_methods)
-            if !isa(matches, FailedMethodMatch)
-                nonoverlayed = matches.nonoverlayed
-            end
-        else
-            nonoverlayed = true
-        end
         # At this point we are guaranteed to end up throwing on this path,
         # which is all that's required for :consistent-cy. Of course, we don't
         # know anything else about this statement.
-        effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed)
+        effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed=!isoverlayed(method_table(interp)))
         return CallMeta(Any, effects, NoCallInfo())
     end
 
     argtypes = arginfo.argtypes
-    matches = find_matching_methods(argtypes, atype, method_table(interp),
+    matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
         InferenceParams(interp).max_union_splitting, max_methods)
     if isa(matches, FailedMethodMatch)
         add_remark!(interp, sv, matches.reason)
@@ -96,24 +42,19 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     all_effects = EFFECTS_TOTAL
     if !matches.nonoverlayed
         # currently we don't have a good way to execute the overlayed method definition,
-        # so we should give up pure/concrete eval when any of the matched methods is overlayed
+        # so we should give up concrete eval when any of the matched methods is overlayed
         f = nothing
         all_effects = Effects(all_effects; nonoverlayed=false)
     end
 
-    # try pure-evaluation
-    val = pure_eval_call(interp, f, applicable, arginfo)
-    val !== nothing && return CallMeta(val, all_effects, MethodResultPure(info)) # TODO: add some sort of edge(s)
-
     𝕃ₚ = ipo_lattice(interp)
     for i in 1:napplicable
         match = applicable[i]::MethodMatch
         method = match.method
         sig = match.spec_types
-        if bail_out_toplevel_call(interp, sig, sv)
+        if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
             # only infer concrete call sites in top-level expressions
             add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
-            rettype = Any
             break
         end
         this_rt = Bottom
@@ -183,7 +124,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
         seen += 1
         rettype = tmerge(𝕃ₚ, rettype, this_rt)
-        if has_conditional(𝕃ₚ) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
+        if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
             if conditionals === nothing
                 conditionals = Any[Bottom for _ in 1:length(argtypes)],
                                Any[Bottom for _ in 1:length(argtypes)]
@@ -194,8 +135,8 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                 conditionals[2][i] = tmerge(conditionals[2][i], cnd.elsetype)
             end
         end
-        if bail_out_call(interp, rettype, sv, effects)
-            add_remark!(interp, sv, "One of the matched returned maximally imprecise information. Bailing on call.")
+        if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+            add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
             break
         end
     end
@@ -205,10 +146,10 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         info = ConstCallInfo(info, const_results)
     end
 
-    if seen != napplicable
-        # there may be unanalyzed effects within unseen dispatch candidate,
-        # but we can still ignore nonoverlayed effect here since we already accounted for it
-        all_effects = merge_effects(all_effects, EFFECTS_UNKNOWN)
+    if seen ≠ napplicable
+        # there is unanalyzed candidate, widen type and effects to the top
+        rettype = Any
+        all_effects = Effects()
     elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
             (!all(matches.fullmatches) || any_ambig(matches))
         # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
@@ -219,7 +160,9 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
 
     # Also considering inferring the compilation signature for this method, so
     # it is available to the compiler in case it ends up needing it.
-    if infer_compilation_signature(interp) && 1 == seen == napplicable && rettype !== Any && rettype !== Union{} && !is_removable_if_unused(all_effects)
+    if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
+        (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom &&
+        !is_removable_if_unused(all_effects))
         match = applicable[1]::MethodMatch
         method = match.method
         sig = match.spec_types
@@ -243,10 +186,16 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         rettype = Any
     end
     add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv)
-    if !isempty(sv.pclimitations) # remove self, if present
-        delete!(sv.pclimitations, sv)
-        for caller in sv.callers_in_cycle
-            delete!(sv.pclimitations, caller)
+    if isa(sv, InferenceState)
+        # TODO (#48913) implement a proper recursion handling for irinterp:
+        # This works just because currently the `:terminate` condition guarantees that
+        # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+        # We should revisit this once we have a better story for handling cycles in irinterp.
+        if !isempty(sv.pclimitations) # remove self, if present
+            delete!(sv.pclimitations, sv)
+            for caller in callers_in_cycle(sv)
+                delete!(sv.pclimitations, caller)
+            end
         end
     end
     return CallMeta(rettype, all_effects, info)
@@ -260,7 +209,7 @@ struct MethodMatches
     applicable::Vector{Any}
     info::MethodMatchInfo
     valid_worlds::WorldRange
-    mt::Core.MethodTable
+    mt::MethodTable
     fullmatch::Bool
     nonoverlayed::Bool
 end
@@ -272,22 +221,23 @@ struct UnionSplitMethodMatches
     applicable_argtypes::Vector{Vector{Any}}
     info::UnionSplitInfo
     valid_worlds::WorldRange
-    mts::Vector{Core.MethodTable}
+    mts::Vector{MethodTable}
     fullmatches::Vector{Bool}
     nonoverlayed::Bool
 end
 any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches)
 
-function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
+function find_matching_methods(𝕃::AbstractLattice,
+                               argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
                                max_union_splitting::Int, max_methods::Int)
     # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
-    if 1 < unionsplitcost(argtypes) <= max_union_splitting
-        split_argtypes = switchtupleunion(argtypes)
+    if 1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting
+        split_argtypes = switchtupleunion(𝕃, argtypes)
         infos = MethodMatchInfo[]
         applicable = Any[]
         applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
         valid_worlds = WorldRange()
-        mts = Core.MethodTable[]
+        mts = MethodTable[]
         fullmatches = Bool[]
         nonoverlayed = true
         for i in 1:length(split_argtypes)
@@ -295,7 +245,7 @@ function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), meth
             sig_n = argtypes_to_type(arg_n)
             mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
             mt === nothing && return FailedMethodMatch("Could not identify method table for call")
-            mt = mt::Core.MethodTable
+            mt = mt::MethodTable
             result = findall(sig_n, method_table; limit = max_methods)
             if result === nothing
                 return FailedMethodMatch("For one of the union split cases, too many methods matched")
@@ -334,7 +284,7 @@ function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), meth
         if mt === nothing
             return FailedMethodMatch("Could not identify method table for call")
         end
-        mt = mt::Core.MethodTable
+        mt = mt::MethodTable
         result = findall(atype, method_table; limit = max_methods)
         if result === nothing
             # this means too many methods matched
@@ -353,7 +303,7 @@ function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), meth
 end
 
 """
-    from_interprocedural!(𝕃ₚ::AbstractLattice, rt, sv::InferenceState, arginfo::ArgInfo, maybecondinfo) -> newrt
+    from_interprocedural!(𝕃ₚ::AbstractLattice, rt, sv::AbsIntState, arginfo::ArgInfo, maybecondinfo) -> newrt
 
 Converts inter-procedural return type `rt` into a local lattice element `newrt`,
 that is appropriate in the context of current local analysis frame `sv`, especially:
@@ -372,7 +322,7 @@ In such cases `maybecondinfo` should be either of:
 When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by
 `tmerge`ing argument signature type of each method call.
 """
-function from_interprocedural!(𝕃ₚ::AbstractLattice, @nospecialize(rt), sv::InferenceState, arginfo::ArgInfo, @nospecialize(maybecondinfo))
+function from_interprocedural!(𝕃ₚ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, arginfo::ArgInfo, @nospecialize(maybecondinfo))
     rt = collect_limitations!(rt, sv)
     if isa(rt, InterMustAlias)
         rt = from_intermustalias(rt, arginfo)
@@ -411,11 +361,13 @@ function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo)
     return widenmustalias(rt)
 end
 
-function from_interconditional(𝕃ₚ::AbstractLattice, @nospecialize(typ),
-        sv::InferenceState, (; fargs, argtypes)::ArgInfo, @nospecialize(maybecondinfo))
-    𝕃 = widenlattice(𝕃ₚ)
-    has_conditional(𝕃ₚ) || return widenconditional(typ)
+function from_interconditional(𝕃ₚ::AbstractLattice,
+    typ, sv::AbsIntState, arginfo::ArgInfo, maybecondinfo)
+    @nospecialize typ maybecondinfo
+    has_conditional(𝕃ₚ, sv) || return widenconditional(typ)
+    (; fargs, argtypes) = arginfo
     fargs === nothing && return widenconditional(typ)
+    𝕃 = widenlattice(𝕃ₚ)
     slot = 0
     alias = nothing
     thentype = elsetype = Any
@@ -507,22 +459,25 @@ function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Ve
     end
 end
 
-function add_call_backedges!(interp::AbstractInterpreter,
-    @nospecialize(rettype), all_effects::Effects,
+function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), all_effects::Effects,
     edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
-    sv::InferenceState)
-    # we don't need to add backedges when:
-    # - a new method couldn't refine (widen) this type and
-    # - the effects are known to not provide any useful IPO information
+    sv::AbsIntState)
+    # don't bother to add backedges when both type and effects information are already
+    # maximized to the top since a new method couldn't refine or widen them anyway
     if rettype === Any
+        # ignore the `:nonoverlayed` property if `interp` doesn't use overlayed method table
+        # since it will never be tainted anyway
         if !isoverlayed(method_table(interp))
-            # we can ignore the `nonoverlayed` property if `interp` doesn't use
-            # overlayed method table at all since it will never be tainted anyway
             all_effects = Effects(all_effects; nonoverlayed=false)
         end
-        if all_effects === Effects()
-            return
+        if (# ignore the `:noinbounds` property if `:consistent`-cy is tainted already
+            (sv isa InferenceState && sv.ipo_effects.consistent === ALWAYS_FALSE) ||
+            all_effects.consistent === ALWAYS_FALSE ||
+            # or this `:noinbounds` doesn't taint it
+            !stmt_taints_inbounds_consistency(sv))
+            all_effects = Effects(all_effects; noinbounds=false)
         end
+        all_effects === Effects() && return nothing
     end
     for edge in edges
         add_backedge!(sv, edge)
@@ -536,17 +491,22 @@ function add_call_backedges!(interp::AbstractInterpreter,
             thisfullmatch || add_mt_backedge!(sv, mt, atype)
         end
     end
+    return nothing
 end
 
 const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result."
 const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence."
 const RECURSION_MSG_HARDLIMIT = "Bounded recursion detected under hardlimit. Call was widened to force convergence."
 
-function abstract_call_method(interp::AbstractInterpreter, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, si::StmtInfo, sv::InferenceState)
+function abstract_call_method(interp::AbstractInterpreter,
+                              method::Method, @nospecialize(sig), sparams::SimpleVector,
+                              hardlimit::Bool, si::StmtInfo, sv::AbsIntState)
     if method.name === :depwarn && isdefined(Main, :Base) && method.module === Main.Base
         add_remark!(interp, sv, "Refusing to infer into `depwarn`")
         return MethodCallResult(Any, false, false, nothing, Effects())
     end
+    sigtuple = unwrap_unionall(sig)
+    sigtuple isa DataType || return MethodCallResult(Any, false, false, nothing, Effects())
 
     # Limit argument type tuple growth of functions:
     # look through the parents list to see if there's a call to the same method
@@ -555,9 +515,10 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
     edgecycle = edgelimited = false
     topmost = nothing
 
-    for infstate in InfStackUnwind(sv)
-        if method === infstate.linfo.def
-            if infstate.linfo.specTypes::Type == sig::Type
+    for sv′ in AbsIntStackUnwind(sv)
+        infmi = frame_instance(sv′)
+        if method === infmi.def
+            if infmi.specTypes::Type == sig::Type
                 # avoid widening when detecting self-recursion
                 # TODO: merge call cycle and return right away
                 if call_result_unused(si)
@@ -573,8 +534,8 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
                 break
             end
             topmost === nothing || continue
-            if edge_matches_sv(infstate, method, sig, sparams, hardlimit, sv)
-                topmost = infstate
+            if edge_matches_sv(interp, sv′, method, sig, sparams, hardlimit, sv)
+                topmost = sv′
                 edgecycle = true
             end
         end
@@ -582,15 +543,15 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
     washardlimit = hardlimit
 
     if topmost !== nothing
-        sigtuple = unwrap_unionall(sig)::DataType
         msig = unwrap_unionall(method.sig)::DataType
         spec_len = length(msig.parameters) + 1
         ls = length(sigtuple.parameters)
+        mi = frame_instance(sv)
 
-        if method === sv.linfo.def
+        if method === mi.def
             # Under direct self-recursion, permit much greater use of reducers.
             # here we assume that complexity(specTypes) :>= complexity(sig)
-            comparison = sv.linfo.specTypes
+            comparison = mi.specTypes
             l_comparison = length((unwrap_unionall(comparison)::DataType).parameters)
             spec_len = max(spec_len, l_comparison)
         else
@@ -604,7 +565,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
         end
 
         # see if the type is actually too big (relative to the caller), and limit it if required
-        newsig = limit_type_size(sig, comparison, hardlimit ? comparison : sv.linfo.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len)
+        newsig = limit_type_size(sig, comparison, hardlimit ? comparison : mi.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len)
 
         if newsig !== sig
             # continue inference, but note that we've limited parameter complexity
@@ -619,9 +580,16 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
                 return MethodCallResult(Any, true, true, nothing, Effects())
             end
             add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
-            topmost = topmost::InferenceState
-            parentframe = topmost.parent
-            poison_callstack(sv, parentframe === nothing ? topmost : parentframe)
+            # TODO (#48913) implement a proper recursion handling for irinterp:
+            # This works just because currently the `:terminate` condition guarantees that
+            # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+            # We should revisit this once we have a better story for handling cycles in irinterp.
+            if isa(topmost, InferenceState)
+                parentframe = frame_parent(topmost)
+                if isa(sv, InferenceState) && isa(parentframe, InferenceState)
+                    poison_callstack!(sv, parentframe === nothing ? topmost : parentframe)
+                end
+            end
             sig = newsig
             sparams = svec()
             edgelimited = true
@@ -681,19 +649,22 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
     return MethodCallResult(rt, edgecycle, edgelimited, edge, effects)
 end
 
-function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, sv::InferenceState)
+function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
+                         method::Method, @nospecialize(sig), sparams::SimpleVector,
+                         hardlimit::Bool, sv::AbsIntState)
     # The `method_for_inference_heuristics` will expand the given method's generator if
     # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists.
     # The other `CodeInfo`s we inspect will already have this field inflated, so we just
     # access it directly instead (to avoid regeneration).
-    callee_method2 = method_for_inference_heuristics(method, sig, sparams) # Union{Method, Nothing}
+    world = get_world_counter(interp)
+    callee_method2 = method_for_inference_heuristics(method, sig, sparams, world) # Union{Method, Nothing}
 
-    inf_method2 = frame.src.method_for_inference_limit_heuristics # limit only if user token match
+    inf_method2 = method_for_inference_limit_heuristics(frame) # limit only if user token match
     inf_method2 isa Method || (inf_method2 = nothing)
     if callee_method2 !== inf_method2
         return false
     end
-    if !hardlimit
+    if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit
         # if this is a soft limit,
         # also inspect the parent of this edge,
         # to see if they are the same Method as sv
@@ -702,11 +673,10 @@ function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(si
 
         # check in the cycle list first
         # all items in here are mutual parents of all others
-        if !any(p::InferenceState->matches_sv(p, sv), frame.callers_in_cycle)
-            let parent = frame.parent
+        if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame))
+            let parent = frame_parent(frame)
                 parent !== nothing || return false
-                parent = parent::InferenceState
-                (parent.cached || parent.parent !== nothing) || return false
+                (is_cached(parent) || frame_parent(parent) !== nothing) || return false
                 matches_sv(parent, sv) || return false
             end
         end
@@ -714,7 +684,7 @@ function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(si
         # If the method defines a recursion relation, give it a chance
         # to tell us that this recursion is actually ok.
         if isdefined(method, :recursion_relation)
-            if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame.linfo.specTypes])
+            if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame_instance(frame).specTypes])
                 return false
             end
         end
@@ -723,11 +693,11 @@ function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(si
 end
 
 # This function is used for computing alternate limit heuristics
-function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector)
-    if isdefined(method, :generator) && method.generator.expand_early && may_invoke_generator(method, sig, sparams)
+function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt)
+    if isdefined(method, :generator) && !(method.generator isa Core.GeneratedFunctionStub) && may_invoke_generator(method, sig, sparams)
         method_instance = specialize_method(method, sig, sparams)
         if isa(method_instance, MethodInstance)
-            cinfo = get_staged(method_instance)
+            cinfo = get_staged(method_instance, world)
             if isa(cinfo, CodeInfo)
                 method2 = cinfo.method_for_inference_limit_heuristics
                 if method2 isa Method
@@ -739,35 +709,35 @@ function method_for_inference_heuristics(method::Method, @nospecialize(sig), spa
     return nothing
 end
 
-function matches_sv(parent::InferenceState, sv::InferenceState)
-    sv_method2 = sv.src.method_for_inference_limit_heuristics # limit only if user token match
+function matches_sv(parent::AbsIntState, sv::AbsIntState)
+    sv_method2 = method_for_inference_limit_heuristics(sv) # limit only if user token match
     sv_method2 isa Method || (sv_method2 = nothing)
-    parent_method2 = parent.src.method_for_inference_limit_heuristics # limit only if user token match
+    parent_method2 = method_for_inference_limit_heuristics(parent) # limit only if user token match
     parent_method2 isa Method || (parent_method2 = nothing)
-    return parent.linfo.def === sv.linfo.def && sv_method2 === parent_method2
+    return frame_instance(parent).def === frame_instance(sv).def && sv_method2 === parent_method2
 end
 
-function is_edge_recursed(edge::MethodInstance, sv::InferenceState)
-    return any(InfStackUnwind(sv)) do infstate
-        return edge === infstate.linfo
+function is_edge_recursed(edge::MethodInstance, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return edge === frame_instance(sv)
     end
 end
 
-function is_method_recursed(method::Method, sv::InferenceState)
-    return any(InfStackUnwind(sv)) do infstate
-        return method === infstate.linfo.def
+function is_method_recursed(method::Method, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return method === frame_instance(sv).def
     end
 end
 
-function is_constprop_edge_recursed(edge::MethodInstance, sv::InferenceState)
-    return any(InfStackUnwind(sv)) do infstate
-        return edge === infstate.linfo && any(infstate.result.overridden_by_const)
+function is_constprop_edge_recursed(edge::MethodInstance, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return edge === frame_instance(sv) && is_constproped(sv)
     end
 end
 
-function is_constprop_method_recursed(method::Method, sv::InferenceState)
-    return any(InfStackUnwind(sv)) do infstate
-        return method === infstate.linfo.def && any(infstate.result.overridden_by_const)
+function is_constprop_method_recursed(method::Method, caller::AbsIntState)
+    return any(AbsIntStackUnwind(caller)) do sv::AbsIntState
+        return method === frame_instance(sv).def && is_constproped(sv)
     end
 end
 
@@ -788,48 +758,11 @@ struct MethodCallResult
     end
 end
 
-function pure_eval_eligible(interp::AbstractInterpreter,
-    @nospecialize(f), applicable::Vector{Any}, arginfo::ArgInfo)
-    # XXX we need to check that this pure function doesn't call any overlayed method
-    return f !== nothing &&
-           length(applicable) == 1 &&
-           is_method_pure(applicable[1]::MethodMatch) &&
-           is_all_const_arg(arginfo, #=start=#2)
-end
-
-function is_method_pure(method::Method, @nospecialize(sig), sparams::SimpleVector)
-    if isdefined(method, :generator)
-        method.generator.expand_early || return false
-        mi = specialize_method(method, sig, sparams)
-        isa(mi, MethodInstance) || return false
-        staged = get_staged(mi)
-        (staged isa CodeInfo && (staged::CodeInfo).pure) || return false
-        return true
-    end
-    return method.pure
-end
-is_method_pure(match::MethodMatch) = is_method_pure(match.method, match.spec_types, match.sparams)
-
-function pure_eval_call(interp::AbstractInterpreter,
-    @nospecialize(f), applicable::Vector{Any}, arginfo::ArgInfo)
-    pure_eval_eligible(interp, f, applicable, arginfo) || return nothing
-    return _pure_eval_call(f, arginfo)
-end
-function _pure_eval_call(@nospecialize(f), arginfo::ArgInfo)
-    args = collect_const_args(arginfo, #=start=#2)
-    value = try
-        Core._apply_pure(f, args)
-    catch
-        return nothing
-    end
-    return Const(value)
-end
-
 # - true: eligible for concrete evaluation
 # - false: eligible for semi-concrete evaluation
 # - nothing: not eligible for either of it
 function concrete_eval_eligible(interp::AbstractInterpreter,
-    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::InferenceState)
+    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState)
     # disable all concrete-evaluation if this function call is tainted by some overlayed
     # method since currently there is no direct way to execute overlayed methods
     if inbounds_option() === :off
@@ -879,7 +812,7 @@ end
 
 function concrete_eval_call(interp::AbstractInterpreter,
     @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, si::StmtInfo,
-    sv::InferenceState, invokecall::Union{Nothing,InvokeCall}=nothing)
+    sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing)
     eligible = concrete_eval_eligible(interp, f, result, arginfo, sv)
     eligible === nothing && return false
     if eligible
@@ -906,7 +839,7 @@ end
 any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes)
 any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes)
 
-function const_prop_enabled(interp::AbstractInterpreter, sv::InferenceState, match::MethodMatch)
+function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch)
     if !InferenceParams(interp).ipo_constant_propagation
         add_remark!(interp, sv, "[constprop] Disabled by parameter")
         return false
@@ -930,7 +863,7 @@ struct ConstCallResults
         new(rt, const_result, effects, edge)
 end
 
-# TODO MustAlias forwarding
+# TODO implement MustAlias forwarding
 
 struct ConditionalArgtypes <: ForwardableArgtypes
     arginfo::ArgInfo
@@ -995,9 +928,23 @@ function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, a
     return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes)
 end
 
+# check if there is a cycle and duplicated inference of `mi`
+function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv::AbsIntState)
+    result.edgecycle || return false
+    if result.edgelimited
+        return is_constprop_method_recursed(mi.def::Method, sv)
+    else
+        # if the type complexity limiting didn't decide to limit the call signature (as
+        # indicated by `result.edgelimited === false`), we can relax the cycle detection
+        # by comparing `MethodInstance`s and allow inference to propagate different
+        # constant elements if the recursion is finite over the lattice
+        return is_constprop_edge_recursed(mi, sv)
+    end
+end
+
 function abstract_call_method_with_const_args(interp::AbstractInterpreter,
     result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, match::MethodMatch,
-    sv::InferenceState, invokecall::Union{Nothing,InvokeCall}=nothing)
+    sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing)
     if !const_prop_enabled(interp, sv, match)
         return nothing
     end
@@ -1011,18 +958,28 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter,
     isa(res, ConstCallResults) && return res
     mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv)
     mi === nothing && return nothing
+    if is_constprop_recursed(result, mi, sv)
+        add_remark!(interp, sv, "[constprop] Edge cycle encountered")
+        return nothing
+    end
     # try semi-concrete evaluation
     if res::Bool && !any_conditional(arginfo)
-        mi_cache = WorldView(code_cache(interp), sv.world)
+        world = frame_world(sv)
+        mi_cache = WorldView(code_cache(interp), world)
         code = get(mi_cache, mi, nothing)
         if code !== nothing
-            ir = codeinst_to_ir(interp, code)
-            if isa(ir, IRCode)
-                irsv = IRInterpretationState(interp, ir, mi, sv.world, arginfo.argtypes)
+            irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world)
+            if irsv !== nothing
+                irsv.parent = sv
                 rt, nothrow = ir_abstract_constant_propagation(interp, irsv)
-                @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from IR interpretation"
-                if !isa(rt, Type) || typeintersect(rt, Bool) === Union{}
-                    new_effects = Effects(result.effects; nothrow=nothrow)
+                @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp"
+                if !(isa(rt, Type) && hasintersect(rt, Bool))
+                    ir = irsv.ir
+                    # TODO (#48913) enable double inlining pass when there are any calls
+                    # that are newly resovled by irinterp
+                    # state = InliningState(interp)
+                    # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv))
+                    new_effects = Effects(result.effects; nothrow)
                     return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi)
                 end
             end
@@ -1033,19 +990,9 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter,
     𝕃ᵢ = typeinf_lattice(interp)
     inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache)
     if inf_result === nothing
-        # if there might be a cycle, check to make sure we don't end up
-        # calling ourselves here.
-        if result.edgecycle && (result.edgelimited ?
-            is_constprop_method_recursed(match.method, sv) :
-            # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
-            # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
-            # propagate different constant elements if the recursion is finite over the lattice
-            is_constprop_edge_recursed(mi, sv))
-            add_remark!(interp, sv, "[constprop] Edge cycle encountered")
-            return nothing
-        end
-        argtypes = has_conditional(𝕃ᵢ) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes)
-        inf_result = InferenceResult(mi, argtypes)
+        # fresh constant prop'
+        argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes)
+        inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp))
         if !any(inf_result.overridden_by_const)
             add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
             return nothing
@@ -1060,9 +1007,10 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter,
             add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
             return nothing
         end
-        @assert !isa(inf_result.result, InferenceState)
+        @assert inf_result.result !== nothing
     else
-        if isa(inf_result.result, InferenceState)
+        # found the cache for this constant prop'
+        if inf_result.result === nothing
             add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle")
             return nothing
         end
@@ -1074,7 +1022,7 @@ end
 # (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise
 function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
     result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo,
-    match::MethodMatch, sv::InferenceState)
+    match::MethodMatch, sv::AbsIntState)
     method = match.method
     force = force_const_prop(interp, f, method)
     force || const_prop_entry_heuristic(interp, result, si, sv) || return nothing
@@ -1086,8 +1034,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
         return nothing
     end
     all_overridden = is_all_overridden(interp, arginfo, sv)
-    if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden,
-            is_nothrow(sv.ipo_effects), sv)
+    if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, sv)
         add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
         return nothing
     end
@@ -1105,7 +1052,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
     return mi
 end
 
-function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo, sv::InferenceState)
+function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo, sv::AbsIntState)
     if call_result_unused(si) && result.edgecycle
         add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
         return false
@@ -1144,12 +1091,12 @@ end
 
 # determines heuristically whether if constant propagation can be worthwhile
 # by checking if any of given `argtypes` is "interesting" enough to be propagated
-function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState)
+function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
     𝕃ᵢ = typeinf_lattice(interp)
     argtypes = arginfo.argtypes
     for i in 1:length(argtypes)
         a = argtypes[i]
-        if has_conditional(𝕃ᵢ) && isa(a, Conditional) && arginfo.fargs !== nothing
+        if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && arginfo.fargs !== nothing
             is_const_prop_profitable_conditional(a, arginfo.fargs, sv) && return true
         else
             a = widenslotwrapper(a)
@@ -1182,11 +1129,11 @@ function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::Inferenc
 end
 
 # checks if all argtypes has additional information other than what `Type` can provide
-function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState)
+function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::AbsIntState)
     𝕃ᵢ = typeinf_lattice(interp)
     for i in 1:length(argtypes)
         a = argtypes[i]
-        if has_conditional(𝕃ᵢ) && isa(a, Conditional) && fargs !== nothing
+        if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && fargs !== nothing
             is_const_prop_profitable_conditional(a, fargs, sv) || return false
         else
             is_forwardable_argtype(𝕃ᵢ, widenslotwrapper(a)) || return false
@@ -1202,8 +1149,8 @@ function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method:
            istopfunction(f, :setproperty!)
 end
 
-function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo,
-    nargs::Int, all_overridden::Bool, still_nothrow::Bool, _::InferenceState)
+function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f),
+    arginfo::ArgInfo, nargs::Int, all_overridden::Bool, sv::AbsIntState)
     argtypes = arginfo.argtypes
     if nargs > 1
         𝕃ᵢ = typeinf_lattice(interp)
@@ -1213,6 +1160,7 @@ function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecializ
             if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
                 # For static arrays, allow the constprop if we could possibly
                 # deduce nothrow as a result.
+                still_nothrow = isa(sv, InferenceState) ? is_nothrow(sv.ipo_effects) : false
                 if !still_nothrow || ismutabletype(arrty)
                     return false
                 end
@@ -1250,7 +1198,7 @@ end
 # where we would spend a lot of time, but are probably unlikely to get an improved
 # result anyway.
 function const_prop_methodinstance_heuristic(interp::AbstractInterpreter,
-    mi::MethodInstance, arginfo::ArgInfo, sv::InferenceState)
+    mi::MethodInstance, arginfo::ArgInfo, sv::AbsIntState)
     method = mi.def::Method
     if method.is_for_opaque_closure
         # Not inlining an opaque closure can be very expensive, so be generous
@@ -1281,12 +1229,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter,
         # If so, there will be a good chance we might be able to const prop
         # all the way through and learn something new.
         code = get(code_cache(interp), mi, nothing)
-        if isdefined(code, :inferred)
-            if isa(code, CodeInstance)
-                inferred = @atomic :monotonic code.inferred
-            else
-                inferred = code.inferred
-            end
+        if isa(code, CodeInstance)
+            inferred = @atomic :monotonic code.inferred
             # TODO propagate a specific `CallInfo` that conveys information about this call
             if inlining_policy(interp, inferred, NoCallInfo(), IR_FLAG_NULL, mi, arginfo.argtypes) !== nothing
                 return true
@@ -1298,7 +1242,6 @@ end
 
 # This is only for use with `Conditional`.
 # In general, usage of this is wrong.
-ssa_def_slot(@nospecialize(arg), sv::IRCode) = nothing
 function ssa_def_slot(@nospecialize(arg), sv::InferenceState)
     code = sv.src.code
     init = sv.currpc
@@ -1362,17 +1305,21 @@ AbstractIterationResult(cti::Vector{Any}, info::MaybeAbstractIterationInfo) =
 # Union of Tuples of the same length is converted to Tuple of Unions.
 # returns an array of types
 function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ),
-                                sv::Union{InferenceState, IRCode})
+                                sv::AbsIntState)
     if isa(typ, PartialStruct)
         widet = typ.typ
-        if isa(widet, DataType) && widet.name === Tuple.name
-            return AbstractIterationResult(typ.fields, nothing)
+        if isa(widet, DataType)
+            if widet.name === Tuple.name
+                return AbstractIterationResult(typ.fields, nothing)
+            elseif widet.name === _NAMEDTUPLE_NAME
+                return AbstractIterationResult(typ.fields, nothing)
+            end
         end
     end
 
     if isa(typ, Const)
         val = typ.val
-        if isa(val, SimpleVector) || isa(val, Tuple)
+        if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple)
             return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here!
         end
     end
@@ -1388,7 +1335,7 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
     if isa(tti, Union)
         utis = uniontypes(tti)
         if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
-            return AbstractIterationResult(Any[Vararg{Any}], nothing, EFFECTS_UNKNOWN′)
+            return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
         end
         ltp = length((utis[1]::DataType).parameters)
         for t in utis
@@ -1416,15 +1363,22 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
             va = isvarargtype(last)
             elts = Any[ fieldtype(tti0, i) for i = 1:len ]
             if va
-                elts[len] = Vararg{elts[len]}
+                if elts[len] === Union{}
+                    pop!(elts)
+                else
+                    elts[len] = Vararg{elts[len]}
+                end
             end
             return AbstractIterationResult(elts, nothing)
         end
     elseif tti0 === SimpleVector
         return AbstractIterationResult(Any[Vararg{Any}], nothing)
     elseif tti0 === Any
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, EFFECTS_UNKNOWN′)
+        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
     elseif tti0 <: Array
+        if eltype(tti0) === Union{}
+            return AbstractIterationResult(Any[], nothing)
+        end
         return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)
     else
         return abstract_iteration(interp, itft, typ, sv)
@@ -1432,11 +1386,11 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
 end
 
 # simulate iteration protocol on container type up to fixpoint
-function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::Union{InferenceState, IRCode})
+function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::AbsIntState)
     if isa(itft, Const)
         iteratef = itft.val
     else
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, EFFECTS_UNKNOWN′)
+        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
     end
     @assert !isvarargtype(itertype)
     call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)
@@ -1498,7 +1452,7 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
                 # ... but cannot terminate
                 if !may_have_terminated
                     #  ... and cannot have terminated prior to this loop
-                    return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_UNKNOWN′)
+                    return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
                 else
                     # iterator may have terminated prior to this loop, but not during it
                     valtype = Bottom
@@ -1521,8 +1475,7 @@ end
 
 # do apply(af, fargs...), where af is a function value
 function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo,
-                        sv::Union{InferenceState, IRCode},
-                        max_methods::Int = get_max_methods(sv.mod, interp))
+                        sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv))
     itft = argtype_by_index(argtypes, 2)
     aft = argtype_by_index(argtypes, 3)
     (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
@@ -1538,7 +1491,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
     end
     res = Union{}
     nargs = length(aargtypes)
-    splitunions = 1 < unionsplitcost(aargtypes) <= InferenceParams(interp).max_apply_union_enum
+    splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum
     ctypes = [Any[aft]]
     infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
     effects = EFFECTS_TOTAL
@@ -1577,7 +1530,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
                     # This is vararg, we're not gonna be able to do any inlining,
                     # drop the info
                     info = nothing
-                    tail = tuple_tail_elem(unwrapva(ct[end]), cti)
+                    tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
                     push!(ctypes´, push!(ct[1:(end - 1)], tail))
                 else
                     push!(ctypes´, append!(ct[:], cti))
@@ -1590,7 +1543,9 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
     end
     retinfos = ApplyCallInfo[]
     retinfo = UnionSplitApplyCallInfo(retinfos)
-    for i = 1:length(ctypes)
+    napplicable = length(ctypes)
+    seen = 0
+    for i = 1:napplicable
         ct = ctypes[i]
         arginfo = infos[i]
         lct = length(ct)
@@ -1598,23 +1553,27 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
         for i = 1:lct-1
             cti = ct[i]
             if isvarargtype(cti)
-                ct[i] = tuple_tail_elem(unwrapva(cti), ct[(i+1):lct])
+                ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct])
                 resize!(ct, i)
                 break
             end
         end
         call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)
+        seen += 1
         push!(retinfos, ApplyCallInfo(call.info, arginfo))
         res = tmerge(res, call.rt)
         effects = merge_effects(effects, call.effects)
-        if bail_out_apply(interp, res, sv)
-            if i != length(ctypes)
-                # No point carrying forward the info, we're not gonna inline it anyway
-                retinfo = NoCallInfo()
-            end
+        if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv)
+            add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
             break
         end
     end
+    if seen ≠ napplicable
+        # there is unanalyzed candidate, widen type and effects to the top
+        res = Any
+        effects = Effects()
+        retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+    end
     # TODO: Add a special info type to capture all the iteration info.
     # For now, only propagate info if we don't also union-split the iteration
     return CallMeta(res, effects, retinfo)
@@ -1698,12 +1657,12 @@ end
 end
 
 function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo,
-                               sv::Union{InferenceState, IRCode}, max_methods::Int)
+                               sv::AbsIntState, max_methods::Int)
     @nospecialize f
     la = length(argtypes)
     𝕃ᵢ = typeinf_lattice(interp)
     ⊑ᵢ = ⊑(𝕃ᵢ)
-    if has_conditional(𝕃ᵢ) && f === Core.ifelse && fargs isa Vector{Any} && la == 4
+    if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4
         cnd = argtypes[2]
         if isa(cnd, Conditional)
             newcnd = widenconditional(cnd)
@@ -1742,25 +1701,37 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
                 end
             end
         end
-    elseif has_conditional(𝕃ᵢ) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
+    elseif has_conditional(𝕃ᵢ, sv) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
         # perform very limited back-propagation of type information for `is` and `isa`
         if f === isa
+            # try splitting value argument, based on types
             a = ssa_def_slot(fargs[2], sv)
             a2 = argtypes[2]
+            a3 = argtypes[3]
             if isa(a, SlotNumber)
-                cndt = isa_condition(a2, argtypes[3], InferenceParams(interp).max_union_splitting, rt)
+                cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting, rt)
                 if cndt !== nothing
                     return Conditional(a, cndt.thentype, cndt.elsetype)
                 end
             end
             if isa(a2, MustAlias)
                 if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization)
-                    cndt = isa_condition(a2, argtypes[3], InferenceParams(interp).max_union_splitting)
+                    cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting)
                     if cndt !== nothing
                         return form_mustalias_conditional(a2, cndt.thentype, cndt.elsetype)
                     end
                 end
             end
+            # try splitting type argument, based on value
+            if isdispatchelem(widenconst(a2)) && a3 isa Union && !has_free_typevars(a3) && !isa(rt, Const)
+                b = ssa_def_slot(fargs[3], sv)
+                if isa(b, SlotNumber)
+                    # !(x isa T) implies !(Type{a2} <: T)
+                    # TODO: complete splitting, based on which portions of the Union a3 for which isa_tfunc returns Const(true) or Const(false) instead of Bool
+                    elsetype = typesubtract(a3, Type{widenconst(a2)}, InferenceParams(interp).max_union_splitting)
+                    return Conditional(b, a3, elsetype)
+                end
+            end
         elseif f === (===)
             a = ssa_def_slot(fargs[2], sv)
             b = ssa_def_slot(fargs[3], sv)
@@ -1886,14 +1857,14 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An
     return CallMeta(Any, EFFECTS_UNKNOWN, NoCallInfo())
 end
 
-function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::InferenceState)
+function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState)
     ft′ = argtype_by_index(argtypes, 2)
     ft = widenconst(ft′)
     ft === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
     (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3))
     isexact || return CallMeta(Any, Effects(), NoCallInfo())
     unwrapped = unwrap_unionall(types)
-    if types === Bottom || types === Any || !(unwrapped isa DataType)
+    if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name
         return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
     end
     argtype = argtypes_to_type(argtype_tail(argtypes, 4))
@@ -1949,10 +1920,10 @@ function invoke_rewrite(xs::Vector{Any})
     return newxs
 end
 
-function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
     if length(argtypes) == 3
         finalizer_argvec = Any[argtypes[2], argtypes[3]]
-        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, 1)
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)
         return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects))
     end
     return CallMeta(Nothing, Effects(), NoCallInfo())
@@ -1960,8 +1931,8 @@ end
 
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
-        arginfo::ArgInfo, si::StmtInfo, sv::Union{InferenceState, IRCode},
-        max_methods::Int = isa(sv, InferenceState) ? get_max_methods(f, sv.mod, interp) : 0)
+        arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState,
+        max_methods::Int = get_max_methods(interp, f, sv))
     (; fargs, argtypes) = arginfo
     la = length(argtypes)
 
@@ -1975,9 +1946,19 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return abstract_modifyfield!(interp, argtypes, si, sv)
         elseif f === Core.finalizer
             return abstract_finalizer(interp, argtypes, sv)
+        elseif f === applicable
+            return abstract_applicable(interp, argtypes, sv, max_methods)
         end
         rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods)
         effects = builtin_effects(𝕃ᵢ, f, arginfo, rt)
+        if f === getfield && (fargs !== nothing && isexpr(fargs[end], :boundscheck)) && !is_nothrow(effects) && isa(sv, InferenceState)
+            # As a special case, we delayed tainting `noinbounds` for getfield calls in case we can prove
+            # in-boundedness indepedently. Here we need to put that back in other cases.
+            # N.B.: This isn't about the effects of the call itself, but a delayed contribution of the :boundscheck
+            # statement, so we need to merge this directly into sv, rather than modifying thte effects.
+            merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; noinbounds=false,
+                consistent = (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 ? ALWAYS_FALSE : ALWAYS_TRUE))
+        end
         return CallMeta(rt, effects, NoCallInfo())
     elseif isa(f, Core.OpaqueClosure)
         # calling an OpaqueClosure about which we have no information returns no information
@@ -2034,27 +2015,11 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             fargs = nothing
         end
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
-        return CallMeta(abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods).rt, EFFECTS_TOTAL, NoCallInfo())
-    elseif la == 2 &&
-           (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) &&
-           istopfunction(f, :length)
-        # mark length(::SimpleVector) as @pure
-        return CallMeta(Const(length(svecval)), EFFECTS_TOTAL, MethodResultPure())
-    elseif la == 3 &&
-           (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) &&
-           (a3 = argtypes[3]; isa(a3, Const)) && (idx = a3.val; isa(idx, Int)) &&
-           istopfunction(f, :getindex)
-        # mark getindex(::SimpleVector, i::Int) as @pure
-        if 1 <= idx <= length(svecval) && isassigned(svecval, idx)
-            return CallMeta(Const(getindex(svecval, idx)), EFFECTS_TOTAL, MethodResultPure())
-        end
+        return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
     elseif la == 2 && istopfunction(f, :typename)
         return CallMeta(typename_static(argtypes[2]), EFFECTS_TOTAL, MethodResultPure())
-    elseif la == 3 && istopfunction(f, :typejoin)
-        if is_all_const_arg(arginfo, #=start=#2)
-            val = _pure_eval_call(f, arginfo)
-            return CallMeta(val === nothing ? Type : val, EFFECTS_TOTAL, MethodResultPure())
-        end
+    elseif f === Core._hasmethod
+        return _hasmethod_tfunc(interp, argtypes, sv)
     end
     atype = argtypes_to_type(argtypes)
     return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)
@@ -2063,7 +2028,7 @@ end
 function abstract_call_opaque_closure(interp::AbstractInterpreter,
     closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::InferenceState, check::Bool=true)
     sig = argtypes_to_type(arginfo.argtypes)
-    result = abstract_call_method(interp, closure.source, sig, Core.svec(), false, si, sv)
+    result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, si, sv)
     (; rt, edge, effects) = result
     tt = closure.typ
     sigT = (unwrap_unionall(tt)::DataType).parameters[1]
@@ -2101,12 +2066,12 @@ function most_general_argtypes(closure::PartialOpaque)
     if !isa(argt, DataType) || argt.name !== typename(Tuple)
         argt = Tuple
     end
-    return most_general_argtypes(closure.source, argt, false)
+    return Any[argt.parameters...]
 end
 
 # call where the function is any lattice element
 function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo,
-                       sv::Union{InferenceState, IRCode}, max_methods::Union{Int, Nothing} = isa(sv, IRCode) ? 0 : nothing)
+                       sv::AbsIntState, max_methods::Union{Int, Nothing} = nothing)
     argtypes = arginfo.argtypes
     ft = widenslotwrapper(argtypes[1])
     f = singleton_type(ft)
@@ -2129,19 +2094,19 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtIn
             return CallMeta(Any, Effects(), NoCallInfo())
         end
         # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic
-        max_methods = max_methods === nothing ? get_max_methods(sv.mod, interp) : max_methods
+        max_methods = max_methods === nothing ? get_max_methods(interp, sv) : max_methods
         return abstract_call_gf_by_type(interp, nothing, arginfo, si, argtypes_to_type(argtypes), sv, max_methods)
     end
-    max_methods = max_methods === nothing ? get_max_methods(f, sv.mod, interp) : max_methods
+    max_methods = max_methods === nothing ? get_max_methods(interp, f, sv) : max_methods
     return abstract_call_known(interp, f, arginfo, si, sv, max_methods)
 end
 
 function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
     isref = false
-    if T === Bottom
+    if unwrapva(T) === Bottom
         return Bottom
     elseif isa(T, Type)
-        if isa(T, DataType) && (T::DataType).name === _REF_NAME
+        if isa(T, DataType) && (T::DataType).name === Ref.body.name
             isref = true
             T = T.parameters[1]
             if isreturn && T === Any
@@ -2172,11 +2137,16 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
     return unwraptv(T)
 end
 
-function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::VarTable, sv::InferenceState)
+function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     f = abstract_eval_value(interp, e.args[2], vtypes, sv)
     # rt = sp_type_rewrap(e.args[3], sv.linfo, true)
-    at = Any[ sp_type_rewrap(argt, sv.linfo, false) for argt in e.args[4]::SimpleVector ]
-    pushfirst!(at, f)
+    atv = e.args[4]::SimpleVector
+    at = Vector{Any}(undef, length(atv) + 1)
+    at[1] = f
+    for i = 1:length(atv)
+        at[i + 1] = sp_type_rewrap(at[i], frame_instance(sv), false)
+        at[i + 1] === Bottom && return
+    end
     # this may be the wrong world for the call,
     # but some of the result is likely to be valid anyways
     # and that may help generate better codegen
@@ -2184,16 +2154,30 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::V
     nothing
 end
 
-function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable, Nothing}, sv::Union{InferenceState, IRCode})
+function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     rt = Any
     head = e.head
     if head === :static_parameter
         n = e.args[1]::Int
+        nothrow = false
         if 1 <= n <= length(sv.sptypes)
-            rt = sv.sptypes[n]
+            sp = sv.sptypes[n]
+            rt = sp.typ
+            nothrow = !sp.undef
         end
+        merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow))
+        return rt
     elseif head === :boundscheck
         if isa(sv, InferenceState)
+            stmt = sv.src.code[sv.currpc]
+            if isexpr(stmt, :call)
+                f = abstract_eval_value(interp, stmt.args[1], vtypes, sv)
+                if f isa Const && f.val === getfield
+                    # boundscheck of `getfield` call is analyzed by tfunc potentially without
+                    # tainting :inbounds or :consistent when it's known to be nothrow
+                    @goto delay_effects_analysis
+                end
+            end
             # If there is no particular `@inbounds` for this function, then we only taint `:noinbounds`,
             # which will subsequently taint `:consistent`-cy if this function is called from another
             # function that uses `@inbounds`. However, if this `:boundscheck` is itself within an
@@ -2202,6 +2186,7 @@ function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::
             merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; noinbounds=false,
                 consistent = (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0 ? ALWAYS_FALSE : ALWAYS_TRUE))
         end
+        @label delay_effects_analysis
         rt = Bool
     elseif head === :inbounds
         @assert false && "Expected this to have been moved into flags"
@@ -2211,23 +2196,27 @@ function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::
     return rt
 end
 
-function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable, Nothing}, sv::Union{InferenceState, IRCode})
+function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     if isa(e, QuoteNode)
         return Const(e.value)
     elseif isa(e, SSAValue)
         return abstract_eval_ssavalue(e, sv)
     elseif isa(e, SlotNumber)
-        vtyp = vtypes[slot_id(e)]
-        if vtyp.undef
-            merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false))
+        if vtypes !== nothing
+            vtyp = vtypes[slot_id(e)]
+            if vtyp.undef
+                merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false))
+            end
+            return vtyp.typ
         end
-        return vtyp.typ
+        merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow=false))
+        return Any
     elseif isa(e, Argument)
-        if !isa(vtypes, Nothing)
+        if vtypes !== nothing
             return vtypes[slot_id(e)].typ
         else
-            @assert isa(sv, IRCode)
-            return sv.argtypes[e.n]
+            @assert isa(sv, IRInterpretationState)
+            return sv.ir.argtypes[e.n] # TODO frame_argtypes(sv)[e.n] and remove the assertion
         end
     elseif isa(e, GlobalRef)
         return abstract_eval_globalref(interp, e, sv)
@@ -2236,7 +2225,7 @@ function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(
     return Const(e)
 end
 
-function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable, Nothing}, sv::Union{InferenceState, IRCode})
+function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     if isa(e, Expr)
         return abstract_eval_value_expr(interp, e, vtypes, sv)
     else
@@ -2245,7 +2234,7 @@ function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtyp
     end
 end
 
-function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::Union{VarTable, Nothing}, sv::Union{InferenceState, IRCode})
+function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     n = length(ea)
     argtypes = Vector{Any}(undef, n)
     @inbounds for i = 1:n
@@ -2264,33 +2253,39 @@ struct RTEffects
     RTEffects(@nospecialize(rt), effects::Effects) = new(rt, effects)
 end
 
-function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable, Nothing},
-                                      sv::Union{InferenceState, IRCode}, mi::Union{MethodInstance, Nothing})::RTEffects
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState)
+    si = StmtInfo(!call_result_unused(sv, sv.currpc))
+    (; rt, effects, info) = abstract_call(interp, arginfo, si, sv)
+    sv.stmt_info[sv.currpc] = info
+    # mark this call statement as DCE-elgible
+    # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret?
+    if is_removable_if_unused(effects)
+        add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
+    else
+        sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
+    end
+    return RTEffects(rt, effects)
+end
+
+function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                            sv::AbsIntState)
+    ea = e.args
+    argtypes = collect_argtypes(interp, ea, vtypes, sv)
+    if argtypes === nothing
+        return RTEffects(Bottom, Effects())
+    end
+    arginfo = ArgInfo(ea, argtypes)
+    return abstract_call(interp, arginfo, sv)
+end
+
+function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
+                                      sv::AbsIntState)
     effects = EFFECTS_UNKNOWN
     ehead = e.head
     𝕃ᵢ = typeinf_lattice(interp)
     ⊑ᵢ = ⊑(𝕃ᵢ)
     if ehead === :call
-        ea = e.args
-        argtypes = collect_argtypes(interp, ea, vtypes, sv)
-        if argtypes === nothing
-            rt = Bottom
-            effects = Effects()
-        else
-            arginfo = ArgInfo(ea, argtypes)
-            si = StmtInfo(isa(sv, IRCode) ? true : !call_result_unused(sv, sv.currpc))
-            (; rt, effects, info) = abstract_call(interp, arginfo, si, sv)
-            if isa(sv, InferenceState)
-                sv.stmt_info[sv.currpc] = info
-                # mark this call statement as DCE-elgible
-                # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret?
-                if is_removable_if_unused(effects)
-                    add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
-                else
-                    sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
-                end
-            end
-        end
+        (; rt, effects) = abstract_eval_call(interp, e, vtypes, sv)
         t = rt
     elseif ehead === :new
         t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))
@@ -2301,7 +2296,9 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
             ismutable = ismutabletype(ut)
             fcount = datatype_fieldcount(ut)
             nargs = length(e.args) - 1
-            if fcount === nothing || (fcount > nargs && any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount))
+            if (fcount === nothing || (fcount > nargs && (let t = t
+                    any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount)
+                end)))
                 # allocation with undefined field leads to undefined behavior and should taint `:consistent`-cy
                 consistent = ALWAYS_FALSE
             elseif ismutable
@@ -2360,12 +2357,16 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
         if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
             n = fieldcount(t)
-            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
-                let t = t, at = at; all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
+            if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+                (let t = t, at = at
+                    all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n)
+                end))
                 nothrow = isexact
                 t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
-            elseif isa(at, PartialStruct) && at ⊑ᵢ Tuple && n == length(at.fields::Vector{Any}) &&
-                let t = t, at = at; all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n); end
+            elseif (isa(at, PartialStruct) && at ⊑ᵢ Tuple && n > 0 && n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) &&
+                    (let t = t, at = at, ⊑ᵢ = ⊑ᵢ
+                        all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n)
+                    end))
                 nothrow = isexact
                 t = PartialStruct(t, at.fields::Vector{Any})
             end
@@ -2384,9 +2385,9 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
             if argtypes === nothing
                 t = Bottom
             else
-                mi′ = isa(sv, InferenceState) ? sv.linfo : mi
-                t = _opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3],
-                    argtypes[4], argtypes[5:end], mi′)
+                mi = frame_instance(sv)
+                t = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3],
+                    argtypes[4], argtypes[5:end], mi)
                 if isa(t, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc)
                     # Infer this now so that the specialization is available to
                     # optimization.
@@ -2399,7 +2400,7 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
             end
         end
     elseif ehead === :foreigncall
-        (;rt, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv, mi)
+        (; rt, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv)
         t = rt
         if isa(sv, InferenceState)
             # mark this call statement as DCE-elgible
@@ -2430,7 +2431,7 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
         sym = e.args[1]
         t = Bool
         effects = EFFECTS_TOTAL
-        if isa(sym, SlotNumber)
+        if isa(sym, SlotNumber) && vtypes !== nothing
             vtyp = vtypes[slot_id(sym)]
             if vtyp.typ === Bottom
                 t = Const(false) # never assigned previously
@@ -2438,23 +2439,25 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
                 t = Const(true) # definitely assigned previously
             end
         elseif isa(sym, Symbol)
-            if isdefined(sv.mod, sym)
+            if isdefined(frame_module(sv), sym)
                 t = Const(true)
-            elseif sv.params.assume_bindings_static
+            elseif InferenceParams(interp).assume_bindings_static
                 t = Const(false)
             end
         elseif isa(sym, GlobalRef)
             if isdefined(sym.mod, sym.name)
                 t = Const(true)
-            elseif sv.params.assume_bindings_static
+            elseif InferenceParams(interp).assume_bindings_static
                 t = Const(false)
             end
         elseif isexpr(sym, :static_parameter)
             n = sym.args[1]::Int
             if 1 <= n <= length(sv.sptypes)
-                spty = sv.sptypes[n]
-                if isa(spty, Const)
+                sp = sv.sptypes[n]
+                if !sp.undef
                     t = Const(true)
+                elseif sp.typ === Bottom
+                    t = Const(false)
                 end
             end
         end
@@ -2482,10 +2485,10 @@ function refine_partial_type(@nospecialize t)
     return t
 end
 
-function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable, Nothing}, sv::Union{InferenceState, IRCode}, mi::Union{MethodInstance, Nothing}=nothing)
+function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     abstract_eval_value(interp, e.args[1], vtypes, sv)
-    mi′ = isa(sv, InferenceState) ? sv.linfo : mi
-    t = sp_type_rewrap(e.args[2], mi′, true)
+    mi = frame_instance(sv)
+    t = sp_type_rewrap(e.args[2], mi, true)
     for i = 3:length(e.args)
         if abstract_eval_value(interp, e.args[i], vtypes, sv) === Bottom
             return RTEffects(Bottom, EFFECTS_THROWS)
@@ -2504,12 +2507,13 @@ function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, vtypes:
             override.terminates_globally ? true        : effects.terminates,
             override.notaskstate         ? true        : effects.notaskstate,
             override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly,
-            effects.nonoverlayed)
+            effects.nonoverlayed,
+            effects.noinbounds)
     end
     return RTEffects(t, effects)
 end
 
-function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Union{VarTable, Nothing}, sv::Union{InferenceState, IRCode})
+function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Union{VarTable,Nothing}, sv::AbsIntState)
     rt = Union{}
     for i in 1:length(phi.values)
         isassigned(phi.values, i) || continue
@@ -2519,21 +2523,22 @@ function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Un
     return rt
 end
 
-function stmt_taints_inbounds_consistency(sv::InferenceState)
-    sv.src.propagate_inbounds && return true
+function stmt_taints_inbounds_consistency(sv::AbsIntState)
+    propagate_inbounds(sv) && return true
     return (get_curr_ssaflag(sv) & IR_FLAG_INBOUNDS) != 0
 end
 
 function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
     if !isa(e, Expr)
         if isa(e, PhiNode)
+            add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE)
             return abstract_eval_phi(interp, e, vtypes, sv)
         end
         return abstract_eval_special_value(interp, e, vtypes, sv)
     end
-    (;rt, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv, nothing)
+    (; rt, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv)
     if !effects.noinbounds
-        if !sv.src.propagate_inbounds
+        if !propagate_inbounds(sv)
             # The callee read our inbounds flag, but unless we propagate inbounds,
             # we ourselves don't read our parent's inbounds.
             effects = Effects(effects; noinbounds=true)
@@ -2571,7 +2576,7 @@ function abstract_eval_globalref(g::GlobalRef)
 end
 abstract_eval_global(M::Module, s::Symbol) = abstract_eval_globalref(GlobalRef(M, s))
 
-function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, frame::Union{InferenceState, IRCode})
+function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState)
     rt = abstract_eval_globalref(g)
     consistent = inaccessiblememonly = ALWAYS_FALSE
     nothrow = false
@@ -2585,11 +2590,11 @@ function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, fram
         end
     elseif isdefined_globalref(g)
         nothrow = true
-    elseif isa(frame, InferenceState) && frame.params.assume_bindings_static
+    elseif InferenceParams(interp).assume_bindings_static
         consistent = inaccessiblememonly = ALWAYS_TRUE
         rt = Union{}
     end
-    merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly))
+    merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly))
     return rt
 end
 
@@ -2602,7 +2607,7 @@ function handle_global_assignment!(interp::AbstractInterpreter, frame::Inference
 end
 
 abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.ssavaluetypes)
-abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any})
+
 function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any})
     typ = ssavaluetypes[s.id]
     if typ === NOT_FOUND
@@ -2849,10 +2854,10 @@ end
 
 # make as much progress on `frame` as possible (without handling cycles)
 function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
-    @assert !frame.inferred
+    @assert !is_inferred(frame)
     frame.dont_work_on_me = true # mark that this function is currently on the stack
     W = frame.ip
-    nargs = narguments(frame)
+    nargs = narguments(frame, #=include_va=#false)
     slottypes = frame.slottypes
     ssavaluetypes = frame.ssavaluetypes
     bbs = frame.cfg.blocks
@@ -2893,6 +2898,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                         empty!(frame.pclimitations)
                         @goto find_next_bb
                     end
+                    orig_condt = condt
                     if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condx, SlotNumber)
                         # if this non-`Conditional` object is a slot, we form and propagate
                         # the conditional constraint on it
@@ -2924,6 +2930,14 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                             handle_control_backedge!(interp, frame, currpc, stmt.dest)
                             @goto branch
                         else
+                            if !⊑(𝕃ᵢ, orig_condt, Bool)
+                                merge_effects!(interp, frame, EFFECTS_THROWS)
+                                if !hasintersect(widenconst(orig_condt), Bool)
+                                    ssavaluetypes[currpc] = Bottom
+                                    @goto find_next_bb
+                                end
+                            end
+
                             # We continue with the true branch, but process the false
                             # branch here.
                             if isa(condt, Conditional)
@@ -2971,7 +2985,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     if !isempty(frame.limitations)
                         rt = LimitedAccuracy(rt, copy(frame.limitations))
                     end
-                    if tchanged(𝕃ₚ, rt, bestguess)
+                    if !⊑(𝕃ₚ, rt, bestguess)
                         # new (wider) return type for frame
                         bestguess = tmerge(𝕃ₚ, bestguess, rt)
                         # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end
@@ -3101,7 +3115,7 @@ function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
                 typeinf_local(interp, caller)
                 no_active_ips_in_callers = false
             end
-            caller.valid_worlds = intersect(caller.valid_worlds, frame.valid_worlds)
+            update_valid_age!(caller, frame.valid_worlds)
         end
     end
     return true
diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl
index f578ec8d6f60d..a84050816cb21 100644
--- a/base/compiler/abstractlattice.jl
+++ b/base/compiler/abstractlattice.jl
@@ -293,6 +293,10 @@ has_mustalias(𝕃::AbstractLattice) = has_mustalias(widenlattice(𝕃))
 has_mustalias(::AnyMustAliasesLattice) = true
 has_mustalias(::JLTypeLattice) = false
 
+has_extended_unionsplit(𝕃::AbstractLattice) = has_extended_unionsplit(widenlattice(𝕃))
+has_extended_unionsplit(::AnyMustAliasesLattice) = true
+has_extended_unionsplit(::JLTypeLattice) = false
+
 # Curried versions
 ⊑(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(lattice, a, b)
 ⊏(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(lattice, a, b)
diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index 77b36cb9c7f71..1f62d21c9d2d9 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -36,7 +36,7 @@ let interp = NativeInterpreter()
         else
             tt = Tuple{typeof(f), Vararg{Any}}
         end
-        for m in _methods_by_ftype(tt, 10, typemax(UInt))::Vector
+        for m in _methods_by_ftype(tt, 10, get_world_counter())::Vector
             # remove any TypeVars from the intersection
             m = m::MethodMatch
             typ = Any[m.spec_types.parameters...]
diff --git a/base/compiler/cicache.jl b/base/compiler/cicache.jl
index 294b1f0055f79..8332777e6d5bc 100644
--- a/base/compiler/cicache.jl
+++ b/base/compiler/cicache.jl
@@ -7,11 +7,11 @@ Internally, each `MethodInstance` keep a unique global cache of code instances
 that have been created for the given method instance, stratified by world age
 ranges. This struct abstracts over access to this cache.
 """
-struct InternalCodeCache
-end
+struct InternalCodeCache end
 
 function setindex!(cache::InternalCodeCache, ci::CodeInstance, mi::MethodInstance)
     ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, ci)
+    return cache
 end
 
 const GLOBAL_CI_CACHE = InternalCodeCache()
@@ -49,11 +49,11 @@ WorldView(wvc::WorldView, wr::WorldRange) = WorldView(wvc.cache, wr)
 WorldView(wvc::WorldView, args...) = WorldView(wvc.cache, args...)
 
 function haskey(wvc::WorldView{InternalCodeCache}, mi::MethodInstance)
-    ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds))::Union{Nothing, CodeInstance} !== nothing
+    return ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds)) !== nothing
 end
 
 function get(wvc::WorldView{InternalCodeCache}, mi::MethodInstance, default)
-    r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds))::Union{Nothing, CodeInstance}
+    r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds))
     if r === nothing
         return default
     end
@@ -66,5 +66,7 @@ function getindex(wvc::WorldView{InternalCodeCache}, mi::MethodInstance)
     return r::CodeInstance
 end
 
-setindex!(wvc::WorldView{InternalCodeCache}, ci::CodeInstance, mi::MethodInstance) =
+function setindex!(wvc::WorldView{InternalCodeCache}, ci::CodeInstance, mi::MethodInstance)
     setindex!(wvc.cache, ci, mi)
+    return wvc
+end
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 7213b3615e8e1..0a1b852b052f9 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -6,7 +6,7 @@ using Core.Intrinsics, Core.IR
 
 import Core: print, println, show, write, unsafe_write, stdout, stderr,
              _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction,
-             MethodInstance, CodeInstance, MethodMatch, PartialOpaque,
+             MethodInstance, CodeInstance, MethodTable, MethodMatch, PartialOpaque,
              TypeofVararg
 
 const getproperty = Core.getfield
@@ -152,7 +152,6 @@ include("compiler/ssair/domtree.jl")
 include("compiler/ssair/ir.jl")
 
 include("compiler/abstractlattice.jl")
-
 include("compiler/inferenceresult.jl")
 include("compiler/inferencestate.jl")
 
diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl
index 27e41bf04865d..ec64b7601bc76 100644
--- a/base/compiler/effects.jl
+++ b/base/compiler/effects.jl
@@ -54,6 +54,37 @@ analyzed method (see the implementation of `merge_effects!`). Each effect proper
 initialized with `ALWAYS_TRUE`/`true` and then transitioned towards `ALWAYS_FALSE`/`false`.
 Note that within the current flow-insensitive analysis design, effects detected by local
 analysis on each statement usually taint the global conclusion conservatively.
+
+## Key for `show` output of Effects:
+
+The output represents the state of different effect properties in the following order:
+
+1. `consistent` (`c`):
+    - `+c` (green): `ALWAYS_TRUE`
+    - `-c` (red): `ALWAYS_FALSE`
+    - `?c` (yellow): `CONSISTENT_IF_NOTRETURNED` and/or `CONSISTENT_IF_INACCESSIBLEMEMONLY`
+2. `effect_free` (`e`):
+    - `+e` (green): `ALWAYS_TRUE`
+    - `-e` (red): `ALWAYS_FALSE`
+    - `?e` (yellow): `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`
+3. `nothrow` (`n`):
+    - `+n` (green): `true`
+    - `-n` (red): `false`
+4. `terminates` (`t`):
+    - `+t` (green): `true`
+    - `-t` (red): `false`
+5. `notaskstate` (`s`):
+    - `+s` (green): `true`
+    - `-s` (red): `false`
+6. `inaccessiblememonly` (`m`):
+    - `+m` (green): `ALWAYS_TRUE`
+    - `-m` (red): `ALWAYS_FALSE`
+    - `?m` (yellow): `INACCESSIBLEMEM_OR_ARGMEMONLY`
+7. `noinbounds` (`i`):
+    - `+i` (green): `true`
+    - `-i` (red): `false`
+
+Additionally, if the `nonoverlayed` property is false, a red prime symbol (′) is displayed after the tuple.
 """
 struct Effects
     consistent::UInt8
@@ -72,7 +103,7 @@ struct Effects
         notaskstate::Bool,
         inaccessiblememonly::UInt8,
         nonoverlayed::Bool,
-        noinbounds::Bool = true)
+        noinbounds::Bool)
         return new(
             consistent,
             effect_free,
@@ -98,12 +129,12 @@ const EFFECT_FREE_IF_INACCESSIBLEMEMONLY = 0x01 << 1
 # :inaccessiblememonly bits
 const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1
 
-const EFFECTS_TOTAL    = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  true, true)
-const EFFECTS_THROWS   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  true, true)
-const EFFECTS_UNKNOWN  = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true, true)  # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
-const EFFECTS_UNKNOWN′ = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, false, true) # unknown really
+const EFFECTS_TOTAL    = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  true,  true)
+const EFFECTS_THROWS   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  true,  true)
+const EFFECTS_UNKNOWN  = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true,  false)  # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
+const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, false, false) # unknown really
 
-function Effects(e::Effects = EFFECTS_UNKNOWN′;
+function Effects(e::Effects = _EFFECTS_UNKNOWN;
     consistent::UInt8 = e.consistent,
     effect_free::UInt8 = e.effect_free,
     nothrow::Bool = e.nothrow,
@@ -157,7 +188,7 @@ is_foldable(effects::Effects) =
     is_effect_free(effects) &&
     is_terminates(effects)
 
-is_total(effects::Effects) =
+is_foldable_nothrow(effects::Effects) =
     is_foldable(effects) &&
     is_nothrow(effects)
 
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index c079553fca06a..3a96b21d7c40a 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -78,15 +78,15 @@ function is_argtype_match(𝕃::AbstractLattice,
     return !overridden_by_const
 end
 
-va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, linfo::MethodInstance) =
-    va_process_argtypes(Returns(nothing), 𝕃, given_argtypes, linfo)
-function va_process_argtypes(@nospecialize(va_handler!), 𝕃::AbstractLattice, given_argtypes::Vector{Any}, linfo::MethodInstance)
-    def = linfo.def::Method
-    isva = def.isva
-    nargs = Int(def.nargs)
+va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) =
+    va_process_argtypes(Returns(nothing), 𝕃, given_argtypes, mi)
+function va_process_argtypes(@nospecialize(va_handler!), 𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance)
+    def = mi.def
+    isva = isa(def, Method) ? def.isva : false
+    nargs = isa(def, Method) ? Int(def.nargs) : length(mi.specTypes.parameters)
     if isva || isvarargtype(given_argtypes[end])
         isva_given_argtypes = Vector{Any}(undef, nargs)
-        for i = 1:(nargs - isva)
+        for i = 1:(nargs-isva)
             isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
         end
         if isva
@@ -100,7 +100,7 @@ function va_process_argtypes(@nospecialize(va_handler!), 𝕃::AbstractLattice,
         end
         return isva_given_argtypes
     end
-    @assert length(given_argtypes) == nargs "invalid `given_argtypes` for `linfo`"
+    @assert length(given_argtypes) == nargs "invalid `given_argtypes` for `mi`"
     return given_argtypes
 end
 
@@ -110,18 +110,16 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe
     isva = !toplevel && method.isva
     linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
     nargs::Int = toplevel ? 0 : method.nargs
-    if !withfirst
-        # For opaque closure, the closure environment is processed elsewhere
-        nargs -= 1
-    end
+    # For opaque closure, the closure environment is processed elsewhere
+    withfirst || (nargs -= 1)
     cache_argtypes = Vector{Any}(undef, nargs)
     # First, if we're dealing with a varargs method, then we set the last element of `args`
     # to the appropriate `Tuple` type or `PartialStruct` instance.
     if !toplevel && isva
         if specTypes::Type == Tuple
+            linfo_argtypes = Any[Any for i = 1:nargs]
             if nargs > 1
-                linfo_argtypes = Any[Any for i = 1:nargs]
-                linfo_argtypes[end] = Vararg{Any}
+                linfo_argtypes[end] = Tuple
             end
             vargtype = Tuple
         else
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index 0ae7989c82c76..97a7ed66ab9b5 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -1,22 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# The type of a variable load is either a value or an UndefVarError
-# (only used in abstractinterpret, doesn't appear in optimize)
-struct VarState
-    typ
-    undef::Bool
-    VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
-end
-
-"""
-    const VarTable = Vector{VarState}
-
-The extended lattice that maps local variables to inferred type represented as `AbstractLattice`.
-Each index corresponds to the `id` of `SlotNumber` which identifies each local variable.
-Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement
-to enable flow-sensitive analysis.
-"""
-const VarTable = Vector{VarState}
+# data structures
+# ===============
 
 mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int}
     elems::BitSet
@@ -86,15 +71,140 @@ function append!(bsbmp::BitSetBoundedMinPrioritySet, itr)
     end
 end
 
+mutable struct TwoPhaseVectorView <: AbstractVector{Int}
+    const data::Vector{Int}
+    count::Int
+    const range::UnitRange{Int}
+end
+size(tpvv::TwoPhaseVectorView) = (tpvv.count,)
+function getindex(tpvv::TwoPhaseVectorView, i::Int)
+    checkbounds(tpvv, i)
+    @inbounds tpvv.data[first(tpvv.range) + i - 1]
+end
+function push!(tpvv::TwoPhaseVectorView, v::Int)
+    tpvv.count += 1
+    tpvv.data[first(tpvv.range) + tpvv.count - 1] = v
+    return nothing
+end
+
+"""
+    mutable struct TwoPhaseDefUseMap
+
+This struct is intended as a memory- and GC-pressure-efficient mechanism
+for incrementally computing def-use maps. The idea is that the def-use map
+is constructed into two passes over the IR. In the first, we simply count the
+the number of uses, computing the number of uses for each def as well as the
+total number of uses. In the second pass, we actually fill in the def-use
+information.
+
+The idea is that either of these two phases can be combined with other useful
+work that needs to scan the instruction stream anyway, while avoiding the
+significant allocation pressure of e.g. allocating an array for every SSA value
+or attempting to dynamically move things around as new uses are discovered.
+
+The def-use map is presented as a vector of vectors. For every def, indexing
+into the map will return a vector of uses.
+"""
+mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView}
+    ssa_uses::Vector{Int}
+    data::Vector{Int}
+    complete::Bool
+end
+
+function complete!(tpdum::TwoPhaseDefUseMap)
+    cumsum = 0
+    for i = 1:length(tpdum.ssa_uses)
+        this_val = cumsum + 1
+        cumsum += tpdum.ssa_uses[i]
+        tpdum.ssa_uses[i] = this_val
+    end
+    resize!(tpdum.data, cumsum)
+    fill!(tpdum.data, 0)
+    tpdum.complete = true
+end
+
+function TwoPhaseDefUseMap(nssas::Int)
+    ssa_uses = zeros(Int, nssas)
+    data = Int[]
+    complete = false
+    return TwoPhaseDefUseMap(ssa_uses, data, complete)
+end
+
+function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue)
+    @assert !tpdum.complete
+    tpdum.ssa_uses[arg.id] += 1
+end
+
+function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int)
+    if !tpdum.complete
+        tpdum.ssa_uses[def] -= 1
+    else
+        range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1))
+        # TODO: Sorted
+        useidx = findfirst(idx->tpdum.data[idx] == use, range)
+        @assert useidx !== nothing
+        idx = range[useidx]
+        while idx < lastindex(range)
+            ndata = tpdum.data[idx+1]
+            ndata == 0 && break
+            tpdum.data[idx] = ndata
+        end
+        tpdum.data[idx + 1] = 0
+    end
+end
+kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) =
+    kill_def_use!(tpdum, def.id, use)
+
+function getindex(tpdum::TwoPhaseDefUseMap, idx::Int)
+    @assert tpdum.complete
+    range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1))
+    # TODO: Make logarithmic
+    nelems = 0
+    for i in range
+        tpdum.data[i] == 0 && break
+        nelems += 1
+    end
+    return TwoPhaseVectorView(tpdum.data, nelems, range)
+end
+
+mutable struct LazyGenericDomtree{IsPostDom}
+    ir::IRCode
+    domtree::GenericDomTree{IsPostDom}
+    LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir)
+end
+function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom}
+    isdefined(x, :domtree) && return x.domtree
+    return @timeit "domtree 2" x.domtree = IsPostDom ?
+        construct_postdomtree(x.ir.cfg.blocks) :
+        construct_domtree(x.ir.cfg.blocks)
+end
+
+const LazyDomtree = LazyGenericDomtree{false}
+const LazyPostDomtree = LazyGenericDomtree{true}
+
+# InferenceState
+# ==============
+
+"""
+    const VarTable = Vector{VarState}
+
+The extended lattice that maps local variables to inferred type represented as `AbstractLattice`.
+Each index corresponds to the `id` of `SlotNumber` which identifies each local variable.
+Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement
+to enable flow-sensitive analysis.
+"""
+const VarTable = Vector{VarState}
+
 mutable struct InferenceState
     #= information about this method instance =#
     linfo::MethodInstance
     world::UInt
     mod::Module
-    sptypes::Vector{Any}
+    sptypes::Vector{VarState}
     slottypes::Vector{Any}
     src::CodeInfo
     cfg::CFG
+    method_info::MethodInfo
 
     #= intermediate states for local abstract interpretation =#
     currbb::Int
@@ -114,8 +224,7 @@ mutable struct InferenceState
     cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
     callers_in_cycle::Vector{InferenceState}
     dont_work_on_me::Bool
-    parent::Union{Nothing, InferenceState}
-    inferred::Bool # TODO move this to InferenceResult?
+    parent # ::Union{Nothing,AbsIntState}
 
     #= results =#
     result::InferenceResult # remember where to put the result
@@ -124,11 +233,11 @@ mutable struct InferenceState
     ipo_effects::Effects
 
     #= flags =#
-    params::InferenceParams
     # Whether to restrict inference of abstract call sites to avoid excessive work
     # Set by default for toplevel frame.
     restrict_abstract_call_sites::Bool
     cached::Bool # TODO move this to InferenceResult?
+    insert_coverage::Bool
 
     # The interpreter that created this inference state. Not looked at by
     # NativeInterpreter. But other interpreters may use this to detect cycles
@@ -136,7 +245,7 @@ mutable struct InferenceState
 
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
     function InferenceState(result::InferenceResult, src::CodeInfo, cache::Symbol,
-        interp::AbstractInterpreter)
+                            interp::AbstractInterpreter)
         linfo = result.linfo
         world = get_world_counter(interp)
         def = linfo.def
@@ -144,6 +253,7 @@ mutable struct InferenceState
         sptypes = sptypes_from_meth_instance(linfo)
         code = src.code::Vector{Any}
         cfg = compute_basic_blocks(code)
+        method_info = MethodInfo(src)
 
         currbb = currpc = 1
         ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
@@ -173,63 +283,36 @@ mutable struct InferenceState
         callers_in_cycle = Vector{InferenceState}()
         dont_work_on_me = false
         parent = nothing
-        inferred = false
 
         valid_worlds = WorldRange(src.min_world, src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
         bestguess = Bottom
         ipo_effects = EFFECTS_TOTAL
 
-        params = InferenceParams(interp)
+        insert_coverage = should_insert_coverage(mod, src)
+        if insert_coverage
+            ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE)
+        end
+
         restrict_abstract_call_sites = isa(linfo.def, Module)
         @assert cache === :no || cache === :local || cache === :global
         cached = cache === :global
 
-        frame = new(
-            linfo, world, mod, sptypes, slottypes, src, cfg,
-            currbb, currpc, ip, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
-            pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent, inferred,
-            result, valid_worlds, bestguess, ipo_effects,
-            params, restrict_abstract_call_sites, cached,
-            interp)
-
         # some more setups
-        params.unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
-        result.result = frame
+        InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
         cache !== :no && push!(get_inference_cache(interp), result)
 
-        return frame
+        return new(
+            linfo, world, mod, sptypes, slottypes, src, cfg, method_info,
+            currbb, currpc, ip, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
+            pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent,
+            result, valid_worlds, bestguess, ipo_effects,
+            restrict_abstract_call_sites, cached, insert_coverage,
+            interp)
     end
 end
 
-Effects(state::InferenceState) = state.ipo_effects
-
-function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects)
-    caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
-end
-
-merge_effects!(interp::AbstractInterpreter, caller::InferenceState, callee::InferenceState) =
-    merge_effects!(interp, caller, Effects(callee))
-merge_effects!(interp::AbstractInterpreter, caller::IRCode, effects::Effects) = nothing
-
-is_effect_overridden(sv::InferenceState, effect::Symbol) = is_effect_overridden(sv.linfo, effect)
-function is_effect_overridden(linfo::MethodInstance, effect::Symbol)
-    def = linfo.def
-    return isa(def, Method) && is_effect_overridden(def, effect)
-end
-is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect)
-is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect)
-
-add_remark!(::AbstractInterpreter, sv::Union{InferenceState, IRCode}, remark) = return
-
-function bail_out_toplevel_call(::AbstractInterpreter, @nospecialize(callsig), sv::Union{InferenceState, IRCode})
-    return isa(sv, InferenceState) && sv.restrict_abstract_call_sites && !isdispatchtuple(callsig)
-end
-function bail_out_call(::AbstractInterpreter, @nospecialize(rt), sv::Union{InferenceState, IRCode}, effects::Effects)
-    return rt === Any && !is_foldable(effects)
-end
-function bail_out_apply(::AbstractInterpreter, @nospecialize(rt), sv::Union{InferenceState, IRCode})
-    return rt === Any
-end
+is_inferred(sv::InferenceState) = is_inferred(sv.result)
+is_inferred(result::InferenceResult) = result.result !== nothing
 
 was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND
 
@@ -317,62 +400,129 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
     return handler_at
 end
 
-"""
-    Iterate through all callers of the given InferenceState in the abstract
-    interpretation stack (including the given InferenceState itself), vising
-    children before their parents (i.e. ascending the tree from the given
-    InferenceState). Note that cycles may be visited in any order.
-"""
-struct InfStackUnwind
-    inf::InferenceState
-end
-iterate(unw::InfStackUnwind) = (unw.inf, (unw.inf, 0))
-function iterate(unw::InfStackUnwind, (infstate, cyclei)::Tuple{InferenceState, Int})
-    # iterate through the cycle before walking to the parent
-    if cyclei < length(infstate.callers_in_cycle)
-        cyclei += 1
-        infstate = infstate.callers_in_cycle[cyclei]
-    else
-        cyclei = 0
-        infstate = infstate.parent
+# check if coverage mode is enabled
+function should_insert_coverage(mod::Module, src::CodeInfo)
+    coverage_enabled(mod) && return true
+    JLOptions().code_coverage == 3 || return false
+    # path-specific coverage mode: if any line falls in a tracked file enable coverage for all
+    linetable = src.linetable
+    if isa(linetable, Vector{Any})
+        for line in linetable
+            line = line::LineInfoNode
+            if is_file_tracked(line.file)
+                return true
+            end
+        end
+    elseif isa(linetable, Vector{LineInfoNode})
+        for line in linetable
+            if is_file_tracked(line.file)
+                return true
+            end
+        end
     end
-    infstate === nothing && return nothing
-    (infstate::InferenceState, (infstate, cyclei))
+    return false
 end
 
 function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter)
     # prepare an InferenceState object for inferring lambda
-    src = retrieve_code_info(result.linfo)
+    world = get_world_counter(interp)
+    src = retrieve_code_info(result.linfo, world)
     src === nothing && return nothing
     validate_code_in_debug_mode(result.linfo, src, "lowered")
     return InferenceState(result, src, cache, interp)
 end
 
+"""
+    constrains_param(var::TypeVar, sig, covariant::Bool, type_constrains::Bool)
+
+Check if `var` will be constrained to have a definite value
+in any concrete leaftype subtype of `sig`.
+
+It is used as a helper to determine whether type intersection is guaranteed to be able to
+find a value for a particular type parameter.
+A necessary condition for type intersection to not assign a parameter is that it only
+appears in a `Union[All]` and during subtyping some other union component (that does not
+constrain the type parameter) is selected.
+
+The `type_constrains` flag determines whether Type{T} is considered to be constraining
+`T`. This is not true in general, because of the existence of types with free type
+parameters, however, some callers would like to ignore this corner case.
+"""
+function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool, type_constrains::Bool=false)
+    typ === var && return true
+    while typ isa UnionAll
+        covariant && constrains_param(var, typ.var.ub, covariant, type_constrains) && return true
+        # typ.var.lb doesn't constrain var
+        typ = typ.body
+    end
+    if typ isa Union
+        # for unions, verify that both options would constrain var
+        ba = constrains_param(var, typ.a, covariant, type_constrains)
+        bb = constrains_param(var, typ.b, covariant, type_constrains)
+        (ba && bb) && return true
+    elseif typ isa DataType
+        # return true if any param constrains var
+        fc = length(typ.parameters)
+        if fc > 0
+            if typ.name === Tuple.name
+                # vararg tuple needs special handling
+                for i in 1:(fc - 1)
+                    p = typ.parameters[i]
+                    constrains_param(var, p, covariant, type_constrains) && return true
+                end
+                lastp = typ.parameters[fc]
+                vararg = unwrap_unionall(lastp)
+                if vararg isa Core.TypeofVararg && isdefined(vararg, :N)
+                    constrains_param(var, vararg.N, covariant, type_constrains) && return true
+                    # T = vararg.parameters[1] doesn't constrain var
+                else
+                    constrains_param(var, lastp, covariant, type_constrains) && return true
+                end
+            else
+                if typ.name === typename(Type) && typ.parameters[1] === var && var.ub === Any
+                    # Types with free type parameters are <: Type cause the typevar
+                    # to be unconstrained because Type{T} with free typevars is illegal
+                    return type_constrains
+                end
+                for i in 1:fc
+                    p = typ.parameters[i]
+                    constrains_param(var, p, false, type_constrains) && return true
+                end
+            end
+        end
+    end
+    return false
+end
+
+const EMPTY_SPTYPES = VarState[]
+
 function sptypes_from_meth_instance(linfo::MethodInstance)
-    toplevel = !isa(linfo.def, Method)
-    if !toplevel && isempty(linfo.sparam_vals) && isa(linfo.def.sig, UnionAll)
+    def = linfo.def
+    isa(def, Method) || return EMPTY_SPTYPES # toplevel
+    sig = def.sig
+    if isempty(linfo.sparam_vals)
+        isa(sig, UnionAll) || return EMPTY_SPTYPES
         # linfo is unspecialized
-        sp = Any[]
-        sig = linfo.def.sig
-        while isa(sig, UnionAll)
-            push!(sp, sig.var)
-            sig = sig.body
+        spvals = Any[]
+        sig′ = sig
+        while isa(sig′, UnionAll)
+            push!(spvals, sig′.var)
+            sig′ = sig′.body
         end
     else
-        sp = collect(Any, linfo.sparam_vals)
+        spvals = linfo.sparam_vals
     end
-    for i = 1:length(sp)
-        v = sp[i]
+    nvals = length(spvals)
+    sptypes = Vector{VarState}(undef, nvals)
+    for i = 1:nvals
+        v = spvals[i]
         if v isa TypeVar
-            temp = linfo.def.sig
+            temp = sig
             for j = 1:i-1
                 temp = temp.body
             end
             vᵢ = (temp::UnionAll).var
-            while temp isa UnionAll
-                temp = temp.body
-            end
-            sigtypes = (temp::DataType).parameters
+            sigtypes = (unwrap_unionall(temp)::DataType).parameters
             for j = 1:length(sigtypes)
                 sⱼ = sigtypes[j]
                 if isType(sⱼ) && sⱼ.parameters[1] === vᵢ
@@ -382,47 +532,45 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
                     @goto ty_computed
                 end
             end
-            ub = v.ub
-            while ub isa TypeVar
-                ub = ub.ub
-            end
+            ub = unwraptv_ub(v)
             if has_free_typevars(ub)
                 ub = Any
             end
-            lb = v.lb
-            while lb isa TypeVar
-                lb = lb.lb
-            end
+            lb = unwraptv_lb(v)
             if has_free_typevars(lb)
                 lb = Bottom
             end
-            if Any <: ub && lb <: Bottom
+            if Any === ub && lb === Bottom
                 ty = Any
             else
                 tv = TypeVar(v.name, lb, ub)
                 ty = UnionAll(tv, Type{tv})
             end
+            @label ty_computed
+            undef = !(let sig=sig
+                # if the specialized signature `linfo.specTypes` doesn't contain any free
+                # type variables, we can use it for a more accurate analysis of whether `v`
+                # is constrained or not, otherwise we should use `def.sig` which always
+                # doesn't contain any free type variables
+                if !has_free_typevars(linfo.specTypes)
+                    sig = linfo.specTypes
+                end
+                @assert !has_free_typevars(sig)
+                constrains_param(v, sig, #=covariant=#true)
+            end)
         elseif isvarargtype(v)
             ty = Int
+            undef = false
         else
             ty = Const(v)
+            undef = false
         end
-        @label ty_computed
-        sp[i] = ty
+        sptypes[i] = VarState(ty, undef)
     end
-    return sp
+    return sptypes
 end
 
-_topmod(sv::InferenceState) = _topmod(sv.mod)
-
-# work towards converging the valid age range for sv
-function update_valid_age!(sv::InferenceState, worlds::WorldRange)
-    sv.valid_worlds = intersect(worlds, sv.valid_worlds)
-    @assert(sv.world in sv.valid_worlds, "invalid age range update")
-    nothing
-end
-
-update_valid_age!(edge::InferenceState, sv::InferenceState) = update_valid_age!(sv, edge.valid_worlds)
+_topmod(sv::InferenceState) = _topmod(frame_module(sv))
 
 function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize(new), frame::InferenceState)
     ssavaluetypes = frame.ssavaluetypes
@@ -449,51 +597,23 @@ function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize
 end
 
 function add_cycle_backedge!(caller::InferenceState, frame::InferenceState, currpc::Int)
-    update_valid_age!(frame, caller)
+    update_valid_age!(caller, frame.valid_worlds)
     backedge = (caller, currpc)
     contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge)
     add_backedge!(caller, frame.linfo)
     return frame
 end
 
-# temporarily accumulate our edges to later add as backedges in the callee
-function add_backedge!(caller::InferenceState, li::MethodInstance)
-    edges = get_stmt_edges!(caller)
-    if edges !== nothing
-        push!(edges, li)
-    end
-    return nothing
-end
-
-function add_invoke_backedge!(caller::InferenceState, @nospecialize(invokesig::Type), li::MethodInstance)
-    edges = get_stmt_edges!(caller)
-    if edges !== nothing
-        push!(edges, invokesig, li)
-    end
-    return nothing
-end
-
-# used to temporarily accumulate our no method errors to later add as backedges in the callee method table
-function add_mt_backedge!(caller::InferenceState, mt::Core.MethodTable, @nospecialize(typ))
-    edges = get_stmt_edges!(caller)
-    if edges !== nothing
-        push!(edges, mt, typ)
-    end
-    return nothing
-end
-
-function get_stmt_edges!(caller::InferenceState)
-    if !isa(caller.linfo.def, Method)
-        return nothing # don't add backedges to toplevel exprs
-    end
-    edges = caller.stmt_edges[caller.currpc]
+function get_stmt_edges!(caller::InferenceState, currpc::Int=caller.currpc)
+    stmt_edges = caller.stmt_edges
+    edges = stmt_edges[currpc]
     if edges === nothing
-        edges = caller.stmt_edges[caller.currpc] = []
+        edges = stmt_edges[currpc] = []
     end
     return edges
 end
 
-function empty_backedges!(frame::InferenceState, currpc::Int = frame.currpc)
+function empty_backedges!(frame::InferenceState, currpc::Int=frame.currpc)
     edges = frame.stmt_edges[currpc]
     edges === nothing || empty!(edges)
     return nothing
@@ -512,13 +632,262 @@ function print_callstack(sv::InferenceState)
     end
 end
 
+function narguments(sv::InferenceState, include_va::Bool=true)
+    def = sv.linfo.def
+    nargs = length(sv.result.argtypes)
+    if !include_va
+        nargs -= isa(def, Method) && def.isva
+    end
+    return nargs
+end
+
+# IRInterpretationState
+# =====================
+
+# TODO add `result::InferenceResult` and put the irinterp result into the inference cache?
+mutable struct IRInterpretationState
+    const method_info::MethodInfo
+    const ir::IRCode
+    const mi::MethodInstance
+    const world::UInt
+    curridx::Int
+    const argtypes_refined::Vector{Bool}
+    const sptypes::Vector{VarState}
+    const tpdum::TwoPhaseDefUseMap
+    const ssa_refined::BitSet
+    const lazydomtree::LazyDomtree
+    valid_worlds::WorldRange
+    const edges::Vector{Any}
+    parent # ::Union{Nothing,AbsIntState}
+
+    function IRInterpretationState(interp::AbstractInterpreter,
+        method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
+        world::UInt, min_world::UInt, max_world::UInt)
+        curridx = 1
+        given_argtypes = Vector{Any}(undef, length(argtypes))
+        for i = 1:length(given_argtypes)
+            given_argtypes[i] = widenslotwrapper(argtypes[i])
+        end
+        given_argtypes = va_process_argtypes(optimizer_lattice(interp), given_argtypes, mi)
+        argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i])
+            for i = 1:length(given_argtypes)]
+        empty!(ir.argtypes)
+        append!(ir.argtypes, given_argtypes)
+        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
+        ssa_refined = BitSet()
+        lazydomtree = LazyDomtree(ir)
+        valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
+        edges = Any[]
+        parent = nothing
+        return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
+                   ssa_refined, lazydomtree, valid_worlds, edges, parent)
+    end
+end
+
+function IRInterpretationState(interp::AbstractInterpreter,
+    code::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt)
+    @assert code.def === mi
+    src = @atomic :monotonic code.inferred
+    if isa(src, String)
+        src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo
+    else
+        isa(src, CodeInfo) || return nothing
+    end
+    method_info = MethodInfo(src)
+    ir = inflate_ir(src, mi)
+    return IRInterpretationState(interp, method_info, ir, mi, argtypes, world,
+                                 src.min_world, src.max_world)
+end
+
+# AbsIntState
+# ===========
+
+const AbsIntState = Union{InferenceState,IRInterpretationState}
+
+frame_instance(sv::InferenceState) = sv.linfo
+frame_instance(sv::IRInterpretationState) = sv.mi
+
+function frame_module(sv::AbsIntState)
+    mi = frame_instance(sv)
+    def = mi.def
+    isa(def, Module) && return def
+    return def.module
+end
+
+frame_parent(sv::InferenceState) = sv.parent::Union{Nothing,AbsIntState}
+frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState}
+
+is_constproped(sv::InferenceState) = any(sv.result.overridden_by_const)
+is_constproped(::IRInterpretationState) = true
+
+is_cached(sv::InferenceState) = sv.cached
+is_cached(::IRInterpretationState) = false
+
+method_info(sv::InferenceState) = sv.method_info
+method_info(sv::IRInterpretationState) = sv.method_info
+
+propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds
+method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_for_inference_limit_heuristics
+
+frame_world(sv::InferenceState) = sv.world
+frame_world(sv::IRInterpretationState) = sv.world
+
+callers_in_cycle(sv::InferenceState) = sv.callers_in_cycle
+callers_in_cycle(sv::IRInterpretationState) = ()
+
+is_effect_overridden(sv::AbsIntState, effect::Symbol) = is_effect_overridden(frame_instance(sv), effect)
+function is_effect_overridden(linfo::MethodInstance, effect::Symbol)
+    def = linfo.def
+    return isa(def, Method) && is_effect_overridden(def, effect)
+end
+is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect)
+is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect)
+
+has_conditional(𝕃::AbstractLattice, ::InferenceState) = has_conditional(𝕃)
+has_conditional(::AbstractLattice, ::IRInterpretationState) = false
+
+# work towards converging the valid age range for sv
+function update_valid_age!(sv::AbsIntState, valid_worlds::WorldRange)
+    valid_worlds = sv.valid_worlds = intersect(valid_worlds, sv.valid_worlds)
+    @assert sv.world in valid_worlds "invalid age range update"
+    return valid_worlds
+end
+
+"""
+    AbsIntStackUnwind(sv::AbsIntState)
+
+Iterate through all callers of the given `AbsIntState` in the abstract interpretation stack
+(including the given `AbsIntState` itself), visiting children before their parents (i.e.
+ascending the tree from the given `AbsIntState`).
+Note that cycles may be visited in any order.
+"""
+struct AbsIntStackUnwind
+    sv::AbsIntState
+end
+iterate(unw::AbsIntStackUnwind) = (unw.sv, (unw.sv, 0))
+function iterate(unw::AbsIntStackUnwind, (sv, cyclei)::Tuple{AbsIntState, Int})
+    # iterate through the cycle before walking to the parent
+    if cyclei < length(callers_in_cycle(sv))
+        cyclei += 1
+        parent = callers_in_cycle(sv)[cyclei]
+    else
+        cyclei = 0
+        parent = frame_parent(sv)
+    end
+    parent === nothing && return nothing
+    return (parent, (parent, cyclei))
+end
+
+# temporarily accumulate our edges to later add as backedges in the callee
+function add_backedge!(caller::InferenceState, mi::MethodInstance)
+    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
+    return push!(get_stmt_edges!(caller), mi)
+end
+function add_backedge!(irsv::IRInterpretationState, mi::MethodInstance)
+    return push!(irsv.edges, mi)
+end
+
+function add_invoke_backedge!(caller::InferenceState, @nospecialize(invokesig::Type), mi::MethodInstance)
+    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
+    return push!(get_stmt_edges!(caller), invokesig, mi)
+end
+function add_invoke_backedge!(irsv::IRInterpretationState, @nospecialize(invokesig::Type), mi::MethodInstance)
+    return push!(irsv.edges, invokesig, mi)
+end
+
+# used to temporarily accumulate our no method errors to later add as backedges in the callee method table
+function add_mt_backedge!(caller::InferenceState, mt::MethodTable, @nospecialize(typ))
+    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
+    return push!(get_stmt_edges!(caller), mt, typ)
+end
+function add_mt_backedge!(irsv::IRInterpretationState, mt::MethodTable, @nospecialize(typ))
+    return push!(irsv.edges, mt, typ)
+end
+
 get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
+get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag]
+
 add_curr_ssaflag!(sv::InferenceState, flag::UInt8) = sv.src.ssaflags[sv.currpc] |= flag
+add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt8) = sv.ir.stmts[sv.curridx][:flag] |= flag
+
 sub_curr_ssaflag!(sv::InferenceState, flag::UInt8) = sv.src.ssaflags[sv.currpc] &= ~flag
+sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt8) = sv.ir.stmts[sv.curridx][:flag] &= ~flag
 
-function narguments(sv::InferenceState)
-    def = sv.linfo.def
-    isva = isa(def, Method) && def.isva
-    nargs = length(sv.result.argtypes) - isva
-    return nargs
+merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) =
+    caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
+merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return
+
+struct InferenceLoopState
+    sig
+    rt
+    effects::Effects
+    function InferenceLoopState(@nospecialize(sig), @nospecialize(rt), effects::Effects)
+        new(sig, rt, effects)
+    end
+end
+
+bail_out_toplevel_call(::AbstractInterpreter, state::InferenceLoopState, sv::InferenceState) =
+    sv.restrict_abstract_call_sites && !isdispatchtuple(state.sig)
+bail_out_toplevel_call(::AbstractInterpreter, ::InferenceLoopState, ::IRInterpretationState) = false
+
+bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) =
+    state.rt === Any && !is_foldable(state.effects)
+bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
+    state.rt === Any && !is_foldable(state.effects)
+
+bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) =
+    state.rt === Any
+bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
+    state.rt === Any
+
+function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
+    if InferenceParams(interp).unoptimize_throw_blocks
+        # Disable inference of calls in throw blocks, since we're unlikely to
+        # need their types. There is one exception however: If up until now, the
+        # function has not seen any side effects, we would like to make sure there
+        # aren't any in the throw block either to enable other optimizations.
+        if is_stmt_throw_block(get_curr_ssaflag(sv))
+            should_infer_for_effects(sv) || return false
+        end
+    end
+    return true
+end
+function should_infer_for_effects(sv::InferenceState)
+    effects = sv.ipo_effects
+    return is_terminates(effects) && is_effect_free(effects)
+end
+should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
+
+add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
+add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
+
+function get_max_methods(interp::AbstractInterpreter, @nospecialize(f), sv::AbsIntState)
+    fmax = get_max_methods_for_func(f)
+    fmax !== nothing && return fmax
+    return get_max_methods(interp, sv)
+end
+function get_max_methods(interp::AbstractInterpreter, @nospecialize(f))
+    fmax = get_max_methods_for_func(f)
+    fmax !== nothing && return fmax
+    return get_max_methods(interp)
+end
+function get_max_methods(interp::AbstractInterpreter, sv::AbsIntState)
+    mmax = get_max_methods_for_module(sv)
+    mmax !== nothing && return mmax
+    return get_max_methods(interp)
+end
+get_max_methods(interp::AbstractInterpreter) = InferenceParams(interp).max_methods
+
+function get_max_methods_for_func(@nospecialize(f))
+    if f !== nothing
+        fmm = typeof(f).name.max_methods
+        fmm !== UInt8(0) && return Int(fmm)
+    end
+    return nothing
+end
+get_max_methods_for_module(sv::AbsIntState) = get_max_methods_for_module(frame_module(sv))
+function get_max_methods_for_module(mod::Module)
+    max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int
+    max_methods < 0 && return nothing
+    return max_methods
 end
diff --git a/base/compiler/methodtable.jl b/base/compiler/methodtable.jl
index 7f344aeb0e6de..8c79b2d8a8468 100644
--- a/base/compiler/methodtable.jl
+++ b/base/compiler/methodtable.jl
@@ -39,7 +39,7 @@ external table, e.g., to override existing method.
 """
 struct OverlayMethodTable <: MethodTableView
     world::UInt
-    mt::Core.MethodTable
+    mt::MethodTable
 end
 
 struct MethodMatchKey
@@ -98,7 +98,7 @@ function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int
         !isempty(result))
 end
 
-function _findall(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, world::UInt, limit::Int)
+function _findall(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt, limit::Int)
     _min_val = RefValue{UInt}(typemin(UInt))
     _max_val = RefValue{UInt}(typemax(UInt))
     _ambig = RefValue{Int32}(0)
@@ -155,7 +155,7 @@ function findsup(@nospecialize(sig::Type), table::OverlayMethodTable)
         false)
 end
 
-function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, world::UInt)
+function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt)
     min_valid = RefValue{UInt}(typemin(UInt))
     max_valid = RefValue{UInt}(typemax(UInt))
     match = ccall(:jl_gf_invoke_lookup_worlds, Any, (Any, Any, UInt, Ptr{Csize_t}, Ptr{Csize_t}),
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 2c26848ac1ca1..71eeb15d53eb0 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -32,6 +32,10 @@ const IR_FLAG_EFFECT_FREE = 0x01 << 4
 const IR_FLAG_NOTHROW     = 0x01 << 5
 # This is :consistent
 const IR_FLAG_CONSISTENT  = 0x01 << 6
+# An optimization pass has updated this statement in a way that may
+# have exposed information that inference did not see. Re-running
+# inference on this statement may be profitable.
+const IR_FLAG_REFINED     = 0x01 << 7
 
 const TOP_TUPLE = GlobalRef(Core, :tuple)
 
@@ -39,7 +43,7 @@ const TOP_TUPLE = GlobalRef(Core, :tuple)
 const InlineCostType = UInt16
 const MAX_INLINE_COST = typemax(InlineCostType)
 const MIN_INLINE_COST = InlineCostType(10)
-const MaybeCompressed = Union{CodeInfo, Vector{UInt8}}
+const MaybeCompressed = Union{CodeInfo, String}
 
 is_inlineable(@nospecialize src::MaybeCompressed) =
     ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST
@@ -121,17 +125,16 @@ function inlining_policy(interp::AbstractInterpreter,
 end
 
 struct InliningState{Interp<:AbstractInterpreter}
-    params::OptimizationParams
     et::Union{EdgeTracker,Nothing}
     world::UInt
     interp::Interp
 end
-function InliningState(frame::InferenceState, params::OptimizationParams, interp::AbstractInterpreter)
-    et = EdgeTracker(frame.stmt_edges[1]::Vector{Any}, frame.valid_worlds)
-    return InliningState(params, et, frame.world, interp)
+function InliningState(sv::InferenceState, interp::AbstractInterpreter)
+    et = EdgeTracker(sv.stmt_edges[1]::Vector{Any}, sv.valid_worlds)
+    return InliningState(et, sv.world, interp)
 end
-function InliningState(params::OptimizationParams, interp::AbstractInterpreter)
-    return InliningState(params, nothing, get_world_counter(interp), interp)
+function InliningState(interp::AbstractInterpreter)
+    return InliningState(nothing, get_world_counter(interp), interp)
 end
 
 # get `code_cache(::AbstractInterpreter)` from `state::InliningState`
@@ -145,20 +148,20 @@ mutable struct OptimizationState{Interp<:AbstractInterpreter}
     ir::Union{Nothing, IRCode}
     stmt_info::Vector{CallInfo}
     mod::Module
-    sptypes::Vector{Any}
+    sptypes::Vector{VarState}
     slottypes::Vector{Any}
     inlining::InliningState{Interp}
     cfg::Union{Nothing,CFG}
+    insert_coverage::Bool
 end
-function OptimizationState(frame::InferenceState, params::OptimizationParams,
-                           interp::AbstractInterpreter, recompute_cfg::Bool=true)
-    inlining = InliningState(frame, params, interp)
-    cfg = recompute_cfg ? nothing : frame.cfg
-    return OptimizationState(frame.linfo, frame.src, nothing, frame.stmt_info, frame.mod,
-               frame.sptypes, frame.slottypes, inlining, cfg)
+function OptimizationState(sv::InferenceState, interp::AbstractInterpreter,
+                           recompute_cfg::Bool=true)
+    inlining = InliningState(sv, interp)
+    cfg = recompute_cfg ? nothing : sv.cfg
+    return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod,
+                             sv.sptypes, sv.slottypes, inlining, cfg, sv.insert_coverage)
 end
-function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams,
-                           interp::AbstractInterpreter)
+function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::AbstractInterpreter)
     # prepare src for running optimization passes if it isn't already
     nssavalues = src.ssavaluetypes
     if nssavalues isa Int
@@ -178,24 +181,28 @@ function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::Optimiz
     mod = isa(def, Method) ? def.module : def
     # Allow using the global MI cache, but don't track edges.
     # This method is mostly used for unit testing the optimizer
-    inlining = InliningState(params, interp)
-    return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, nothing)
+    inlining = InliningState(interp)
+    return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, nothing, false)
 end
-function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter)
-    src = retrieve_code_info(linfo)
+function OptimizationState(linfo::MethodInstance, interp::AbstractInterpreter)
+    world = get_world_counter(interp)
+    src = retrieve_code_info(linfo, world)
     src === nothing && return nothing
-    return OptimizationState(linfo, src, params, interp)
+    return OptimizationState(linfo, src, interp)
 end
 
 function ir_to_codeinf!(opt::OptimizationState)
     (; linfo, src) = opt
-    optdef = linfo.def
-    replace_code_newstyle!(src, opt.ir::IRCode, isa(optdef, Method) ? Int(optdef.nargs) : 0)
+    src = ir_to_codeinf!(src, opt.ir::IRCode)
     opt.ir = nothing
+    validate_code_in_debug_mode(linfo, src, "optimized")
+    return src
+end
+
+function ir_to_codeinf!(src::CodeInfo, ir::IRCode)
+    replace_code_newstyle!(src, ir)
     widen_all_consts!(src)
     src.inferred = true
-    # finish updating the result struct
-    validate_code_in_debug_mode(linfo, src, "optimized")
     return src
 end
 
@@ -228,23 +235,6 @@ is_stmt_inline(stmt_flag::UInt8)      = stmt_flag & IR_FLAG_INLINE      ≠ 0
 is_stmt_noinline(stmt_flag::UInt8)    = stmt_flag & IR_FLAG_NOINLINE    ≠ 0
 is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
 
-# These affect control flow within the function (so may not be removed
-# if there is no usage within the function), but don't affect the purity
-# of the function as a whole.
-function stmt_affects_purity(@nospecialize(stmt), ir)
-    if isa(stmt, GotoNode) || isa(stmt, ReturnNode)
-        return false
-    end
-    if isa(stmt, GotoIfNot)
-        t = argextype(stmt.cond, ir)
-        return !(t ⊑ Bool)
-    end
-    if isa(stmt, Expr)
-        return stmt.head !== :loopinfo && stmt.head !== :enter
-    end
-    return true
-end
-
 """
     stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) ->
         (consistent::Bool, effect_free_and_nothrow::Bool, nothrow::Bool)
@@ -258,18 +248,16 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
     isa(stmt, ReturnNode) && return (true, false, true)
     isa(stmt, GotoNode) && return (true, false, true)
     isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool))
-    isa(stmt, Slot) && return (true, false, false) # Slots shouldn't occur in the IR at this point, but let's be defensive here
     if isa(stmt, GlobalRef)
         nothrow = isdefined(stmt.mod, stmt.name)
         consistent = nothrow && isconst(stmt.mod, stmt.name)
         return (consistent, nothrow, nothrow)
-    end
-    if isa(stmt, Expr)
+    elseif isa(stmt, Expr)
         (; head, args) = stmt
         if head === :static_parameter
-            etyp = (isa(src, IRCode) ? src.sptypes : src.ir.sptypes)[args[1]::Int]
             # if we aren't certain enough about the type, it might be an UndefVarError at runtime
-            nothrow = isa(etyp, Const)
+            sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes
+            nothrow = !sptypes[args[1]::Int].undef
             return (true, nothrow, nothrow)
         end
         if head === :call
@@ -354,30 +342,31 @@ function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospe
             return (false, false, false)
         end
     end
+    isa(stmt, UnoptSlot) && error("unexpected IR elements")
     return (true, true, true)
 end
 
 """
     argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
-    argextype(x, src::CodeInfo, sptypes::Vector{Any}) -> t
+    argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t
 
 Return the type of value `x` in the context of inferred source `src`.
 Note that `t` might be an extended lattice element.
 Use `widenconst(t)` to get the native Julia type of `x`.
 """
-argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{Any} = ir.sptypes) =
+argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) =
     argextype(x, ir, sptypes, ir.argtypes)
-function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{Any} = compact.ir.sptypes)
+function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes)
     isa(x, AnySSAValue) && return types(compact)[x]
     return argextype(x, compact, sptypes, compact.ir.argtypes)
 end
-argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{Any}) = argextype(x, src, sptypes, src.slottypes::Vector{Any})
+argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState}) = argextype(x, src, sptypes, src.slottypes::Vector{Any})
 function argextype(
     @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
-    sptypes::Vector{Any}, slottypes::Vector{Any})
+    sptypes::Vector{VarState}, slottypes::Vector{Any})
     if isa(x, Expr)
         if x.head === :static_parameter
-            return sptypes[x.args[1]::Int]
+            return sptypes[x.args[1]::Int].typ
         elseif x.head === :boundscheck
             return Bool
         elseif x.head === :copyast
@@ -404,82 +393,27 @@ function argextype(
         return Const(x)
     end
 end
+abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any})
 abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
 
-struct ConstAPI
-    val
-    ConstAPI(@nospecialize val) = new(val)
-end
-
 """
     finish(interp::AbstractInterpreter, opt::OptimizationState,
-           params::OptimizationParams, ir::IRCode, caller::InferenceResult) -> analyzed::Union{Nothing,ConstAPI}
+           ir::IRCode, caller::InferenceResult)
 
-Post process information derived by Julia-level optimizations for later uses:
-- computes "purity", i.e. side-effect-freeness
-- computes inlining cost
-
-In a case when the purity is proven, `finish` can return `ConstAPI` object wrapping the constant
-value so that the runtime system will use the constant calling convention for the method calls.
+Post-process information derived by Julia-level optimizations for later use.
+In particular, this function determines the inlineability of the optimized code.
 """
 function finish(interp::AbstractInterpreter, opt::OptimizationState,
-                params::OptimizationParams, ir::IRCode, caller::InferenceResult)
+                ir::IRCode, caller::InferenceResult)
     (; src, linfo) = opt
     (; def, specTypes) = linfo
 
-    analyzed = nothing # `ConstAPI` if this call can use constant calling convention
     force_noinline = is_declared_noinline(src)
 
     # compute inlining and other related optimizations
     result = caller.result
     @assert !(result isa LimitedAccuracy)
     result = widenslotwrapper(result)
-    if (isa(result, Const) || isconstType(result))
-        proven_pure = false
-        # must be proven pure to use constant calling convention;
-        # otherwise we might skip throwing errors (issue #20704)
-        # TODO: Improve this analysis; if a function is marked @pure we should really
-        # only care about certain errors (e.g. method errors and type errors).
-        if length(ir.stmts) < 15
-            proven_pure = true
-            for i in 1:length(ir.stmts)
-                node = ir.stmts[i]
-                stmt = node[:inst]
-                if stmt_affects_purity(stmt, ir) && !stmt_effect_flags(optimizer_lattice(interp), stmt, node[:type], ir)[2]
-                    proven_pure = false
-                    break
-                end
-            end
-            if proven_pure
-                for fl in src.slotflags
-                    if (fl & SLOT_USEDUNDEF) != 0
-                        proven_pure = false
-                        break
-                    end
-                end
-            end
-        end
-
-        if proven_pure
-            # use constant calling convention
-            # Do not emit `jl_fptr_const_return` if coverage is enabled
-            # so that we don't need to add coverage support
-            # to the `jl_call_method_internal` fast path
-            # Still set pure flag to make sure `inference` tests pass
-            # and to possibly enable more optimization in the future
-            src.pure = true
-            if isa(result, Const)
-                val = result.val
-                if is_inlineable_constant(val)
-                    analyzed = ConstAPI(val)
-                end
-            else
-                @assert isconstType(result)
-                analyzed = ConstAPI(result.parameters[1])
-            end
-            force_noinline || set_inlineable!(src, true)
-        end
-    end
 
     opt.ir = ir
 
@@ -510,6 +444,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState,
             set_inlineable!(src, true)
         else
             # compute the cost (size) of inlining this code
+            params = OptimizationParams(interp)
             cost_threshold = default = params.inline_cost_threshold
             if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result))
                 cost_threshold += params.inline_tupleret_bonus
@@ -528,15 +463,13 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState,
             src.inlining_cost = inline_cost(ir, params, union_penalties, cost_threshold)
         end
     end
-
-    return analyzed
+    return nothing
 end
 
 # run the optimization work
-function optimize(interp::AbstractInterpreter, opt::OptimizationState,
-                  params::OptimizationParams, caller::InferenceResult)
+function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
     @timeit "optimizer" ir = run_passes(opt.src, opt, caller)
-    return finish(interp, opt, params, ir, caller)
+    return finish(interp, opt, ir, caller)
 end
 
 using .EscapeAnalysis
@@ -612,18 +545,6 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
         linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
     end
 
-    # check if coverage mode is enabled
-    coverage = coverage_enabled(sv.mod)
-    if !coverage && JLOptions().code_coverage == 3 # path-specific coverage mode
-        for line in linetable
-            if is_file_tracked(line.file)
-                # if any line falls in a tracked file enable coverage for all
-                coverage = true
-                break
-            end
-        end
-    end
-
     # Go through and add an unreachable node after every
     # Union{} call. Then reindex labels.
     code = copy_exprargs(ci.code)
@@ -639,7 +560,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
     prevloc = zero(eltype(ci.codelocs))
     while idx <= length(code)
         codeloc = codelocs[idx]
-        if coverage && codeloc != prevloc && codeloc != 0
+        if sv.insert_coverage && codeloc != prevloc && codeloc != 0
             # insert a side-effect instruction before the current instruction in the same basic block
             insert!(code, idx, Expr(:code_coverage_effect))
             insert!(codelocs, idx, codeloc)
@@ -650,7 +571,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
                 ssachangemap = fill(0, nstmts)
             end
             if labelchangemap === nothing
-                labelchangemap = coverage ? fill(0, nstmts) : ssachangemap
+                labelchangemap = fill(0, nstmts)
             end
             ssachangemap[oldidx] += 1
             if oldidx < length(labelchangemap)
@@ -671,11 +592,11 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
                     ssachangemap = fill(0, nstmts)
                 end
                 if labelchangemap === nothing
-                    labelchangemap = coverage ? fill(0, nstmts) : ssachangemap
+                    labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap
                 end
                 if oldidx < length(ssachangemap)
                     ssachangemap[oldidx + 1] += 1
-                    coverage && (labelchangemap[oldidx + 1] += 1)
+                    sv.insert_coverage && (labelchangemap[oldidx + 1] += 1)
                 end
                 idx += 1
             end
@@ -699,7 +620,11 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
     if cfg === nothing
         cfg = compute_basic_blocks(code)
     end
-    return IRCode(stmts, cfg, linetable, sv.slottypes, meta, sv.sptypes)
+    # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
+    # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
+    # and eliminates slots (see below)
+    argtypes = sv.slottypes
+    return IRCode(stmts, cfg, linetable, argtypes, meta, sv.sptypes)
 end
 
 function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
@@ -718,6 +643,9 @@ function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
     defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
     𝕃ₒ = optimizer_lattice(sv.inlining.interp)
     @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes, 𝕃ₒ) # consumes `ir`
+    # NOTE now we have converted `ir` to the SSA form and eliminated slots
+    # let's resize `argtypes` now and remove unnecessary types for the eliminated slots
+    resize!(ir.argtypes, nargs)
     return ir
 end
 
@@ -729,7 +657,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
 # known return type
 isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
 
-function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any},
+function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
                         union_penalties::Bool, params::OptimizationParams, error_path::Bool = false)
     head = ex.head
     if is_meta_expr_head(head)
@@ -820,7 +748,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
     return 0
 end
 
-function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any},
+function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
                                   union_penalties::Bool, params::OptimizationParams)
     thiscost = 0
     dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
@@ -850,7 +778,7 @@ function inline_cost(ir::IRCode, params::OptimizationParams, union_penalties::Bo
     return inline_cost_clamp(bodycost)
 end
 
-function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams)
+function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, unionpenalties::Bool, params::OptimizationParams)
     maxcost = 0
     for line = 1:length(body)
         stmt = body[line]
diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
index 729e9a9a49b94..8bc173add6eaa 100644
--- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
+++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
@@ -791,6 +791,7 @@ function compute_frameinfo(ir::IRCode, call_resolved::Bool)
             check_effect_free!(ir, idx, stmt, inst[:type], 𝕃ₒ)
         end
         if callinfo !== nothing && isexpr(stmt, :call)
+            # TODO: pass effects here
             callinfo[idx] = resolve_call(ir, stmt, inst[:info])
         elseif isexpr(stmt, :enter)
             @assert idx ≤ nstmts "try/catch inside new_nodes unsupported"
diff --git a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
index d87b0edaf295e..26b0e5b404641 100644
--- a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
+++ b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
@@ -14,6 +14,7 @@ struct EACallInfo
 end
 
 function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info::CallInfo))
+    # TODO: if effect free, return true
     sig = call_sig(ir, stmt)
     if sig === nothing
         return missing
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index efb30c05811d0..3c444894dd4b6 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -79,7 +79,7 @@ function ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds
     @timeit "analysis" todo = assemble_inline_todo!(ir, state)
     isempty(todo) && return ir
     # Do the actual inlining for every call we identified
-    @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, state.params)
+    @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, OptimizationParams(state.interp))
     return ir
 end
 
@@ -302,18 +302,39 @@ function finish_cfg_inline!(state::CFGInliningState)
     end
 end
 
+# duplicated from IRShow
+function normalize_method_name(m)
+    if m isa Method
+        return m.name
+    elseif m isa MethodInstance
+        return (m.def::Method).name
+    elseif m isa Symbol
+        return m
+    else
+        return Symbol("")
+    end
+end
+@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method)
+
+inline_node_is_duplicate(topline::LineInfoNode, line::LineInfoNode) =
+    topline.module === line.module &&
+    method_name(topline) === method_name(line) &&
+    topline.file === line.file &&
+    topline.line === line.line
+
 function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode,
-                              inlinee::Method,
+                              inlinee::MethodInstance,
                               inlined_at::Int32)
-    coverage = coverage_enabled(inlinee.module)
+    inlinee_def = inlinee.def::Method
+    coverage = coverage_enabled(inlinee_def.module)
     linetable_offset::Int32 = length(linetable)
     # Append the linetable of the inlined function to our line table
     topline::Int32 = linetable_offset + Int32(1)
     coverage_by_path = JLOptions().code_coverage == 3
-    push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at))
+    push!(linetable, LineInfoNode(inlinee_def.module, inlinee, inlinee_def.file, inlinee_def.line, inlined_at))
     oldlinetable = inlinee_ir.linetable
     extra_coverage_line = zero(Int32)
-    for oldline in 1:length(oldlinetable)
+    for oldline in eachindex(oldlinetable)
         entry = oldlinetable[oldline]
         if !coverage && coverage_by_path && is_file_tracked(entry.file)
             # include topline coverage entry if in path-specific coverage mode, and any file falls under path
@@ -323,7 +344,7 @@ function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCod
             (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at))
         if oldline == 1
             # check for a duplicate on the first iteration (likely true)
-            if newentry === linetable[topline]
+            if inline_node_is_duplicate(linetable[topline], newentry)
                 continue
             else
                 linetable_offset += 1
@@ -339,9 +360,10 @@ end
 
 function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
         linetable::Vector{LineInfoNode}, ir′::IRCode, sparam_vals::SimpleVector,
-        def::Method, inlined_at::Int32, argexprs::Vector{Any})
+        mi::MethodInstance, inlined_at::Int32, argexprs::Vector{Any})
+    def = mi.def::Method
     topline::Int32 = length(linetable) + Int32(1)
-    linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, ir′, def, inlined_at)
+    linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, ir′, mi, inlined_at)
     if extra_coverage_line != 0
         insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line))
     end
@@ -371,11 +393,10 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                          boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     # Ok, do the inlining here
     sparam_vals = item.mi.sparam_vals
-    def = item.mi.def::Method
     inlined_at = compact.result[idx][:line]
 
     ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertHere(compact),
-        compact, linetable, item.ir, sparam_vals, def, inlined_at, argexprs)
+        compact, linetable, item.ir, sparam_vals, item.mi, inlined_at, argexprs)
 
     if boundscheck === :default || boundscheck === :propagate
         if (compact.result[idx][:flag] & IR_FLAG_INBOUNDS) != 0
@@ -385,6 +406,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
     # If the iterator already moved on to the next basic block,
     # temporarily re-open in again.
     local return_value
+    def = item.mi.def::Method
     sig = def.sig
     # Special case inlining that maintains the current basic block if there's only one BB in the target
     new_new_offset = length(compact.new_new_nodes)
@@ -596,7 +618,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
             case = case::ConstantCase
             val = case.val
         end
-        if !isempty(compact.result_bbs[bb].preds)
+        if !isempty(compact.cfg_transform.result_bbs[bb].preds)
             push!(pn.edges, bb)
             push!(pn.values, val)
             insert_node_here!(compact,
@@ -648,8 +670,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun
         boundscheck = :propagate
     end
 
-    let compact = IncrementalCompact(ir, false)
-        compact.result_bbs = state.new_cfg_blocks
+    let compact = IncrementalCompact(ir, CFGTransformState!(state.new_cfg_blocks, false))
         # This needs to be a minimum and is more of a size hint
         nn = 0
         for (_, item) in todo
@@ -670,7 +691,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun
                     argexprs = copy(stmt.args)
                 end
                 refinish = false
-                if compact.result_idx == first(compact.result_bbs[compact.active_result_bb].stmts)
+                if compact.result_idx == first(compact.cfg_transform.result_bbs[compact.active_result_bb].stmts)
                     compact.active_result_bb -= 1
                     refinish = true
                 end
@@ -794,25 +815,38 @@ function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}},
     return new_argtypes
 end
 
-function compileable_specialization(match::MethodMatch, effects::Effects,
-    et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true)
-    mi = specialize_method(match; compilesig=compilesig_invokes)
-    mi === nothing && return nothing
+function compileable_specialization(mi::MethodInstance, effects::Effects,
+        et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true)
+    mi_invoke = mi
+    if compilesig_invokes
+        method, atype, sparams = mi.def::Method, mi.specTypes, mi.sparam_vals
+        new_atype = get_compileable_sig(method, atype, sparams)
+        new_atype === nothing && return nothing
+        if atype !== new_atype
+            sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), new_atype, method.sig)::SimpleVector
+            if sparams === sp_[2]::SimpleVector
+                mi_invoke = specialize_method(method, new_atype, sparams)
+                mi_invoke === nothing && return nothing
+            end
+        end
+    else
+        # If this caller does not want us to optimize calls to use their
+        # declared compilesig, then it is also likely they would handle sparams
+        # incorrectly if there were any unknown typevars, so we conservatively return nothing
+        if any(@nospecialize(t)->isa(t, TypeVar), mi.sparam_vals)
+            return nothing
+        end
+    end
     add_inlining_backedge!(et, mi)
-    return InvokeCase(mi, effects, info)
+    return InvokeCase(mi_invoke, effects, info)
 end
 
-function compileable_specialization(linfo::MethodInstance, effects::Effects,
-    et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true)
-    mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals; compilesig=compilesig_invokes)
-    mi === nothing && return nothing
-    add_inlining_backedge!(et, mi)
-    return InvokeCase(mi, effects, info)
+function compileable_specialization(match::MethodMatch, effects::Effects,
+        et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true)
+    mi = specialize_method(match)
+    return compileable_specialization(mi, effects, et, info; compilesig_invokes)
 end
 
-compileable_specialization(result::InferenceResult, args...; kwargs...) = (@nospecialize;
-    compileable_specialization(result.linfo, args...; kwargs...))
-
 struct CachedResult
     src::Any
     effects::Effects
@@ -829,9 +863,8 @@ end
         end
         effects = decode_effects(code.ipo_purity_bits)
         return CachedResult(src, effects)
-    else # fallback pass for external AbstractInterpreter cache
-        return CachedResult(code, Effects())
     end
+    return CachedResult(nothing, Effects())
 end
 
 # the general resolver for usual and const-prop'ed calls
@@ -843,12 +876,15 @@ function resolve_todo(mi::MethodInstance, result::Union{MethodMatch,InferenceRes
     #XXX: update_valid_age!(min_valid[1], max_valid[1], sv)
     if isa(result, InferenceResult)
         src = result.src
-        if isa(src, ConstAPI)
-            # use constant calling convention
-            add_inlining_backedge!(et, mi)
-            return ConstantCase(quoted(src.val))
-        end
         effects = result.ipo_effects
+        if is_foldable_nothrow(effects)
+            res = result.result
+            if isa(res, Const) && is_inlineable_constant(res.val)
+                # use constant calling convention
+                add_inlining_backedge!(et, mi)
+                return ConstantCase(quoted(res.val))
+            end
+        end
     else
         cached_result = get_cached_result(state, mi)
         if cached_result isa ConstantCase
@@ -860,24 +896,14 @@ function resolve_todo(mi::MethodInstance, result::Union{MethodMatch,InferenceRes
 
     # the duplicated check might have been done already within `analyze_method!`, but still
     # we need it here too since we may come here directly using a constant-prop' result
-    if !state.params.inlining || is_stmt_noinline(flag)
-        return compileable_specialization(result, effects, et, info;
-            compilesig_invokes=state.params.compilesig_invokes)
+    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
+        return compileable_specialization(mi, effects, et, info;
+            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
     end
 
     src = inlining_policy(state.interp, src, info, flag, mi, argtypes)
-
-    if isa(src, ConstAPI)
-        # duplicates the check above in case inlining_policy has a better idea.
-        # We still keep the check above to make sure we can inline to ConstAPI
-        # even if is_stmt_noinline. This doesn't currently happen in Base, but
-        # can happen with external AbstractInterpreter.
-        add_inlining_backedge!(et, mi)
-        return ConstantCase(quoted(src.val))
-    end
-
-    src === nothing && return compileable_specialization(result, effects, et, info;
-        compilesig_invokes=state.params.compilesig_invokes)
+    src === nothing && return compileable_specialization(mi, effects, et, info;
+        compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
 
     add_inlining_backedge!(et, mi)
     return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects)
@@ -886,7 +912,7 @@ end
 # the special resolver for :invoke-d call
 function resolve_todo(mi::MethodInstance, argtypes::Vector{Any},
     @nospecialize(info::CallInfo), flag::UInt8, state::InliningState)
-    if !state.params.inlining || is_stmt_noinline(flag)
+    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
         return nothing
     end
 
@@ -918,7 +944,7 @@ end
 function may_have_fcalls(m::Method)
     isdefined(m, :source) || return true
     src = m.source
-    isa(src, CodeInfo) || isa(src, Vector{UInt8}) || return true
+    isa(src, MaybeCompressed) || return true
     return ccall(:jl_ir_flag_has_fcall, Bool, (Any,), src)
 end
 
@@ -950,20 +976,14 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
         (allow_typevars && !may_have_fcalls(match.method)) || return nothing
     end
 
-    # See if there exists a specialization for this method signature
-    mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance}
-    if mi === nothing
-        et = InliningEdgeTracker(state.et, invokesig)
-        effects = info_effects(nothing, match, state)
-        return compileable_specialization(match, effects, et, info;
-            compilesig_invokes=state.params.compilesig_invokes)
-    end
-
+    # Get the specialization for this method signature
+    # (later we will decide what to do with it)
+    mi = specialize_method(match)
     return resolve_todo(mi, match, argtypes, info, flag, state; invokesig)
 end
 
-function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1})
-    src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
+function retrieve_ir_for_inlining(mi::MethodInstance, src::String)
+    src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo
     return inflate_ir!(src, mi)
 end
 retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi)
@@ -988,7 +1008,7 @@ function handle_single_case!(todo::Vector{Pair{Int,Any}},
     if isa(case, ConstantCase)
         ir[SSAValue(idx)][:inst] = case.val
     elseif isa(case, InvokeCase)
-        is_total(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing
+        is_foldable_nothrow(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing
         isinvoke && rewrite_invoke_exprargs!(stmt)
         stmt.head = :invoke
         pushfirst!(stmt.args, case.invoke)
@@ -1122,7 +1142,7 @@ function inline_apply!(todo::Vector{Pair{Int,Any}},
         arginfos = MaybeAbstractIterationInfo[]
         for i = (arg_start + 1):length(argtypes)
             thisarginfo = nothing
-            if !is_valid_type_for_apply_rewrite(argtypes[i], state.params)
+            if !is_valid_type_for_apply_rewrite(argtypes[i], OptimizationParams(state.interp))
                 isa(info, ApplyCallInfo) || return nothing
                 thisarginfo = info.arginfo[i-arg_start]
                 if thisarginfo === nothing || !thisarginfo.complete
@@ -1169,15 +1189,15 @@ function handle_invoke_call!(todo::Vector{Pair{Int,Any}},
         if isa(result, ConstPropResult)
             mi = result.result.linfo
             validate_sparams(mi.sparam_vals) || return nothing
-            if argtypes_to_type(argtypes) <: mi.def.sig
+            if Union{} !== argtypes_to_type(argtypes) <: mi.def.sig
                 item = resolve_todo(mi, result.result, argtypes, info, flag, state; invokesig)
-                handle_single_case!(todo, ir, idx, stmt, item, state.params, true)
+                handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp), true)
                 return nothing
             end
         end
         item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig)
     end
-    handle_single_case!(todo, ir, idx, stmt, item, state.params, true)
+    handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp), true)
     return nothing
 end
 
@@ -1431,7 +1451,7 @@ function handle_call!(todo::Vector{Pair{Int,Any}},
     cases === nothing && return nothing
     cases, all_covered, joint_effects = cases
     handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases,
-        all_covered, joint_effects, state.params)
+        all_covered, joint_effects, OptimizationParams(state.interp))
 end
 
 function handle_match!(cases::Vector{InliningCase},
@@ -1469,12 +1489,12 @@ end
 function semiconcrete_result_item(result::SemiConcreteResult,
         @nospecialize(info::CallInfo), flag::UInt8, state::InliningState)
     mi = result.mi
-    if !state.params.inlining || is_stmt_noinline(flag)
+    if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag)
         et = InliningEdgeTracker(state.et, nothing)
         return compileable_specialization(mi, result.effects, et, info;
-            compilesig_invokes=state.params.compilesig_invokes)
+            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
     else
-        return InliningTodo(mi, result.ir, result.effects)
+        return InliningTodo(mi, retrieve_ir_for_inlining(mi, result.ir), result.effects)
     end
 end
 
@@ -1493,6 +1513,7 @@ end
 
 function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState)
     case = concrete_result_item(result, info, state)
+    case === nothing && return false
     push!(cases, InliningCase(result.mi.specTypes, case))
     return true
 end
@@ -1504,10 +1525,8 @@ function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallIn
     invokesig::Union{Nothing,Vector{Any}}=nothing)
     if !may_inline_concrete_result(result)
         et = InliningEdgeTracker(state.et, invokesig)
-        case = compileable_specialization(result.mi, result.effects, et, info;
-            compilesig_invokes=state.params.compilesig_invokes)
-        @assert case !== nothing "concrete evaluation should never happen for uncompileable callsite"
-        return case
+        return compileable_specialization(result.mi, result.effects, et, info;
+            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
     end
     @assert result.effects === EFFECTS_TOTAL
     return ConstantCase(quoted(result.result))
@@ -1553,7 +1572,7 @@ function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}},
             item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false)
         end
     end
-    handle_single_case!(todo, ir, idx, stmt, item, state.params)
+    handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp))
     return nothing
 end
 
@@ -1566,7 +1585,7 @@ function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::Modif
     match = info.results[1]::MethodMatch
     match.fully_covers || return nothing
     case = compileable_specialization(match, Effects(), InliningEdgeTracker(state.et), info;
-        compilesig_invokes=state.params.compilesig_invokes)
+        compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
     case === nothing && return nothing
     stmt.head = :invoke_modify
     pushfirst!(stmt.args, case.invoke)
@@ -1691,18 +1710,10 @@ function linear_inline_eligible(ir::IRCode)
     return true
 end
 
-# Check for a number of functions known to be pure
-function ispuretopfunction(@nospecialize(f))
-    return istopfunction(f, :typejoin) ||
-        istopfunction(f, :isbits) ||
-        istopfunction(f, :isbitstype) ||
-        istopfunction(f, :promote_type)
-end
-
 function early_inline_special_case(
     ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature,
     state::InliningState)
-    state.params.inlining || return nothing
+    OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
 
     if isa(type, Const) # || isconstType(type)
@@ -1712,7 +1723,7 @@ function early_inline_special_case(
             if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, argtypes[2:end])
                 return SomeCase(quoted(val))
             end
-        elseif ispuretopfunction(f) || contains_is(_PURE_BUILTINS, f)
+        elseif contains_is(_PURE_BUILTINS, f)
             return SomeCase(quoted(val))
         elseif contains_is(_EFFECT_FREE_BUILTINS, f)
             if _builtin_nothrow(optimizer_lattice(state.interp), f, argtypes[2:end], type)
@@ -1755,7 +1766,7 @@ end
 function late_inline_special_case!(
     ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature,
     state::InliningState)
-    state.params.inlining || return nothing
+    OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
     if length(argtypes) == 3 && istopfunction(f, :!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 1d6be5a2b09d8..c5415add51cc5 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -334,13 +334,13 @@ end
 struct IRCode
     stmts::InstructionStream
     argtypes::Vector{Any}
-    sptypes::Vector{Any}
+    sptypes::Vector{VarState}
     linetable::Vector{LineInfoNode}
     cfg::CFG
     new_nodes::NewNodeStream
     meta::Vector{Expr}
 
-    function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{Any})
+    function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState})
         return new(stmts, argtypes, sptypes, linetable, cfg, NewNodeStream(), meta)
     end
     function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream)
@@ -358,7 +358,7 @@ for debugging and unit testing of IRCode APIs. The compiler itself should genera
 from the frontend or one of the caches.
 """
 function IRCode()
-    ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], Any[])
+    ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], VarState[])
     ir[SSAValue(1)][:inst] = ReturnNode(nothing)
     ir[SSAValue(1)][:type] = Nothing
     ir[SSAValue(1)][:flag] = 0x00
@@ -562,14 +562,60 @@ end
 insert_node!(ir::IRCode, pos::Int, newinst::NewInstruction, attach_after::Bool=false) =
     insert_node!(ir, SSAValue(pos), newinst, attach_after)
 
+struct CFGTransformState
+    cfg_transforms_enabled::Bool
+    fold_constant_branches::Bool
+    result_bbs::Vector{BasicBlock}
+    bb_rename_pred::Vector{Int}
+    bb_rename_succ::Vector{Int}
+end
+
+# N.B.: Takes ownership of the CFG array
+function CFGTransformState!(blocks::Vector{BasicBlock}, allow_cfg_transforms::Bool=false)
+    if allow_cfg_transforms
+        bb_rename = Vector{Int}(undef, length(blocks))
+        cur_bb = 1
+        domtree = construct_domtree(blocks)
+        for i = 1:length(bb_rename)
+            if bb_unreachable(domtree, i)
+                bb_rename[i] = -1
+            else
+                bb_rename[i] = cur_bb
+                cur_bb += 1
+            end
+        end
+        for i = 1:length(bb_rename)
+            bb_rename[i] == -1 && continue
+            preds, succs = blocks[i].preds, blocks[i].succs
+            # Rename preds
+            for j = 1:length(preds)
+                if preds[j] != 0
+                    preds[j] = bb_rename[preds[j]]
+                end
+            end
+            # Dead blocks get removed from the predecessor list
+            filter!(x->x !== -1, preds)
+            # Rename succs
+            for j = 1:length(succs)
+                succs[j] = bb_rename[succs[j]]
+            end
+        end
+        let blocks = blocks, bb_rename = bb_rename
+            result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1]
+        end
+    else
+        bb_rename = Vector{Int}()
+        result_bbs = blocks
+    end
+    return CFGTransformState(allow_cfg_transforms, allow_cfg_transforms, result_bbs, bb_rename, bb_rename)
+end
+
 mutable struct IncrementalCompact
     ir::IRCode
     result::InstructionStream
-    result_bbs::Vector{BasicBlock}
 
+    cfg_transform::CFGTransformState
     ssa_rename::Vector{Any}
-    bb_rename_pred::Vector{Int}
-    bb_rename_succ::Vector{Int}
 
     used_ssas::Vector{Int}
     late_fixup::Vector{Int}
@@ -587,10 +633,8 @@ mutable struct IncrementalCompact
     active_bb::Int
     active_result_bb::Int
     renamed_new_nodes::Bool
-    cfg_transforms_enabled::Bool
-    fold_constant_branches::Bool
 
-    function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false)
+    function IncrementalCompact(code::IRCode, cfg_transform::CFGTransformState)
         # Sort by position with attach after nodes after regular ones
         info = code.new_nodes.info
         perm = sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i))
@@ -599,49 +643,14 @@ mutable struct IncrementalCompact
         used_ssas = fill(0, new_len)
         new_new_used_ssas = Vector{Int}()
         blocks = code.cfg.blocks
-        if allow_cfg_transforms
-            bb_rename = Vector{Int}(undef, length(blocks))
-            cur_bb = 1
-            domtree = construct_domtree(blocks)
-            for i = 1:length(bb_rename)
-                if bb_unreachable(domtree, i)
-                    bb_rename[i] = -1
-                else
-                    bb_rename[i] = cur_bb
-                    cur_bb += 1
-                end
-            end
-            for i = 1:length(bb_rename)
-                bb_rename[i] == -1 && continue
-                preds, succs = blocks[i].preds, blocks[i].succs
-                # Rename preds
-                for j = 1:length(preds)
-                    if preds[j] != 0
-                        preds[j] = bb_rename[preds[j]]
-                    end
-                end
-                # Dead blocks get removed from the predecessor list
-                filter!(x->x !== -1, preds)
-                # Rename succs
-                for j = 1:length(succs)
-                    succs[j] = bb_rename[succs[j]]
-                end
-            end
-            let blocks = blocks, bb_rename = bb_rename
-                result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1]
-            end
-        else
-            bb_rename = Vector{Int}()
-            result_bbs = code.cfg.blocks
-        end
         ssa_rename = Any[SSAValue(i) for i = 1:new_len]
         late_fixup = Vector{Int}()
         new_new_nodes = NewNodeStream()
         pending_nodes = NewNodeStream()
         pending_perm = Int[]
-        return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1,
+        return new(code, result, cfg_transform, ssa_rename, used_ssas, late_fixup, perm, 1,
             new_new_nodes, new_new_used_ssas, pending_nodes, pending_perm,
-            1, 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms)
+            1, 1, 1, 1, false)
     end
 
     # For inlining
@@ -653,14 +662,18 @@ mutable struct IncrementalCompact
         bb_rename = Vector{Int}()
         pending_nodes = NewNodeStream()
         pending_perm = Int[]
-        return new(code, parent.result,
-            parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas,
+        return new(code, parent.result, CFGTransformState(false, false, parent.cfg_transform.result_bbs, bb_rename, bb_rename),
+            ssa_rename, parent.used_ssas,
             parent.late_fixup, perm, 1,
             parent.new_new_nodes, parent.new_new_used_ssas, pending_nodes, pending_perm,
-            1, result_offset, 1, parent.active_result_bb, false, false, false)
+            1, result_offset, 1, parent.active_result_bb, false)
     end
 end
 
+function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false)
+    return IncrementalCompact(code, CFGTransformState!(code.cfg.blocks, allow_cfg_transforms))
+end
+
 struct TypesView{T}
     ir::T # ::Union{IRCode, IncrementalCompact}
 end
@@ -674,7 +687,7 @@ end
 function getindex(compact::IncrementalCompact, ssa::OldSSAValue)
     id = ssa.id
     if id < compact.idx
-        new_idx = compact.ssa_rename[id]
+        new_idx = compact.ssa_rename[id]::Int
         return compact.result[new_idx]
     elseif id <= length(compact.ir.stmts)
         return compact.ir.stmts[id]
@@ -698,7 +711,7 @@ end
 function block_for_inst(compact::IncrementalCompact, idx::SSAValue)
     id = idx.id
     if id < compact.result_idx # if ssa within result
-        return searchsortedfirst(compact.result_bbs, BasicBlock(StmtRange(id, id)),
+        return searchsortedfirst(compact.cfg_transform.result_bbs, BasicBlock(StmtRange(id, id)),
             1, compact.active_result_bb, bb_ordering())-1
     else
         return block_for_inst(compact.ir.cfg, id)
@@ -708,7 +721,7 @@ end
 function block_for_inst(compact::IncrementalCompact, idx::OldSSAValue)
     id = idx.id
     if id < compact.idx # if ssa within result
-        id = compact.ssa_rename[id]
+        id = compact.ssa_rename[id]::Int
         return block_for_inst(compact, SSAValue(id))
     else
         return block_for_inst(compact.ir.cfg, id)
@@ -883,9 +896,10 @@ function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction,
     newline = newinst.line::Int32
     refinish = false
     result_idx = compact.result_idx
+    result_bbs = compact.cfg_transform.result_bbs
     if reverse_affinity &&
-            ((compact.active_result_bb == length(compact.result_bbs) + 1) ||
-             result_idx == first(compact.result_bbs[compact.active_result_bb].stmts))
+            ((compact.active_result_bb == length(result_bbs) + 1) ||
+             result_idx == first(result_bbs[compact.active_result_bb].stmts))
         compact.active_result_bb -= 1
         refinish = true
     end
@@ -936,7 +950,7 @@ end
 
 function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue)
     @assert idx.id < compact.result_idx
-    (compact.result[idx.id][:inst] === v) && return
+    (compact.result[idx.id][:inst] === v) && return compact
     # Kill count for current uses
     kill_current_uses!(compact, compact.result[idx.id][:inst])
     compact.result[idx.id][:inst] = v
@@ -948,8 +962,8 @@ end
 function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::OldSSAValue)
     id = idx.id
     if id < compact.idx
-        new_idx = compact.ssa_rename[id]
-        (compact.result[new_idx][:inst] === v) && return
+        new_idx = compact.ssa_rename[id]::Int
+        (compact.result[new_idx][:inst] === v) && return compact
         kill_current_uses!(compact, compact.result[new_idx][:inst])
         compact.result[new_idx][:inst] = v
         count_added_node!(compact, v) && push!(compact.late_fixup, new_idx)
@@ -1173,18 +1187,19 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
     # Note: We recursively kill as many edges as are obviously dead. However, this
     # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or
     # worstcase during codegen).
-    preds = compact.result_bbs[compact.bb_rename_succ[to]].preds
-    succs = compact.result_bbs[compact.bb_rename_pred[from]].succs
-    deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int)
-    deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int)
+    (; bb_rename_pred, bb_rename_succ, result_bbs) = compact.cfg_transform
+    preds = result_bbs[bb_rename_succ[to]].preds
+    succs = result_bbs[bb_rename_pred[from]].succs
+    deleteat!(preds, findfirst(x->x === bb_rename_pred[from], preds)::Int)
+    deleteat!(succs, findfirst(x->x === bb_rename_succ[to], succs)::Int)
     # Check if the block is now dead
     if length(preds) == 0
-        for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs)
-            kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)::Int)
+        for succ in copy(result_bbs[bb_rename_succ[to]].succs)
+            kill_edge!(compact, active_bb, to, findfirst(x->x === succ, bb_rename_pred)::Int)
         end
         if to < active_bb
             # Kill all statements in the block
-            stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts
+            stmts = result_bbs[bb_rename_succ[to]].stmts
             for stmt in stmts
                 compact.result[stmt][:inst] = nothing
             end
@@ -1194,20 +1209,20 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
             # indicates that the block is not to be scheduled, but there should
             # still be an (unreachable) BB inserted into the final IR to avoid
             # disturbing the BB numbering.
-            compact.bb_rename_succ[to] = -2
+            bb_rename_succ[to] = -2
         end
     else
         # Remove this edge from all phi nodes in `to` block
         # NOTE: It is possible for `to` to contain only `nothing` statements,
         #       so we must be careful to stop at its last statement
         if to < active_bb
-            stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts
+            stmts = result_bbs[bb_rename_succ[to]].stmts
             idx = first(stmts)
             while idx <= last(stmts)
                 stmt = compact.result[idx][:inst]
                 stmt === nothing && continue
                 isa(stmt, PhiNode) || break
-                i = findfirst(x-> x == compact.bb_rename_pred[from], stmt.edges)
+                i = findfirst(x-> x == bb_rename_pred[from], stmt.edges)
                 if i !== nothing
                     deleteat!(stmt.edges, i)
                     deleteat!(stmt.values, i)
@@ -1232,14 +1247,15 @@ end
 
 function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool)
     stmt = inst[:inst]
-    (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas, cfg_transforms_enabled, fold_constant_branches) = compact
+    (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact
+    (; cfg_transforms_enabled, fold_constant_branches, bb_rename_succ, bb_rename_pred, result_bbs) = compact.cfg_transform
     ssa_rename[idx] = SSAValue(result_idx)
     if stmt === nothing
         ssa_rename[idx] = stmt
     elseif isa(stmt, OldSSAValue)
         ssa_rename[idx] = ssa_rename[stmt.id]
     elseif isa(stmt, GotoNode) && cfg_transforms_enabled
-        label = compact.bb_rename_succ[stmt.label]
+        label = bb_rename_succ[stmt.label]
         @assert label > 0
         result[result_idx][:inst] = GotoNode(label)
         result_idx += 1
@@ -1272,7 +1288,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
                 kill_edge!(compact, active_bb, active_bb, stmt.dest)
                 # Don't increment result_idx => Drop this statement
             else
-                label = compact.bb_rename_succ[stmt.dest]
+                label = bb_rename_succ[stmt.dest]
                 @assert label > 0
                 result[result_idx][:inst] = GotoNode(label)
                 kill_edge!(compact, active_bb, active_bb, active_bb+1)
@@ -1280,7 +1296,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
             end
         else
             @label bail
-            label = compact.bb_rename_succ[stmt.dest]
+            label = bb_rename_succ[stmt.dest]
             @assert label > 0
             result[result_idx][:inst] = GotoIfNot(cond, label)
             result_idx += 1
@@ -1288,7 +1304,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     elseif isa(stmt, Expr)
         stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::Expr
         if cfg_transforms_enabled && isexpr(stmt, :enter)
-            label = compact.bb_rename_succ[stmt.args[1]::Int]
+            label = bb_rename_succ[stmt.args[1]::Int]
             @assert label > 0
             stmt.args[1] = label
         elseif isexpr(stmt, :throw_undef_if_not)
@@ -1333,7 +1349,9 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     elseif isa(stmt, PhiNode)
         if cfg_transforms_enabled
             # Rename phi node edges
-            map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges)
+            let bb_rename_pred=bb_rename_pred
+                map!(i::Int32 -> bb_rename_pred[i], stmt.edges, stmt.edges)
+            end
 
             # Remove edges and values associated with dead blocks. Entries in
             # `values` can be undefined when the phi node refers to something
@@ -1375,7 +1393,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id
         if length(edges) == 1 && isassigned(values, 1) && !before_def &&
                 length(cfg_transforms_enabled ?
-                    compact.result_bbs[compact.bb_rename_succ[active_bb]].preds :
+                    result_bbs[bb_rename_succ[active_bb]].preds :
                     compact.ir.cfg.blocks[active_bb].preds) == 1
             # There's only one predecessor left - just replace it
             v = values[1]
@@ -1417,15 +1435,16 @@ function resize!(compact::IncrementalCompact, nnewnodes)
 end
 
 function finish_current_bb!(compact::IncrementalCompact, active_bb, old_result_idx=compact.result_idx, unreachable=false)
-    if compact.active_result_bb > length(compact.result_bbs)
+    (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform
+    if compact.active_result_bb > length(result_bbs)
         #@assert compact.bb_rename[active_bb] == -1
         return true
     end
-    bb = compact.result_bbs[compact.active_result_bb]
+    bb = result_bbs[compact.active_result_bb]
     # If this was the last statement in the BB and we decided to skip it, insert a
     # dummy `nothing` node, to prevent changing the structure of the CFG
     skipped = false
-    if !compact.cfg_transforms_enabled || active_bb == 0 || active_bb > length(compact.bb_rename_succ) || compact.bb_rename_succ[active_bb] != -1
+    if !cfg_transforms_enabled || active_bb == 0 || active_bb > length(bb_rename_succ) || bb_rename_succ[active_bb] != -1
         if compact.result_idx == first(bb.stmts)
             length(compact.result) < old_result_idx && resize!(compact, old_result_idx)
             node = compact.result[old_result_idx]
@@ -1435,17 +1454,17 @@ function finish_current_bb!(compact::IncrementalCompact, active_bb, old_result_i
                 node[:inst], node[:type], node[:line] = nothing, Nothing, 0
             end
             compact.result_idx = old_result_idx + 1
-        elseif compact.cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts)
+        elseif cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts)
             # Optimization: If this BB consists of only a branch, eliminate this bb
         end
-        compact.result_bbs[compact.active_result_bb] = BasicBlock(bb, StmtRange(first(bb.stmts), compact.result_idx-1))
+        result_bbs[compact.active_result_bb] = BasicBlock(bb, StmtRange(first(bb.stmts), compact.result_idx-1))
         compact.active_result_bb += 1
     else
         skipped = true
     end
-    if compact.active_result_bb <= length(compact.result_bbs)
-        new_bb = compact.result_bbs[compact.active_result_bb]
-        compact.result_bbs[compact.active_result_bb] = BasicBlock(new_bb,
+    if compact.active_result_bb <= length(result_bbs)
+        new_bb = result_bbs[compact.active_result_bb]
+        result_bbs[compact.active_result_bb] = BasicBlock(new_bb,
             StmtRange(compact.result_idx, last(new_bb.stmts)))
     end
     return skipped
@@ -1537,7 +1556,8 @@ function iterate_compact(compact::IncrementalCompact)
         resize!(compact, old_result_idx)
     end
     bb = compact.ir.cfg.blocks[active_bb]
-    if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] <= -1
+    (; cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform
+    if cfg_transforms_enabled && active_bb > 1 && active_bb <= length(bb_rename_succ) && bb_rename_succ[active_bb] <= -1
         # Dead block, so kill the entire block.
         compact.idx = last(bb.stmts)
         # Pop any remaining insertion nodes
@@ -1739,8 +1759,8 @@ function non_dce_finish!(compact::IncrementalCompact)
     result_idx = compact.result_idx
     resize!(compact.result, result_idx - 1)
     just_fixup!(compact)
-    bb = compact.result_bbs[end]
-    compact.result_bbs[end] = BasicBlock(bb,
+    bb = compact.cfg_transform.result_bbs[end]
+    compact.cfg_transform.result_bbs[end] = BasicBlock(bb,
                 StmtRange(first(bb.stmts), result_idx-1))
     compact.renamed_new_nodes = true
     nothing
@@ -1753,7 +1773,7 @@ function finish(compact::IncrementalCompact)
 end
 
 function complete(compact::IncrementalCompact)
-    result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1)
+    result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1)
     cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)])
     if should_check_ssa_counts()
         oracle_check(compact)
diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl
index 7b2df1b39dd51..8d75ad3948ee2 100644
--- a/base/compiler/ssair/irinterp.jl
+++ b/base/compiler/ssair/irinterp.jl
@@ -1,216 +1,114 @@
-mutable struct TwoPhaseVectorView <: AbstractVector{Int}
-    const data::Vector{Int}
-    count::Int
-    const range::UnitRange{Int}
-end
-size(tpvv::TwoPhaseVectorView) = (tpvv.count,)
-function getindex(tpvv::TwoPhaseVectorView, i::Int)
-    checkbounds(tpvv, i)
-    @inbounds tpvv.data[first(tpvv.range) + i - 1]
-end
-function push!(tpvv::TwoPhaseVectorView, v::Int)
-    tpvv.count += 1
-    tpvv.data[first(tpvv.range) + tpvv.count - 1] = v
-    return nothing
-end
-
-"""
-    mutable struct TwoPhaseDefUseMap
-
-This struct is intended as a memory- and GC-pressure-efficient mechanism
-for incrementally computing def-use maps. The idea is that the def-use map
-is constructed into two passes over the IR. In the first, we simply count the
-the number of uses, computing the number of uses for each def as well as the
-total number of uses. In the second pass, we actually fill in the def-use
-information.
-
-The idea is that either of these two phases can be combined with other useful
-work that needs to scan the instruction stream anyway, while avoiding the
-significant allocation pressure of e.g. allocating an array for every SSA value
-or attempting to dynamically move things around as new uses are discovered.
-
-The def-use map is presented as a vector of vectors. For every def, indexing
-into the map will return a vector of uses.
-"""
-mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView}
-    ssa_uses::Vector{Int}
-    data::Vector{Int}
-    complete::Bool
-end
-
-function complete!(tpdum::TwoPhaseDefUseMap)
-    cumsum = 0
-    for i = 1:length(tpdum.ssa_uses)
-        this_val = cumsum + 1
-        cumsum += tpdum.ssa_uses[i]
-        tpdum.ssa_uses[i] = this_val
-    end
-    resize!(tpdum.data, cumsum)
-    fill!(tpdum.data, 0)
-    tpdum.complete = true
-end
-
-function TwoPhaseDefUseMap(nssas::Int)
-    ssa_uses = zeros(Int, nssas)
-    data = Int[]
-    complete = false
-    return TwoPhaseDefUseMap(ssa_uses, data, complete)
-end
-
-function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue)
-    @assert !tpdum.complete
-    tpdum.ssa_uses[arg.id] += 1
-end
-
-function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int)
-    if !tpdum.complete
-        tpdum.ssa_uses[def] -= 1
-    else
-        range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1))
-        # TODO: Sorted
-        useidx = findfirst(idx->tpdum.data[idx] == use, range)
-        @assert useidx !== nothing
-        idx = range[useidx]
-        while idx < lastindex(range)
-            ndata = tpdum.data[idx+1]
-            ndata == 0 && break
-            tpdum.data[idx] = ndata
-        end
-        tpdum.data[idx + 1] = 0
-    end
-end
-kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) =
-    kill_def_use!(tpdum, def.id, use)
-
-function getindex(tpdum::TwoPhaseDefUseMap, idx::Int)
-    @assert tpdum.complete
-    range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1))
-    # TODO: Make logarithmic
-    nelems = 0
-    for i in range
-        tpdum.data[i] == 0 && break
-        nelems += 1
-    end
-    return TwoPhaseVectorView(tpdum.data, nelems, range)
-end
-
-struct IRInterpretationState
-    ir::IRCode
-    mi::MethodInstance
-    world::UInt
-    argtypes_refined::Vector{Bool}
-    tpdum::TwoPhaseDefUseMap
-    ssa_refined::BitSet
-    lazydomtree::LazyDomtree
-    function IRInterpretationState(interp::AbstractInterpreter,
-        ir::IRCode, mi::MethodInstance, world::UInt, argtypes::Vector{Any})
-        argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, mi)
-        for i = 1:length(argtypes)
-            argtypes[i] = widenslotwrapper(argtypes[i])
-        end
-        argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], argtypes[i]) for i = 1:length(argtypes)]
-        empty!(ir.argtypes)
-        append!(ir.argtypes, argtypes)
-        tpdum = TwoPhaseDefUseMap(length(ir.stmts))
-        ssa_refined = BitSet()
-        lazydomtree = LazyDomtree(ir)
-        return new(ir, mi, world, argtypes_refined, tpdum, ssa_refined, lazydomtree)
-    end
-end
+# This file is a part of Julia. License is MIT: https://julialang.org/license
 
-function codeinst_to_ir(interp::AbstractInterpreter, code::CodeInstance)
-    src = code.inferred
-    mi = code.def
-    if isa(src, Vector{UInt8})
-        src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src)::CodeInfo
-    else
-        isa(src, CodeInfo) || return nothing
-    end
-    return inflate_ir(src, mi)
-end
-
-function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
-                                  arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
-                                  sv::IRCode, max_methods::Int)
-    return CallMeta(Any, Effects(), NoCallInfo())
-end
-
-function collect_limitations!(@nospecialize(typ), ::IRCode)
-    @assert !isa(typ, LimitedAccuracy) "semi-concrete eval on recursive call graph"
+function collect_limitations!(@nospecialize(typ), ::IRInterpretationState)
+    @assert !isa(typ, LimitedAccuracy) "irinterp is unable to handle heavy recursion"
     return typ
 end
 
 function concrete_eval_invoke(interp::AbstractInterpreter,
     inst::Expr, mi::MethodInstance, irsv::IRInterpretationState)
-    mi_cache = WorldView(code_cache(interp), irsv.world)
+    world = frame_world(irsv)
+    mi_cache = WorldView(code_cache(interp), world)
     code = get(mi_cache, mi, nothing)
-    code === nothing && return Pair{Any, Bool}(nothing, false)
-    argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv.ir)
-    argtypes === nothing && return Pair{Any, Bool}(Union{}, false)
+    code === nothing && return Pair{Any,Bool}(nothing, false)
+    argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv)
+    argtypes === nothing && return Pair{Any,Bool}(Bottom, false)
     effects = decode_effects(code.ipo_purity_bits)
     if is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1)
         args = collect_const_args(argtypes, #=start=#1)
-        world = get_world_counter(interp)
-        value = try
-            Core._call_in_world_total(world, args...)
-        catch
-            return Pair{Any, Bool}(Union{}, false)
+        value = let world = get_world_counter(interp)
+            try
+                Core._call_in_world_total(world, args...)
+            catch
+                return Pair{Any,Bool}(Bottom, false)
+            end
         end
-        return Pair{Any, Bool}(Const(value), true)
+        return Pair{Any,Bool}(Const(value), true)
     else
-        ir′ = codeinst_to_ir(interp, code)
-        if ir′ !== nothing
-            irsv′ = IRInterpretationState(interp, ir′, mi, irsv.world, argtypes)
-            return _ir_abstract_constant_propagation(interp, irsv′)
+        if is_constprop_edge_recursed(mi, irsv)
+            return Pair{Any,Bool}(nothing, is_nothrow(effects))
+        end
+        newirsv = IRInterpretationState(interp, code, mi, argtypes, world)
+        if newirsv !== nothing
+            newirsv.parent = irsv
+            return ir_abstract_constant_propagation(interp, newirsv)
         end
+        return Pair{Any,Bool}(nothing, is_nothrow(effects))
     end
-    return Pair{Any, Bool}(nothing, is_nothrow(effects))
 end
 
+abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir)
+
 function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState)
-    return abstract_eval_phi(interp, phi, nothing, irsv.ir)
+    return abstract_eval_phi(interp, phi, nothing, irsv)
 end
 
 function propagate_control_effects!(interp::AbstractInterpreter, idx::Int, stmt::GotoIfNot,
-        irsv::IRInterpretationState, reprocess::Union{Nothing, BitSet, BitSetBoundedMinPrioritySet})
+        irsv::IRInterpretationState, extra_reprocess::Union{Nothing,BitSet,BitSetBoundedMinPrioritySet})
     # Nothing to do for most abstract interpreters, but if the abstract
     # interpreter has control-dependent lattice effects, it can override
     # this method.
     return false
 end
 
-function reprocess_instruction!(interp::AbstractInterpreter,
-    idx::Int, bb::Union{Int, Nothing}, @nospecialize(inst), @nospecialize(typ),
-    irsv::IRInterpretationState, reprocess::Union{Nothing, BitSet, BitSetBoundedMinPrioritySet})
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState)
+    si = StmtInfo(true) # TODO better job here?
+    (; rt, effects, info) = abstract_call(interp, arginfo, si, irsv)
+    irsv.ir.stmts[irsv.curridx][:info] = info
+    return RTEffects(rt, effects)
+end
+
+function update_phi!(irsv::IRInterpretationState, from::Int, to::Int)
+    ir = irsv.ir
+    if length(ir.cfg.blocks[to].preds) == 0
+        # Kill the entire block
+        for bidx = ir.cfg.blocks[to].stmts
+            ir.stmts[bidx][:inst] = nothing
+            ir.stmts[bidx][:type] = Bottom
+            ir.stmts[bidx][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
+        end
+        return
+    end
+    for sidx = ir.cfg.blocks[to].stmts
+        sinst = ir.stmts[sidx][:inst]
+        isa(sinst, Nothing) && continue # allowed between `PhiNode`s
+        isa(sinst, PhiNode) || break
+        for (eidx, edge) in enumerate(sinst.edges)
+            if edge == from
+                deleteat!(sinst.edges, eidx)
+                deleteat!(sinst.values, eidx)
+                push!(irsv.ssa_refined, sidx)
+                break
+            end
+        end
+    end
+end
+update_phi!(irsv::IRInterpretationState) = (from::Int, to::Int)->update_phi!(irsv, from, to)
+
+function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::Int=block_for_inst(irsv.ir, term_idx))
+    ir = irsv.ir
+    inst = ir[SSAValue(term_idx)][:inst]
+    if isa(inst, GotoIfNot)
+        kill_edge!(ir, bb, inst.dest, update_phi!(irsv))
+        kill_edge!(ir, bb, bb+1, update_phi!(irsv))
+    elseif isa(inst, GotoNode)
+        kill_edge!(ir, bb, inst.label, update_phi!(irsv))
+    elseif isa(inst, ReturnNode)
+        # Nothing to do
+    else
+        @assert !isexpr(inst, :enter)
+        kill_edge!(ir, bb, bb+1, update_phi!(irsv))
+    end
+end
+
+function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union{Int,Nothing},
+    @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState,
+    extra_reprocess::Union{Nothing,BitSet,BitSetBoundedMinPrioritySet})
     ir = irsv.ir
     if isa(inst, GotoIfNot)
         cond = inst.cond
         condval = maybe_extract_const_bool(argextype(cond, ir))
         if condval isa Bool
-            function update_phi!(from::Int, to::Int)
-                if length(ir.cfg.blocks[to].preds) == 0
-                    # Kill the entire block
-                    for idx in ir.cfg.blocks[to].stmts
-                        ir.stmts[idx][:inst] = nothing
-                        ir.stmts[idx][:type] = Union{}
-                        ir.stmts[idx][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW
-                    end
-                    return
-                end
-                for idx in ir.cfg.blocks[to].stmts
-                    stmt = ir.stmts[idx][:inst]
-                    isa(stmt, Nothing) && continue # allowed between `PhiNode`s
-                    isa(stmt, PhiNode) || break
-                    for (i, edge) in enumerate(stmt.edges)
-                        if edge == from
-                            deleteat!(stmt.edges, i)
-                            deleteat!(stmt.values, i)
-                            push!(irsv.ssa_refined, idx)
-                            break
-                        end
-                    end
-                end
-            end
             if isa(cond, SSAValue)
                 kill_def_use!(irsv.tpdum, cond, idx)
             end
@@ -221,45 +119,31 @@ function reprocess_instruction!(interp::AbstractInterpreter,
             if condval
                 ir.stmts[idx][:inst] = nothing
                 ir.stmts[idx][:type] = Any
-                kill_edge!(ir, bb, inst.dest, update_phi!)
+                kill_edge!(ir, bb, inst.dest, update_phi!(irsv))
             else
                 ir.stmts[idx][:inst] = GotoNode(inst.dest)
-                kill_edge!(ir, bb, bb+1, update_phi!)
+                kill_edge!(ir, bb, bb+1, update_phi!(irsv))
             end
             return true
         end
-        return propagate_control_effects!(interp, idx, inst, irsv, reprocess)
+        return propagate_control_effects!(interp, idx, inst, irsv, extra_reprocess)
     end
-
     rt = nothing
     if isa(inst, Expr)
         head = inst.head
         if head === :call || head === :foreigncall || head === :new || head === :splatnew
-            (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, ir, irsv.mi)
-            # All other effects already guaranteed effect free by construction
-            if is_nothrow(effects)
-                ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
-                if isa(rt, Const) && is_inlineable_constant(rt.val)
-                    ir.stmts[idx][:inst] = quoted(rt.val)
-                end
-            end
+            (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, irsv)
+            ir.stmts[idx][:flag] |= flags_for_effects(effects)
         elseif head === :invoke
-            mi′ = inst.args[1]::MethodInstance
-            if mi′ !== irsv.mi # prevent infinite loop
-                rt, nothrow = concrete_eval_invoke(interp, inst, mi′, irsv)
-                if nothrow
-                    ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
-                    if isa(rt, Const) && is_inlineable_constant(rt.val)
-                        ir.stmts[idx][:inst] = quoted(rt.val)
-                    end
-                end
+            rt, nothrow = concrete_eval_invoke(interp, inst, inst.args[1]::MethodInstance, irsv)
+            if nothrow
+                ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW
             end
         elseif head === :throw_undef_if_not || # TODO: Terminate interpretation early if known false?
                head === :gc_preserve_begin ||
                head === :gc_preserve_end
             return false
         else
-            ccall(:jl_, Cvoid, (Any,), inst)
             error("reprocess_instruction!: unhandled expression found")
         end
     elseif isa(inst, PhiNode)
@@ -268,26 +152,32 @@ function reprocess_instruction!(interp::AbstractInterpreter,
         # Handled at the very end
         return false
     elseif isa(inst, PiNode)
-        rt = tmeet(optimizer_lattice(interp), argextype(inst.val, ir), widenconst(inst.typ))
+        rt = tmeet(typeinf_lattice(interp), argextype(inst.val, ir), widenconst(inst.typ))
     elseif inst === nothing
         return false
     elseif isa(inst, GlobalRef)
         # GlobalRef is not refinable
     else
-        ccall(:jl_, Cvoid, (Any,), inst)
-        error()
+        error("reprocess_instruction!: unhandled instruction found")
     end
-    if rt !== nothing && !⊑(optimizer_lattice(interp), typ, rt)
-        ir.stmts[idx][:type] = rt
-        return true
+    if rt !== nothing
+        if isa(rt, Const)
+            ir.stmts[idx][:type] = rt
+            if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & IR_FLAG_EFFECT_FREE) != 0
+                ir.stmts[idx][:inst] = quoted(rt.val)
+            end
+            return true
+        elseif !⊑(typeinf_lattice(interp), typ, rt)
+            ir.stmts[idx][:type] = rt
+            return true
+        end
     end
     return false
 end
 
-# Process the terminator and add the successor to `ip`. Returns whether a backedge was seen.
-function process_terminator!(ir::IRCode, idx::Int, bb::Int,
-    all_rets::Vector{Int}, ip::BitSetBoundedMinPrioritySet)
-    inst = ir.stmts[idx][:inst]
+# Process the terminator and add the successor to `bb_ip`. Returns whether a backedge was seen.
+function process_terminator!(ir::IRCode, @nospecialize(inst), idx::Int, bb::Int,
+    all_rets::Vector{Int}, bb_ip::BitSetBoundedMinPrioritySet)
     if isa(inst, ReturnNode)
         if isdefined(inst, :val)
             push!(all_rets, idx)
@@ -295,43 +185,46 @@ function process_terminator!(ir::IRCode, idx::Int, bb::Int,
         return false
     elseif isa(inst, GotoNode)
         backedge = inst.label <= bb
-        !backedge && push!(ip, inst.label)
+        backedge || push!(bb_ip, inst.label)
         return backedge
     elseif isa(inst, GotoIfNot)
         backedge = inst.dest <= bb
-        !backedge && push!(ip, inst.dest)
-        push!(ip, bb + 1)
+        backedge || push!(bb_ip, inst.dest)
+        push!(bb_ip, bb+1)
         return backedge
     elseif isexpr(inst, :enter)
         dest = inst.args[1]::Int
         @assert dest > bb
-        push!(ip, dest)
-        push!(ip, bb + 1)
+        push!(bb_ip, dest)
+        push!(bb_ip, bb+1)
         return false
     else
-        push!(ip, bb + 1)
+        push!(bb_ip, bb+1)
         return false
     end
 end
 
-default_reprocess(interp::AbstractInterpreter, irsv::IRInterpretationState) = nothing
+default_reprocess(::AbstractInterpreter, ::IRInterpretationState) = nothing
 function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
     extra_reprocess::Union{Nothing,BitSet} = default_reprocess(interp, irsv))
+    interp = switch_to_irinterp(interp)
+
     (; ir, tpdum, ssa_refined) = irsv
 
     bbs = ir.cfg.blocks
-    ip = BitSetBoundedMinPrioritySet(length(bbs))
-    push!(ip, 1)
+    bb_ip = BitSetBoundedMinPrioritySet(length(bbs))
+    push!(bb_ip, 1)
     all_rets = Int[]
 
     # Fast path: Scan both use counts and refinement in one single pass of
     #            of the instructions. In the absence of backedges, this will
     #            converge.
-    while !isempty(ip)
-        bb = popfirst!(ip)
+    while !isempty(bb_ip)
+        bb = popfirst!(bb_ip)
         stmts = bbs[bb].stmts
         lstmt = last(stmts)
         for idx = stmts
+            irsv.curridx = idx
             inst = ir.stmts[idx][:inst]
             typ = ir.stmts[idx][:type]
             any_refined = false
@@ -354,18 +247,29 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
                 any_refined = true
                 delete!(ssa_refined, idx)
             end
+            is_terminator_or_phi = isa(inst, PhiNode) || isa(inst, GotoNode) || isa(inst, GotoIfNot) || isa(inst, ReturnNode) || isexpr(inst, :enter)
+            if typ === Bottom && (idx != lstmt || !is_terminator_or_phi)
+                continue
+            end
             if any_refined && reprocess_instruction!(interp,
-                idx, bb, inst, typ, irsv, extra_reprocess)
+                    idx, bb, inst, typ, irsv, extra_reprocess)
                 push!(ssa_refined, idx)
+                inst = ir.stmts[idx][:inst]
+                typ = ir.stmts[idx][:type]
             end
-            if idx == lstmt
-                if process_terminator!(ir, idx, bb, all_rets, ip)
-                    @goto residual_scan
+            if typ === Bottom && !is_terminator_or_phi
+                kill_terminator_edges!(irsv, lstmt, bb)
+                if idx != lstmt
+                    for idx2 in (idx+1:lstmt-1)
+                        ir[SSAValue(idx2)] = nothing
+                    end
+                    ir[SSAValue(lstmt)][:inst] = ReturnNode()
                 end
-            end
-            if typ === Bottom && !isa(inst, PhiNode)
                 break
             end
+            if idx == lstmt
+                process_terminator!(ir, inst, idx, bb, all_rets, bb_ip) && @goto residual_scan
+            end
         end
     end
     @goto compute_rt
@@ -378,11 +282,12 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
         end
 
         # Slow Path Phase 1.A: Complete use scanning
-        while !isempty(ip)
-            bb = popfirst!(ip)
+        while !isempty(bb_ip)
+            bb = popfirst!(bb_ip)
             stmts = bbs[bb].stmts
             lstmt = last(stmts)
             for idx = stmts
+                irsv.curridx = idx
                 inst = ir.stmts[idx][:inst]
                 for ur in userefs(inst)
                     val = ur[]
@@ -394,18 +299,19 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
                         count!(tpdum, val)
                     end
                 end
-                idx == lstmt && process_terminator!(ir, idx, bb, all_rets, ip)
+                idx == lstmt && process_terminator!(ir, inst, idx, bb, all_rets, bb_ip)
             end
         end
 
         # Slow Path Phase 1.B: Assemble def-use map
         complete!(tpdum)
-        push!(ip, 1)
-        while !isempty(ip)
-            bb = popfirst!(ip)
+        push!(bb_ip, 1)
+        while !isempty(bb_ip)
+            bb = popfirst!(bb_ip)
             stmts = bbs[bb].stmts
             lstmt = last(stmts)
             for idx = stmts
+                irsv.curridx = idx
                 inst = ir.stmts[idx][:inst]
                 for ur in userefs(inst)
                     val = ur[]
@@ -413,7 +319,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
                         push!(tpdum[val.id], idx)
                     end
                 end
-                idx == lstmt && process_terminator!(ir, idx, bb, all_rets, ip)
+                idx == lstmt && process_terminator!(ir, inst, idx, bb, all_rets, bb_ip)
             end
         end
 
@@ -425,6 +331,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
         end
         while !isempty(stmt_ip)
             idx = popfirst!(stmt_ip)
+            irsv.curridx = idx
             inst = ir.stmts[idx][:inst]
             typ = ir.stmts[idx][:type]
             if reprocess_instruction!(interp,
@@ -435,7 +342,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
     end
 
     begin @label compute_rt
-        ultimate_rt = Union{}
+        ultimate_rt = Bottom
         for idx in all_rets
             bb = block_for_inst(ir.cfg, idx)
             if bb != 1 && length(ir.cfg.blocks[bb].preds) == 0
@@ -444,31 +351,38 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
             end
             inst = ir.stmts[idx][:inst]::ReturnNode
             rt = argextype(inst.val, ir)
-            ultimate_rt = tmerge(optimizer_lattice(interp), ultimate_rt, rt)
+            ultimate_rt = tmerge(typeinf_lattice(interp), ultimate_rt, rt)
         end
     end
 
     nothrow = true
-    for i = 1:length(ir.stmts)
-        if (ir.stmts[i][:flag] & IR_FLAG_NOTHROW) == 0
+    for idx = 1:length(ir.stmts)
+        if (ir.stmts[idx][:flag] & IR_FLAG_NOTHROW) == 0
             nothrow = false
             break
         end
     end
 
-    return Pair{Any, Bool}(maybe_singleton_const(ultimate_rt), nothrow)
+    if last(irsv.valid_worlds) >= get_world_counter()
+        # if we aren't cached, we don't need this edge
+        # but our caller might, so let's just make it anyways
+        store_backedges(frame_instance(irsv), irsv.edges)
+    end
+
+    return Pair{Any,Bool}(maybe_singleton_const(ultimate_rt), nothrow)
 end
 
-function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState)
+function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState)
     if __measure_typeinf__[]
-        inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, Any[], Any[], length(irsv.ir.argtypes))
+        inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes))
         Timings.enter_new_timer(inf_frame)
-        v = _ir_abstract_constant_propagation(interp, irsv)
+        ret = _ir_abstract_constant_propagation(interp, irsv)
         append!(inf_frame.slottypes, irsv.ir.argtypes)
         Timings.exit_current_timer(inf_frame)
-        return v
+        return ret
     else
-        T = _ir_abstract_constant_propagation(interp, irsv)
-        return T
+        return _ir_abstract_constant_propagation(interp, irsv)
     end
 end
+ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) =
+    _ir_abstract_constant_propagation(interp, irsv)
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 0ddefa4483eb1..e2c924d60cb83 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -2,7 +2,7 @@
 
 """
     inflate_ir!(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode
-    inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) -> ir::IRCode
+    inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
 
 Inflates `ci::CodeInfo`-IR to `ir::IRCode`-format.
 This should be used with caution as it is a in-place transformation where the fields of
@@ -10,14 +10,10 @@ the original `ci::CodeInfo` are modified.
 """
 function inflate_ir!(ci::CodeInfo, linfo::MethodInstance)
     sptypes = sptypes_from_meth_instance(linfo)
-    if ci.inferred
-        argtypes, _ = matching_cache_argtypes(fallback_lattice, linfo)
-    else
-        argtypes = Any[ Any for i = 1:length(ci.slotflags) ]
-    end
+    argtypes, _ = matching_cache_argtypes(fallback_lattice, linfo)
     return inflate_ir!(ci, sptypes, argtypes)
 end
-function inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any})
+function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any})
     code = ci.code
     cfg = compute_basic_blocks(code)
     for i = 1:length(code)
@@ -51,20 +47,30 @@ end
 
 """
     inflate_ir(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode
-    inflate_ir(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) -> ir::IRCode
+    inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode
     inflate_ir(ci::CodeInfo) -> ir::IRCode
 
 Non-destructive version of `inflate_ir!`.
 Mainly used for testing or interactive use.
 """
 inflate_ir(ci::CodeInfo, linfo::MethodInstance) = inflate_ir!(copy(ci), linfo)
-inflate_ir(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes)
-inflate_ir(ci::CodeInfo) = inflate_ir(ci, Any[], Any[ ci.slottypes === nothing ? Any : (ci.slottypes::Vector{Any})[i] for i = 1:length(ci.slotflags) ])
+inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes)
+function inflate_ir(ci::CodeInfo)
+    parent = ci.parent
+    isa(parent, MethodInstance) && return inflate_ir(ci, parent)
+    # XXX the length of `ci.slotflags` may be different from the actual number of call
+    # arguments, but we really don't know that information in this case
+    argtypes = Any[ Any for i = 1:length(ci.slotflags) ]
+    return inflate_ir(ci, VarState[], argtypes)
+end
 
-function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int)
+function replace_code_newstyle!(ci::CodeInfo, ir::IRCode)
     @assert isempty(ir.new_nodes)
     # All but the first `nargs` slots will now be unused
+    nargs = length(ir.argtypes)
+    resize!(ci.slotnames, nargs)
     resize!(ci.slotflags, nargs)
+    resize!(ci.slottypes, nargs)
     stmts = ir.stmts
     code = ci.code = stmts.inst
     ssavaluetypes = ci.ssavaluetypes = stmts.type
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 4e93d6364749a..4bfb5f3fcde56 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -326,7 +326,7 @@ function already_inserted(compact::IncrementalCompact, old::OldSSAValue)
     end
     id -= length(compact.ir.stmts)
     if id < length(compact.ir.new_nodes)
-        error("")
+        return already_inserted(compact, OldSSAValue(compact.ir.new_nodes.info[id].pos))
     end
     id -= length(compact.ir.new_nodes)
     @assert id <= length(compact.pending_nodes)
@@ -604,21 +604,6 @@ function is_old(compact, @nospecialize(old_node_ssa))
         !already_inserted(compact, old_node_ssa)
 end
 
-mutable struct LazyGenericDomtree{IsPostDom}
-    ir::IRCode
-    domtree::GenericDomTree{IsPostDom}
-    LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir)
-end
-function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom}
-    isdefined(x, :domtree) && return x.domtree
-    return @timeit "domtree 2" x.domtree = IsPostDom ?
-        construct_postdomtree(x.ir.cfg.blocks) :
-        construct_domtree(x.ir.cfg.blocks)
-end
-
-const LazyDomtree = LazyGenericDomtree{false}
-const LazyPostDomtree = LazyGenericDomtree{true}
-
 function perform_lifting!(compact::IncrementalCompact,
         visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key),
         lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
@@ -1046,6 +1031,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
         end
 
         compact[idx] = val === nothing ? nothing : val.val
+        compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED
     end
 
     non_dce_finish!(compact)
@@ -1083,7 +1069,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
         end
         src = @atomic :monotonic code.inferred
     else
-        src = code
+        src = nothing
     end
 
     src = inlining_policy(inlining.interp, src, info, IR_FLAG_NULL, mi, Any[])
@@ -1099,7 +1085,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
     # TODO: Should there be a special line number node for inlined finalizers?
     inlined_at = ir[SSAValue(idx)][:line]
     ((sp_ssa, argexprs), linetable_offset) = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir,
-        ir.linetable, src, mi.sparam_vals, mi.def, inlined_at, argexprs)
+        ir.linetable, src, mi.sparam_vals, mi, inlined_at, argexprs)
 
     # TODO: Use the actual inliner here rather than open coding this special purpose inliner.
     spvals = mi.sparam_vals
@@ -1166,12 +1152,10 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
     function note_block_use!(usebb::Int, useidx::Int)
         new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree),
             bb_insert_block, usebb)
-        if new_bb_insert_block == bb_insert_block == usebb
-            if bb_insert_idx !== nothing
-                bb_insert_idx = max(bb_insert_idx::Int, useidx)
-            else
-                bb_insert_idx = useidx
-            end
+        if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing
+            bb_insert_idx = max(bb_insert_idx::Int, useidx)
+        elseif new_bb_insert_block == usebb
+            bb_insert_idx = useidx
         else
             bb_insert_idx = nothing
         end
@@ -1203,7 +1187,7 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
     # Check #3
     dominates(domtree, finalizer_bb, bb_insert_block) || return nothing
 
-    if !inlining.params.assume_fatal_throw
+    if !OptimizationParams(inlining.interp).assume_fatal_throw
         # Collect all reachable blocks between the finalizer registration and the
         # insertion point
         blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] :
@@ -1281,6 +1265,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         typ = unwrap_unionall(ir.stmts[newidx][:type])
         # Could still end up here if we tried to setfield! on an immutable, which would
         # error at runtime, but is not illegal to have in the IR.
+        typ = widenconst(typ)
         ismutabletype(typ) || continue
         typ = typ::DataType
         # First check for any finalizer calls
@@ -1394,6 +1379,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                     if use.kind === :getfield
                         ir[SSAValue(use.idx)][:inst] = compute_value_for_use(ir, domtree, allblocks,
                             du, phinodes, fidx, use.idx)
+                        ir[SSAValue(use.idx)][:flag] |= IR_FLAG_REFINED
                     elseif use.kind === :isdefined
                         continue # already rewritten if possible
                     elseif use.kind === :nopreserve
@@ -1708,8 +1694,8 @@ function type_lift_pass!(ir::IRCode)
             # a phi node (or an UpsilonNode() argument to a PhiC node), so lift
             # all these nodes that have maybe undef values
             val = stmt.args[(stmt.head === :isdefined) ? 1 : 2]
-            if stmt.head === :isdefined && (val isa Slot || val isa GlobalRef ||
-                    isexpr(val, :static_parameter) || val isa Argument || val isa Symbol)
+            if stmt.head === :isdefined && (val isa GlobalRef || isexpr(val, :static_parameter) ||
+                                            val isa Argument || val isa Symbol)
                 # this is a legal node, so assume it was not introduced by
                 # slot2ssa (at worst, we might leave in a runtime check that
                 # shouldn't have been there)
@@ -2185,14 +2171,10 @@ function cfg_simplify!(ir::IRCode)
         end
     end
 
-    compact = IncrementalCompact(ir, true)
     # Run instruction compaction to produce the result,
     # but we're messing with the CFG
     # so we don't want compaction to do so independently
-    compact.fold_constant_branches = false
-    compact.bb_rename_succ = bb_rename_succ
-    compact.bb_rename_pred = bb_rename_pred
-    compact.result_bbs = cresult_bbs
+    compact = IncrementalCompact(ir, CFGTransformState(true, false, cresult_bbs, bb_rename_pred, bb_rename_succ))
     result_idx = 1
     for (idx, orig_bb) in enumerate(result_bbs)
         ms = orig_bb
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index f4d240f423e89..b420eb32b1205 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -48,7 +48,7 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         print(io, ", ")
         print(io, stmt.typ)
         print(io, ")")
-    elseif isexpr(stmt, :invoke)
+    elseif isexpr(stmt, :invoke) && length(stmt.args) >= 2 && isa(stmt.args[1], MethodInstance)
         stmt = stmt::Expr
         # TODO: why is this here, and not in Base.show_unquoted
         print(io, "invoke ")
@@ -171,10 +171,17 @@ function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show
     return nothing
 end
 
-normalize_method_name(m::Method) = m.name
-normalize_method_name(m::MethodInstance) = (m.def::Method).name
-normalize_method_name(m::Symbol) = m
-normalize_method_name(m) = Symbol("")
+function normalize_method_name(m)
+    if m isa Method
+        return m.name
+    elseif m isa MethodInstance
+        return (m.def::Method).name
+    elseif m isa Symbol
+        return m
+    else
+        return Symbol("")
+    end
+end
 @noinline method_name(m::LineInfoNode) = normalize_method_name(m.method)
 
 # converts the linetable for line numbers
@@ -796,7 +803,7 @@ function inline_linfo_printer(code::IRCode)
     end
 end
 
-_strip_color(s::String) = replace(s, r"\e\[\d+m" => "")
+_strip_color(s::String) = replace(s, r"\e\[\d+m"a => "")
 
 function statementidx_lineinfo_printer(f, code::IRCode)
     printer = f(code.linetable)
@@ -902,7 +909,7 @@ function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=defau
 
     # while compacting, the end of the active result bb will not have been determined
     # (this is done post-hoc by `finish_current_bb!`), so determine it here from scratch.
-    result_bbs = copy(compact.result_bbs)
+    result_bbs = copy(compact.cfg_transform.result_bbs)
     if compact.active_result_bb <= length(result_bbs)
         # count the total number of nodes we'll add to this block
         input_bb_idx = block_for_inst(compact.ir.cfg, compact.idx)
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 289cea14dc01d..757fa1b98bedc 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -1,5 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+struct TypedSlot
+    id::Int
+    typ
+    TypedSlot(id::Int, @nospecialize(typ)) = new(id, typ)
+end
+
 const UnoptSlot = Union{SlotNumber, TypedSlot}
 
 mutable struct SlotInfo
@@ -213,10 +219,10 @@ struct DelayedTyp
 end
 
 # maybe use expr_type?
-function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{Any}, idx::Int, slottypes::Vector{Any})
+function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{VarState}, idx::Int, slottypes::Vector{Any})
     if isa(x, Expr)
         if x.head === :static_parameter
-            return sptypes[x.args[1]::Int]
+            return sptypes[x.args[1]::Int].typ
         elseif x.head === :boundscheck
             return Bool
         elseif x.head === :copyast
@@ -229,7 +235,7 @@ function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{Any}, idx::
     isa(x, Argument) && return slottypes[x.n]
     isa(x, NewSSAValue) && return DelayedTyp(x)
     isa(x, QuoteNode) && return Const(x.value)
-    isa(x, Union{Symbol, PiNode, PhiNode, SlotNumber, TypedSlot}) && error("unexpected val type")
+    isa(x, Union{Symbol, PiNode, PhiNode, UnoptSlot}) && error("unexpected val type")
     return Const(x)
 end
 
@@ -567,7 +573,7 @@ function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int})
 end
 
 function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode,
-    sptypes::Vector{Any}, slottypes::Vector{Any}, nstmts::Int, 𝕃ₒ::AbstractLattice)
+    sptypes::Vector{VarState}, slottypes::Vector{Any}, nstmts::Int, 𝕃ₒ::AbstractLattice)
     new_typ = Union{}
     for i = 1:length(node.values)
         if isa(node, PhiNode) && !isassigned(node.values, i)
@@ -633,7 +639,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
 
     phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)]
     new_phi_nodes = Vector{NewPhiNode}[NewPhiNode[] for _ = 1:length(cfg.blocks)]
-    phi_ssas = SSAValue[]
     new_phic_nodes = IdDict{Int, Vector{NewPhiCNode}}()
     for (; leave_block) in catch_entry_blocks
         new_phic_nodes[leave_block] = NewPhiCNode[]
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index 7dc268f648bcc..bf06d6bb3e523 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -43,6 +43,12 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int,
                 error("")
             end
         end
+
+        use_inst = ir[op]
+        if isa(use_inst[:inst], Union{GotoIfNot, GotoNode, ReturnNode})
+            @verify_error "At statement %$use_idx: Invalid use of value statement or terminator %$(op.id)"
+            error("")
+        end
     elseif isa(op, GlobalRef)
         if !isdefined(op.mod, op.name) || !isconst(op.mod, op.name)
             @verify_error "Unbound GlobalRef not allowed in value position"
@@ -63,7 +69,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int,
     elseif isa(op, Union{OldSSAValue, NewSSAValue})
         @verify_error "Left over SSA marker"
         error("")
-    elseif isa(op, Union{SlotNumber, TypedSlot})
+    elseif isa(op, UnoptSlot)
         @verify_error "Left over slot detected in converted IR"
         error("")
     end
@@ -168,8 +174,16 @@ function verify_ir(ir::IRCode, print::Bool=true,
                     end
                     isa(stmt, PhiNode) || break
                 end
-                @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator ($terminator)"
-                error("")
+                termidx = last(block.stmts)
+                stmttyp = ir.stmts[termidx][:type]
+                if isempty(block.succs) && stmttyp == Union{}
+                    # Allow fallthrough terminators that are known to error to
+                    # be removed from the CFG. Ideally we'd add an unreachable
+                    # here, but that isn't always possible.
+                else
+                    @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator %$termidx ($terminator)::$stmttyp"
+                    error("")
+                end
             end
         end
     end
@@ -268,7 +282,7 @@ function verify_ir(ir::IRCode, print::Bool=true,
                 elseif stmt.head === :foreigncall
                     isforeigncall = true
                 elseif stmt.head === :isdefined && length(stmt.args) == 1 &&
-                        (stmt.args[1] isa GlobalRef || (stmt.args[1] isa Expr && stmt.args[1].head === :static_parameter))
+                        (stmt.args[1] isa GlobalRef || isexpr(stmt.args[1], :static_parameter))
                     # a GlobalRef or static_parameter isdefined check does not evaluate its argument
                     continue
                 elseif stmt.head === :call
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 23f8c3aba908e..9f55d56181838 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -56,11 +56,13 @@ nsplit_impl(info::UnionSplitInfo) = length(info.matches)
 getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1)
 getresult_impl(::UnionSplitInfo, ::Int) = nothing
 
-struct ConstPropResult
+abstract type ConstResult end
+
+struct ConstPropResult <: ConstResult
     result::InferenceResult
 end
 
-struct ConcreteResult
+struct ConcreteResult <: ConstResult
     mi::MethodInstance
     effects::Effects
     result
@@ -68,14 +70,12 @@ struct ConcreteResult
     ConcreteResult(mi::MethodInstance, effects::Effects, @nospecialize val) = new(mi, effects, val)
 end
 
-struct SemiConcreteResult
+struct SemiConcreteResult <: ConstResult
     mi::MethodInstance
     ir::IRCode
     effects::Effects
 end
 
-const ConstResult = Union{ConstPropResult, ConcreteResult, SemiConcreteResult}
-
 """
     info::ConstCallInfo <: CallInfo
 
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 1673929df1129..f894d4ab3f4a5 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -185,7 +185,6 @@ add_tfunc(add_float, 2, 2, math_tfunc, 1)
 add_tfunc(sub_float, 2, 2, math_tfunc, 1)
 add_tfunc(mul_float, 2, 2, math_tfunc, 4)
 add_tfunc(div_float, 2, 2, math_tfunc, 20)
-add_tfunc(rem_float, 2, 2, math_tfunc, 20)
 add_tfunc(fma_float, 3, 3, math_tfunc, 5)
 add_tfunc(muladd_float, 3, 3, math_tfunc, 5)
 
@@ -195,7 +194,6 @@ add_tfunc(add_float_fast, 2, 2, math_tfunc, 1)
 add_tfunc(sub_float_fast, 2, 2, math_tfunc, 1)
 add_tfunc(mul_float_fast, 2, 2, math_tfunc, 2)
 add_tfunc(div_float_fast, 2, 2, math_tfunc, 10)
-add_tfunc(rem_float_fast, 2, 2, math_tfunc, 10)
 
 # bitwise operators
 # -----------------
@@ -347,8 +345,17 @@ end
 end
 add_tfunc(===, 2, 2, egal_tfunc, 1)
 
+function isdefined_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any})
+    if length(argtypes) ≠ 2
+        # TODO prove nothrow when ordering is specified
+        return false
+    end
+    return isdefined_nothrow(𝕃, argtypes[1], argtypes[2])
+end
 @nospecs function isdefined_nothrow(𝕃::AbstractLattice, x, name)
     ⊑ = Core.Compiler.:⊑(𝕃)
+    isvarargtype(x) && return false
+    isvarargtype(name) && return false
     if hasintersect(widenconst(x), Module)
         return name ⊑ Symbol
     else
@@ -582,7 +589,6 @@ end
     return true
 end
 add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100)
-add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 100)
 
 @nospecs function arraysize_tfunc(𝕃::AbstractLattice, ary, dim)
     hasintersect(widenconst(ary), Array) || return Bottom
@@ -940,7 +946,7 @@ function getfield_boundscheck((; fargs, argtypes)::ArgInfo) # Symbol
     return :unknown
 end
 
-function getfield_nothrow(arginfo::ArgInfo, boundscheck::Symbol=getfield_boundscheck(arginfo))
+function getfield_nothrow(𝕃::AbstractLattice, arginfo::ArgInfo, boundscheck::Symbol=getfield_boundscheck(arginfo))
     (;argtypes) = arginfo
     boundscheck === :unknown && return false
     ordering = Const(:not_atomic)
@@ -954,24 +960,22 @@ function getfield_nothrow(arginfo::ArgInfo, boundscheck::Symbol=getfield_boundsc
     elseif length(argtypes) != 3
         return false
     end
-    isvarargtype(ordering) && return false
-    widenconst(ordering) === Symbol || return false
-    if isa(ordering, Const)
-        ordering = ordering.val::Symbol
-        if ordering !== :not_atomic # TODO: this is assuming not atomic
-            return false
-        end
-        return getfield_nothrow(argtypes[2], argtypes[3], !(boundscheck === :off))
-    else
+    isa(ordering, Const) || return false
+    ordering = ordering.val
+    isa(ordering, Symbol) || return false
+    if ordering !== :not_atomic # TODO: this is assuming not atomic
         return false
     end
+    return getfield_nothrow(𝕃, argtypes[2], argtypes[3], !(boundscheck === :off))
 end
-@nospecs function getfield_nothrow(s00, name, boundscheck::Bool)
+@nospecs function getfield_nothrow(𝕃::AbstractLattice, s00, name, boundscheck::Bool)
     # If we don't have boundscheck off and don't know the field, don't even bother
     if boundscheck
         isa(name, Const) || return false
     end
 
+    ⊑ = Core.Compiler.:⊑(𝕃)
+
     # If we have s00 being a const, we can potentially refine our type-based analysis above
     if isa(s00, Const) || isconstType(s00)
         if !isa(s00, Const)
@@ -987,31 +991,32 @@ end
             end
             return isdefined(sv, nval)
         end
-        if !boundscheck && !isa(sv, Module)
-            # If bounds checking is disabled and all fields are assigned,
-            # we may assume that we don't throw
-            for i = 1:fieldcount(typeof(sv))
-                isdefined(sv, i) || return false
-            end
-            return true
+        boundscheck && return false
+        # If bounds checking is disabled and all fields are assigned,
+        # we may assume that we don't throw
+        isa(sv, Module) && return false
+        name ⊑ Int || name ⊑ Symbol || return false
+        for i = 1:fieldcount(typeof(sv))
+            isdefined(sv, i) || return false
         end
-        return false
+        return true
     end
 
     s0 = widenconst(s00)
     s = unwrap_unionall(s0)
     if isa(s, Union)
-        return getfield_nothrow(rewrap_unionall(s.a, s00), name, boundscheck) &&
-               getfield_nothrow(rewrap_unionall(s.b, s00), name, boundscheck)
+        return getfield_nothrow(𝕃, rewrap_unionall(s.a, s00), name, boundscheck) &&
+               getfield_nothrow(𝕃, rewrap_unionall(s.b, s00), name, boundscheck)
     elseif isType(s) && isTypeDataType(s.parameters[1])
         s = s0 = DataType
     end
     if isa(s, DataType)
         # Can't say anything about abstract types
         isabstracttype(s) && return false
-        # If all fields are always initialized, and bounds check is disabled, we can assume
-        # we don't throw
+        # If all fields are always initialized, and bounds check is disabled,
+        # we can assume we don't throw
         if !boundscheck && s.name.n_uninitialized == 0
+            name ⊑ Int || name ⊑ Symbol || return false
             return true
         end
         # Else we need to know what the field is
@@ -1365,7 +1370,7 @@ end
     PT = Const(Pair)
     return instanceof_tfunc(apply_type_tfunc(𝕃, PT, T, T))[1]
 end
-function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::InferenceState)
+function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
     nargs = length(argtypes)
     if !isempty(argtypes) && isvarargtype(argtypes[nargs])
         nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo())
@@ -1631,6 +1636,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe
     (headtype === Union) && return true
     isa(rt, Const) && return true
     u = headtype
+    # TODO: implement optimization for isvarargtype(u) and istuple occurences (which are valid but are not UnionAll)
     for i = 2:length(argtypes)
         isa(u, UnionAll) || return false
         ai = widenconditional(argtypes[i])
@@ -1741,6 +1747,20 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
     canconst = true
     tparams = Any[]
     outervars = TypeVar[]
+
+    # first push the tailing vars from headtype into outervars
+    outer_start, ua = 0, headtype
+    while isa(ua, UnionAll)
+        if (outer_start += 1) > largs
+            push!(outervars, ua.var)
+        end
+        ua = ua.body
+    end
+    if largs > outer_start && isa(headtype, UnionAll) # e.g. !isvarargtype(ua) && !istuple
+        return Bottom # too many arguments
+    end
+    outer_start = outer_start - largs + 1
+
     varnamectr = 1
     ua = headtype
     for i = 1:largs
@@ -1757,34 +1777,61 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
             push!(tparams, ai.tv)
         else
             uncertain = true
-            # These blocks improve type info but make compilation a bit slower.
-            # XXX
-            #unw = unwrap_unionall(ai)
-            #isT = isType(unw)
-            #if isT && isa(ai,UnionAll) && contains_is(outervars, ai.var)
-            #    ai = rename_unionall(ai)
-            #    unw = unwrap_unionall(ai)
-            #end
-            ai_w = widenconst(ai)
-            ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai)[1] : Any
+            unw = unwrap_unionall(ai)
+            isT = isType(unw)
+            # compute our desired upper bound value
+            if isT
+                ub = rewrap_unionall(unw.parameters[1], ai)
+            else
+                ub = Any
+            end
+            if !istuple && unionall_depth(ai) > 3
+                # Heuristic: if we are adding more than N unknown parameters here to the
+                # outer type, use the wrapper type, instead of letting it nest more
+                # complexity here. This is not monotonic, but seems to work out pretty well.
+                if isT
+                    ub = unwrap_unionall(unw.parameters[1])
+                    if ub isa DataType
+                        ub = ub.name.wrapper
+                        unw = Type{unwrap_unionall(ub)}
+                        ai = rewrap_unionall(unw, ub)
+                    else
+                        isT = false
+                        ai = unw = ub = Any
+                    end
+                else
+                    isT = false
+                    ai = unw = ub = Any
+                end
+            elseif !isT
+                # if we didn't have isType to compute ub directly, try to use instanceof_tfunc to refine this guess
+                ai_w = widenconst(ai)
+                ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai)[1] : Any
+            end
             if istuple
                 # in the last parameter of a Tuple type, if the upper bound is Any
                 # then this could be a Vararg type.
                 if i == largs && ub === Any
-                    push!(tparams, Vararg)
-                # XXX
-                #elseif isT
-                #    push!(tparams, rewrap_unionall(unw.parameters[1], ai))
-                else
-                    push!(tparams, Any)
+                    ub = Vararg
+                end
+                push!(tparams, ub)
+            elseif isT
+                tai = ai
+                while isa(tai, UnionAll)
+                    # make sure vars introduced here are unique
+                    if contains_is(outervars, tai.var)
+                        ai = rename_unionall(ai)
+                        unw = unwrap_unionall(ai)::DataType
+                        # ub = rewrap_unionall(unw, ai)
+                        break
+                    end
+                    tai = tai.body
+                end
+                push!(tparams, unw.parameters[1])
+                while isa(ai, UnionAll)
+                    push!(outervars, ai.var)
+                    ai = ai.body
                 end
-            # XXX
-            #elseif isT
-            #    push!(tparams, unw.parameters[1])
-            #    while isa(ai, UnionAll)
-            #        push!(outervars, ai.var)
-            #        ai = ai.body
-            #    end
             else
                 # Is this the second parameter to a NamedTuple?
                 if isa(uw, DataType) && uw.name === _NAMEDTUPLE_NAME && isa(ua, UnionAll) && uw.parameters[2] === ua.var
@@ -1804,19 +1851,40 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
                 push!(outervars, v)
             end
         end
-        if isa(ua, UnionAll)
+        if ua isa UnionAll
             ua = ua.body
-        else
-            ua = nothing
+            #otherwise, sometimes ua isa Vararg (Core.TypeofVararg) or Tuple (DataType)
         end
     end
     local appl
     try
         appl = apply_type(headtype, tparams...)
     catch ex
-        # type instantiation might fail if one of the type parameters
-        # doesn't match, which could happen if a type estimate is too coarse
-        return isvarargtype(headtype) ? TypeofVararg : Type{<:headtype}
+        # type instantiation might fail if one of the type parameters doesn't
+        # match, which could happen only if a type estimate is too coarse
+        # and might guess a concrete value while the actual type for it is Bottom
+        if !uncertain
+            return Union{}
+        end
+        canconst = false
+        uncertain = true
+        empty!(outervars)
+        outer_start = 1
+        # FIXME: if these vars are substituted with TypeVar here, the result
+        # might be wider than the input, so should we use the `.name.wrapper`
+        # object here instead, to replace all of these outervars with
+        # unconstrained ones? Note that this code is nearly unreachable though,
+        # and possibly should simply return Union{} here also, since
+        # `apply_type` is already quite conservative about detecting and
+        # throwing errors.
+        appl = headtype
+        if isa(appl, UnionAll)
+            for _ = 1:largs
+                appl = appl::UnionAll
+                push!(outervars, appl.var)
+                appl = appl.body
+            end
+        end
     end
     !uncertain && canconst && return Const(appl)
     if isvarargtype(appl)
@@ -1826,7 +1894,7 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K,
         return Type{<:appl}
     end
     ans = Type{appl}
-    for i = length(outervars):-1:1
+    for i = length(outervars):-1:outer_start
         ans = UnionAll(outervars[i], ans)
     end
     return ans
@@ -1836,7 +1904,15 @@ add_tfunc(apply_type, 1, INT_INF, apply_type_tfunc, 10)
 # convert the dispatch tuple type argtype to the real (concrete) type of
 # the tuple of those values
 function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
+    isempty(argtypes) && return Const(())
     argtypes = anymap(widenslotwrapper, argtypes)
+    if isvarargtype(argtypes[end]) && unwrapva(argtypes[end]) === Union{}
+        # Drop the Vararg in Tuple{...,Vararg{Union{}}} since it must be length 0.
+        # If there is a Vararg num also, it must be a TypeVar, and it must be
+        # zero, but that generally shouldn't show up here, since it implies a
+        # UnionAll context is missing around this.
+        pop!(argtypes)
+    end
     all_are_const = true
     for i in 1:length(argtypes)
         if !isa(argtypes[i], Const)
@@ -1879,6 +1955,8 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
                 params[i] = x
             elseif !isvarargtype(x) && hasintersect(x, Type)
                 params[i] = Union{x, Type}
+            elseif x === Union{}
+                return Bottom # argtypes is malformed, but try not to crash
             else
                 params[i] = x
             end
@@ -1934,7 +2012,7 @@ function array_elmtype(@nospecialize ary)
     return Any
 end
 
-@nospecs function _opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, linfo::MethodInstance)
+@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, linfo::MethodInstance)
     argt, argt_exact = instanceof_tfunc(arg)
     lbt, lb_exact = instanceof_tfunc(lb)
     if !lb_exact
@@ -1963,18 +2041,20 @@ function array_type_undefable(@nospecialize(arytype))
     end
 end
 
-function array_builtin_common_nothrow(argtypes::Vector{Any}, first_idx_idx::Int)
+function array_builtin_common_nothrow(argtypes::Vector{Any}, first_idx_idx::Int, isarrayref::Bool)
     length(argtypes) >= 4 || return false
     boundscheck = argtypes[1]
     arytype = argtypes[2]
     array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false
-    # If we could potentially throw undef ref errors, bail out now.
-    arytype = widenconst(arytype)
-    array_type_undefable(arytype) && return false
+    if isarrayref
+        # If we could potentially throw undef ref errors, bail out now.
+        arytype = widenconst(arytype)
+        array_type_undefable(arytype) && return false
+    end
     # If we have @inbounds (first argument is false), we're allowed to assume
     # we don't throw bounds errors.
     if isa(boundscheck, Const)
-        !(boundscheck.val::Bool) && return true
+        boundscheck.val::Bool || return true
     end
     # Else we can't really say anything here
     # TODO: In the future we may be able to track the shapes of arrays though
@@ -2006,11 +2086,11 @@ end
 @nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt)
     ⊑ = Core.Compiler.:⊑(𝕃)
     if f === arrayset
-        array_builtin_common_nothrow(argtypes, 4) || return false
+        array_builtin_common_nothrow(argtypes, 4, #=isarrayref=#false) || return false
         # Additionally check element type compatibility
         return arrayset_typecheck(argtypes[2], argtypes[3])
     elseif f === arrayref || f === const_arrayref
-        return array_builtin_common_nothrow(argtypes, 3)
+        return array_builtin_common_nothrow(argtypes, 3, #=isarrayref=#true)
     elseif f === Core._expr
         length(argtypes) >= 1 || return false
         return argtypes[1] ⊑ Symbol
@@ -2029,7 +2109,7 @@ end
     elseif f === invoke
         return false
     elseif f === getfield
-        return getfield_nothrow(ArgInfo(nothing, Any[Const(f), argtypes...]))
+        return getfield_nothrow(𝕃, ArgInfo(nothing, Any[Const(f), argtypes...]))
     elseif f === setfield!
         if na == 3
             return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3])
@@ -2051,8 +2131,7 @@ end
     elseif f === UnionAll
         return na == 2 && (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type)
     elseif f === isdefined
-        na == 2 || return false
-        return isdefined_nothrow(𝕃, argtypes[1], argtypes[2])
+        return isdefined_nothrow(𝕃, argtypes)
     elseif f === Core.sizeof
         na == 1 || return false
         return sizeof_nothrow(argtypes[1])
@@ -2093,14 +2172,12 @@ end
 end
 
 # known to be always effect-free (in particular nothrow)
-const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields]
-
-# known to be effect-free (but not necessarily nothrow)
-const _EFFECT_FREE_BUILTINS = [
-    fieldtype, apply_type, isa, UnionAll,
-    getfield, arrayref, const_arrayref, isdefined, Core.sizeof,
-    Core.ifelse, Core._typevar, (<:),
-    typeassert, throw, arraysize, getglobal, compilerbarrier
+const _PURE_BUILTINS = Any[
+    tuple,
+    svec,
+    ===,
+    typeof,
+    nfields,
 ]
 
 const _CONSISTENT_BUILTINS = Any[
@@ -2118,14 +2195,35 @@ const _CONSISTENT_BUILTINS = Any[
     (<:),
     typeassert,
     throw,
-    setfield!
+    setfield!,
+    donotdelete
+]
+
+# known to be effect-free (but not necessarily nothrow)
+const _EFFECT_FREE_BUILTINS = [
+    fieldtype,
+    apply_type,
+    isa,
+    UnionAll,
+    getfield,
+    arrayref,
+    arraysize,
+    const_arrayref,
+    isdefined,
+    Core.sizeof,
+    Core.ifelse,
+    Core._typevar,
+    (<:),
+    typeassert,
+    throw,
+    getglobal,
+    compilerbarrier,
 ]
 
 const _INACCESSIBLEMEM_BUILTINS = Any[
     (<:),
     (===),
     apply_type,
-    arraysize,
     Core.ifelse,
     Core.sizeof,
     svec,
@@ -2138,12 +2236,14 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
     typeassert,
     typeof,
     compilerbarrier,
-    Core._typevar
+    Core._typevar,
+    donotdelete
 ]
 
 const _ARGMEM_BUILTINS = Any[
     arrayref,
     arrayset,
+    arraysize,
     modifyfield!,
     replacefield!,
     setfield!,
@@ -2152,7 +2252,7 @@ const _ARGMEM_BUILTINS = Any[
 
 const _INCONSISTENT_INTRINSICS = Any[
     Intrinsics.pointerref,      # this one is volatile
-    Intrinsics.arraylen,        # this one is volatile
+    Intrinsics.sqrt_llvm_fast,  # this one may differ at runtime (by a few ulps)
     Intrinsics.have_fma,        # this one depends on the runtime environment
     Intrinsics.cglobal,         # cglobal lookup answer changes at runtime
     # ... and list fastmath intrinsics:
@@ -2165,9 +2265,8 @@ const _INCONSISTENT_INTRINSICS = Any[
     Intrinsics.mul_float_fast,
     Intrinsics.ne_float_fast,
     Intrinsics.neg_float_fast,
-    Intrinsics.rem_float_fast,
     Intrinsics.sqrt_llvm_fast,
-    Intrinsics.sub_float_fast
+    Intrinsics.sub_float_fast,
     # TODO needs to revive #31193 to mark this as inconsistent to be accurate
     # while preserving the currently optimizations for many math operations
     # Intrinsics.muladd_float,    # this is not interprocedurally consistent
@@ -2180,14 +2279,39 @@ const _SPECIAL_BUILTINS = Any[
 function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any})
     # consistent if the first arg is immutable
     na = length(argtypes)
-    na == 0 && return EFFECTS_THROWS
-    obj = argtypes[1]
-    consistent = is_immutable_argtype(unwrapva(obj)) ? ALWAYS_TRUE : ALWAYS_FALSE
-    nothrow = !isvarargtype(argtypes[end]) && na == 2 && isdefined_nothrow(𝕃, obj, argtypes[2])
-    return Effects(EFFECTS_TOTAL; consistent, nothrow)
+    2 ≤ na ≤ 3 || return EFFECTS_THROWS
+    obj, sym = argtypes
+    wobj = unwrapva(obj)
+    consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
+    if is_immutable_argtype(wobj)
+        consistent = ALWAYS_TRUE
+    else
+        # Bindings/fields are not allowed to transition from defined to undefined, so even
+        # if the object is not immutable, we can prove `:consistent`-cy if it is defined:
+        if isa(wobj, Const) && isa(sym, Const)
+            objval = wobj.val
+            symval = sym.val
+            if isa(objval, Module)
+                if isa(symval, Symbol) && isdefined(objval, symval)
+                    consistent = ALWAYS_TRUE
+                end
+            elseif (isa(symval, Symbol) || isa(symval, Int)) && isdefined(objval, symval)
+                consistent = ALWAYS_TRUE
+            end
+        end
+    end
+    nothrow = isdefined_nothrow(𝕃, argtypes)
+    if hasintersect(widenconst(wobj), Module)
+        inaccessiblememonly = ALWAYS_FALSE
+    elseif is_mutation_free_argtype(wobj)
+        inaccessiblememonly = ALWAYS_TRUE
+    else
+        inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY
+    end
+    return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)
 end
 
-function getfield_effects(arginfo::ArgInfo, @nospecialize(rt))
+function getfield_effects(𝕃::AbstractLattice, arginfo::ArgInfo, @nospecialize(rt))
     (;argtypes) = arginfo
     # consistent if the argtype is immutable
     length(argtypes) < 3 && return EFFECTS_THROWS
@@ -2203,9 +2327,9 @@ function getfield_effects(arginfo::ArgInfo, @nospecialize(rt))
     if !(length(argtypes) ≥ 3 && getfield_notundefined(obj, argtypes[3]))
         consistent = ALWAYS_FALSE
     end
-    nothrow = getfield_nothrow(arginfo, :on)
+    bcheck = getfield_boundscheck(arginfo)
+    nothrow = getfield_nothrow(𝕃, arginfo, bcheck)
     if !nothrow
-        bcheck = getfield_boundscheck(arginfo)
         if !(bcheck === :on || bcheck === :boundscheck)
             # If we cannot independently prove inboundsness, taint consistency.
             # The inbounds-ness assertion requires dynamic reachability, while
@@ -2256,7 +2380,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argin
     @assert !contains_is(_SPECIAL_BUILTINS, f)
 
     if f === getfield
-        return getfield_effects(arginfo, rt)
+        return getfield_effects(𝕃, arginfo, rt)
     end
     argtypes = arginfo.argtypes[2:end]
 
@@ -2269,8 +2393,15 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argin
         effect_free = get_binding_type_effect_free(argtypes[1], argtypes[2]) ? ALWAYS_TRUE : ALWAYS_FALSE
         return Effects(EFFECTS_TOTAL; effect_free)
     else
-        consistent = contains_is(_CONSISTENT_BUILTINS, f) ? ALWAYS_TRUE :
-            (f === Core._typevar) ? CONSISTENT_IF_NOTRETURNED : ALWAYS_FALSE
+        if contains_is(_CONSISTENT_BUILTINS, f)
+            consistent = ALWAYS_TRUE
+        elseif f === arrayref || f === arrayset || f === arraysize
+            consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
+        elseif f === Core._typevar
+            consistent = CONSISTENT_IF_NOTRETURNED
+        else
+            consistent = ALWAYS_FALSE
+        end
         if f === setfield! || f === arrayset
             effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY
         elseif contains_is(_EFFECT_FREE_BUILTINS, f) || contains_is(_PURE_BUILTINS, f)
@@ -2278,7 +2409,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argin
         else
             effect_free = ALWAYS_FALSE
         end
-        nothrow = (!(!isempty(argtypes) && isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt))
+        nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt)
         if contains_is(_INACCESSIBLEMEM_BUILTINS, f)
             inaccessiblememonly = ALWAYS_TRUE
         elseif contains_is(_ARGMEM_BUILTINS, f)
@@ -2297,7 +2428,7 @@ function builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f), argtypes::Vect
 end
 
 function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any},
-                           sv::Union{InferenceState,IRCode,Nothing})
+                           sv::Union{AbsIntState, Nothing})
     𝕃ᵢ = typeinf_lattice(interp)
     if f === tuple
         return tuple_tfunc(𝕃ᵢ, argtypes)
@@ -2458,73 +2589,182 @@ function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any})
         return Effects()
     end
 
-    consistent = contains_is(_INCONSISTENT_INTRINSICS, f) ? ALWAYS_FALSE : ALWAYS_TRUE
+    if contains_is(_INCONSISTENT_INTRINSICS, f)
+        consistent = ALWAYS_FALSE
+    elseif f === arraylen
+        consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
+    else
+        consistent = ALWAYS_TRUE
+    end
     effect_free = !(f === Intrinsics.pointerset) ? ALWAYS_TRUE : ALWAYS_FALSE
-    nothrow = (!(!isempty(argtypes) && isvarargtype(argtypes[end])) && intrinsic_nothrow(f, argtypes))
-
-    return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow)
+    nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && intrinsic_nothrow(f, argtypes)
+    if f === arraylen
+        inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY
+    else
+        inaccessiblememonly = ALWAYS_TRUE
+    end
+    return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly)
 end
 
 # TODO: this function is a very buggy and poor model of the return_type function
 # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type,
 # while this assumes that it is an absolutely precise and accurate and exact model of both
-function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::Union{InferenceState, IRCode})
+function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
+    UNKNOWN = CallMeta(Type, EFFECTS_THROWS, NoCallInfo())
+    if !(2 <= length(argtypes) <= 3)
+        return UNKNOWN
+    end
+
+    tt = widenslotwrapper(argtypes[end])
+    if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt))
+        return UNKNOWN
+    end
+
+    af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
+    if !isa(af_argtype, DataType) || !(af_argtype <: Tuple)
+        return UNKNOWN
+    end
+
     if length(argtypes) == 3
-        tt = widenslotwrapper(argtypes[3])
-        if isa(tt, Const) || (isType(tt) && !has_free_typevars(tt))
-            aft = widenslotwrapper(argtypes[2])
-            if isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
-                   (isconcretetype(aft) && !(aft <: Builtin))
-                af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
-                if isa(af_argtype, DataType) && af_argtype <: Tuple
-                    argtypes_vec = Any[aft, af_argtype.parameters...]
-                    if contains_is(argtypes_vec, Union{})
-                        return CallMeta(Const(Union{}), EFFECTS_TOTAL, NoCallInfo())
-                    end
-                    #
-                    # Run the abstract_call without restricting abstract call
-                    # sites. Otherwise, our behavior model of abstract_call
-                    # below will be wrong.
-                    if isa(sv, InferenceState)
-                        old_restrict = sv.restrict_abstract_call_sites
-                        sv.restrict_abstract_call_sites = false
-                        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1)
-                        sv.restrict_abstract_call_sites = old_restrict
-                    else
-                        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1)
-                    end
-                    info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
-                    rt = widenslotwrapper(call.rt)
-                    if isa(rt, Const)
-                        # output was computed to be constant
-                        return CallMeta(Const(typeof(rt.val)), EFFECTS_TOTAL, info)
-                    end
-                    rt = widenconst(rt)
-                    if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
-                        # output cannot be improved so it is known for certain
-                        return CallMeta(Const(rt), EFFECTS_TOTAL, info)
-                    elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
-                        # conservatively express uncertainty of this result
-                        # in two ways: both as being a subtype of this, and
-                        # because of LimitedAccuracy causes
-                        return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info)
-                    elseif (isa(tt, Const) || isconstType(tt)) &&
-                        (isa(aft, Const) || isconstType(aft))
-                        # input arguments were known for certain
-                        # XXX: this doesn't imply we know anything about rt
-                        return CallMeta(Const(rt), EFFECTS_TOTAL, info)
-                    elseif isType(rt)
-                        return CallMeta(Type{rt}, EFFECTS_TOTAL, info)
-                    else
-                        return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info)
-                    end
-                end
+        aft = widenslotwrapper(argtypes[2])
+        if !isa(aft, Const) && !(isType(aft) && !has_free_typevars(aft)) &&
+                !(isconcretetype(aft) && !(aft <: Builtin))
+            return UNKNOWN
+        end
+        argtypes_vec = Any[aft, af_argtype.parameters...]
+    else
+        argtypes_vec = Any[af_argtype.parameters...]
+    end
+
+    if contains_is(argtypes_vec, Union{})
+        return CallMeta(Const(Union{}), EFFECTS_TOTAL, NoCallInfo())
+    end
+
+    # Run the abstract_call without restricting abstract call
+    # sites. Otherwise, our behavior model of abstract_call
+    # below will be wrong.
+    if isa(sv, InferenceState)
+        old_restrict = sv.restrict_abstract_call_sites
+        sv.restrict_abstract_call_sites = false
+        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1)
+        sv.restrict_abstract_call_sites = old_restrict
+    else
+        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1)
+    end
+    info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
+    rt = widenslotwrapper(call.rt)
+    if isa(rt, Const)
+        # output was computed to be constant
+        return CallMeta(Const(typeof(rt.val)), EFFECTS_TOTAL, info)
+    end
+    rt = widenconst(rt)
+    if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
+        # output cannot be improved so it is known for certain
+        return CallMeta(Const(rt), EFFECTS_TOTAL, info)
+    elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
+        # conservatively express uncertainty of this result
+        # in two ways: both as being a subtype of this, and
+        # because of LimitedAccuracy causes
+        return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info)
+    elseif (isa(tt, Const) || isconstType(tt)) &&
+        (isa(aft, Const) || isconstType(aft))
+        # input arguments were known for certain
+        # XXX: this doesn't imply we know anything about rt
+        return CallMeta(Const(rt), EFFECTS_TOTAL, info)
+    elseif isType(rt)
+        return CallMeta(Type{rt}, EFFECTS_TOTAL, info)
+    else
+        return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info)
+    end
+end
+
+# a simplified model of abstract_call_gf_by_type for applicable
+function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
+                             sv::AbsIntState, max_methods::Int)
+    length(argtypes) < 2 && return CallMeta(Union{}, EFFECTS_UNKNOWN, NoCallInfo())
+    isvarargtype(argtypes[2]) && return CallMeta(Bool, EFFECTS_UNKNOWN, NoCallInfo())
+    argtypes = argtypes[2:end]
+    atype = argtypes_to_type(argtypes)
+    matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
+        InferenceParams(interp).max_union_splitting, max_methods)
+    if isa(matches, FailedMethodMatch)
+        rt = Bool # too many matches to analyze
+    else
+        (; valid_worlds, applicable) = matches
+        update_valid_age!(sv, valid_worlds)
+
+        # also need an edge to the method table in case something gets
+        # added that did not intersect with any existing method
+        if isa(matches, MethodMatches)
+            matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype)
+        else
+            for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
+                thisfullmatch || add_mt_backedge!(sv, mt, atype)
+            end
+        end
+
+        napplicable = length(applicable)
+        if napplicable == 0
+            rt = Const(false) # never any matches
+        else
+            rt = Const(true) # has applicable matches
+            for i in 1:napplicable
+                match = applicable[i]::MethodMatch
+                edge = specialize_method(match)::MethodInstance
+                add_backedge!(sv, edge)
+            end
+
+            if isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
+                    (!all(matches.fullmatches) || any_ambig(matches))
+                # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+                rt = Bool
             end
         end
     end
-    return CallMeta(Type, EFFECTS_THROWS, NoCallInfo())
+    return CallMeta(rt, EFFECTS_TOTAL, NoCallInfo())
+end
+add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40)
+
+# a simplified model of abstract_invoke for Core._hasmethod
+function _hasmethod_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
+    if length(argtypes) == 3 && !isvarargtype(argtypes[3])
+        ft′ = argtype_by_index(argtypes, 2)
+        ft = widenconst(ft′)
+        ft === Bottom && return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo())
+        typeidx = 3
+    elseif length(argtypes) == 2 && !isvarargtype(argtypes[2])
+        typeidx = 2
+    else
+        return CallMeta(Any, Effects(), NoCallInfo())
+    end
+    (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, typeidx))
+    isexact || return CallMeta(Bool, Effects(), NoCallInfo())
+    unwrapped = unwrap_unionall(types)
+    if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name
+        return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo())
+    end
+    if typeidx == 3
+        isdispatchelem(ft) || return CallMeta(Bool, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+        types = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
+    end
+    mt = ccall(:jl_method_table_for, Any, (Any,), types)
+    if !isa(mt, MethodTable)
+        return CallMeta(Bool, EFFECTS_THROWS, NoCallInfo())
+    end
+    match, valid_worlds, overlayed = findsup(types, method_table(interp))
+    update_valid_age!(sv, valid_worlds)
+    if match === nothing
+        rt = Const(false)
+        add_mt_backedge!(sv, mt, types) # this should actually be an invoke-type backedge
+    else
+        rt = Const(true)
+        edge = specialize_method(match)::MethodInstance
+        add_invoke_backedge!(sv, types, edge)
+    end
+    return CallMeta(rt, EFFECTS_TOTAL, NoCallInfo())
 end
 
+
 # N.B.: typename maps type equivalence classes to a single value
 function typename_static(@nospecialize(t))
     t isa Const && return _typename(t.val)
@@ -2640,9 +2880,25 @@ function foreigncall_effects(@specialize(abstract_eval), e::Expr)
             return new_array_effects(abstract_eval, args)
         end
     end
+    if is_array_resize(name)
+        return array_resize_effects()
+    end
     return EFFECTS_UNKNOWN
 end
 
+function is_array_resize(name::Symbol)
+    return name === :jl_array_grow_beg || name === :jl_array_grow_end ||
+           name === :jl_array_del_beg || name === :jl_array_del_end ||
+           name === :jl_array_grow_at || name === :jl_array_del_at
+end
+
+function array_resize_effects()
+    return Effects(EFFECTS_TOTAL;
+        effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY,
+        nothrow = false,
+        inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY)
+end
+
 function alloc_array_ndims(name::Symbol)
     if name === :jl_alloc_array_1d
         return 1
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index d9443045e9e89..7d983ec5420db 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -29,7 +29,7 @@ using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inlin
 struct InferenceFrameInfo
     mi::Core.MethodInstance
     world::UInt64
-    sptypes::Vector{Any}
+    sptypes::Vector{Core.Compiler.VarState}
     slottypes::Vector{Any}
     nargs::Int
 end
@@ -89,7 +89,7 @@ function reset_timings()
     empty!(_timings)
     push!(_timings, Timing(
         # The MethodInstance for ROOT(), and default empty values for other fields.
-        InferenceFrameInfo(ROOTmi, 0x0, Any[], Any[Core.Const(ROOT)], 1),
+        InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
         _time_ns()))
     return nothing
 end
@@ -204,8 +204,9 @@ If set to `true`, record per-method-instance timings within type inference in th
 __set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
 const __measure_typeinf__ = fill(false)
 
-# Wrapper around _typeinf that optionally records the exclusive time for each invocation.
-function typeinf(interp::AbstractInterpreter, frame::InferenceState)
+# Wrapper around `_typeinf` that optionally records the exclusive time for
+# each inference performed by `NativeInterpreter`.
+function typeinf(interp::NativeInterpreter, frame::InferenceState)
     if __measure_typeinf__[]
         Timings.enter_new_timer(frame)
         v = _typeinf(interp, frame)
@@ -215,6 +216,7 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
         return _typeinf(interp, frame)
     end
 end
+typeinf(interp::AbstractInterpreter, frame::InferenceState) = _typeinf(interp, frame)
 
 function finish!(interp::AbstractInterpreter, caller::InferenceResult)
     # If we didn't transform the src for caching, we may have to transform
@@ -241,6 +243,7 @@ function finish!(interp::AbstractInterpreter, caller::InferenceResult)
 end
 
 function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
+    interp = switch_from_irinterp(interp)
     typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
     # with no active ip's, frame is done
     frames = frame.callers_in_cycle
@@ -255,8 +258,6 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     for caller in frames
         caller.valid_worlds = valid_worlds
         finish(caller, interp)
-        # finalize and record the linfo result
-        caller.inferred = true
     end
     # collect results for the new expanded frame
     results = Tuple{InferenceResult, Vector{Any}, Bool}[
@@ -268,15 +269,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     for (caller, _, _) in results
         opt = caller.src
         if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true`
-            analyzed = optimize(interp, opt, OptimizationParams(interp), caller)
-            if isa(analyzed, ConstAPI)
-                # XXX: The work in ir_to_codeinf! is essentially wasted. The only reason
-                # we're doing it is so that code_llvm can return the code
-                # for the `return ...::Const` (which never runs anyway). We should do this
-                # as a post processing step instead.
-                ir_to_codeinf!(opt)
-                caller.src = analyzed
-            end
+            analyzed = optimize(interp, opt, caller)
             caller.valid_worlds = (opt.inlining.et::EdgeTracker).valid_worlds[]
         end
     end
@@ -295,16 +288,19 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     return true
 end
 
-function CodeInstance(
-    result::InferenceResult, @nospecialize(inferred_result), valid_worlds::WorldRange)
+function CodeInstance(interp::AbstractInterpreter, result::InferenceResult,
+                      @nospecialize(inferred_result), valid_worlds::WorldRange)
     local const_flags::Int32
     result_type = result.result
-    @assert !(result_type isa LimitedAccuracy)
-    if inferred_result isa ConstAPI
+    @assert !(result_type === nothing || result_type isa LimitedAccuracy)
+
+    if isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val)
         # use constant calling convention
-        rettype_const = inferred_result.val
+        rettype_const = result_type.val
         const_flags = 0x3
-        inferred_result = nothing
+        if may_discard_trees(interp)
+            inferred_result = nothing
+        end
     else
         if isa(result_type, Const)
             rettype_const = result_type.val
@@ -329,14 +325,20 @@ function CodeInstance(
             const_flags = 0x00
         end
     end
-    relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] :
-                     inferred_result === nothing ? UInt8(1) : UInt8(0)
-    # relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] : UInt8(0)
+    relocatability = 0x0
+    if isa(inferred_result, String)
+        t = @_gc_preserve_begin inferred_result
+        relocatability = unsafe_load(unsafe_convert(Ptr{UInt8}, inferred_result), Core.sizeof(inferred_result))
+        @_gc_preserve_end t
+    elseif inferred_result === nothing
+        relocatability = 0x1
+    end
+    # relocatability = isa(inferred_result, String) ? inferred_result[end] : UInt8(0)
     return CodeInstance(result.linfo,
         widenconst(result_type), rettype_const, inferred_result,
         const_flags, first(valid_worlds), last(valid_worlds),
         # TODO: Actually do something with non-IPO effects
-	    encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes,
+        encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes,
         relocatability)
 end
 
@@ -356,7 +358,7 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta
             nslots = length(ci.slotflags)
             resize!(ci.slottypes::Vector{Any}, nslots)
             resize!(ci.slotnames, nslots)
-            return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci)
+            return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
         else
             return ci
         end
@@ -379,7 +381,7 @@ function transform_result_for_cache(interp::AbstractInterpreter,
         inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result)
     end
     # The global cache can only handle objects that codegen understands
-    if !isa(inferred_result, Union{CodeInfo, Vector{UInt8}, ConstAPI})
+    if !isa(inferred_result, MaybeCompressed)
         inferred_result = nothing
     end
     return inferred_result
@@ -403,7 +405,7 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
     # TODO: also don't store inferred code if we've previously decided to interpret this function
     if !already_inferred
         inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result)
-        code_cache(interp)[linfo] = ci = CodeInstance(result, inferred_result, valid_worlds)
+        code_cache(interp)[linfo] = ci = CodeInstance(interp, result, inferred_result, valid_worlds)
         if track_newly_inferred[]
             m = linfo.def
             if isa(m, Method) && m.module != Core
@@ -438,7 +440,7 @@ function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
 end
 
 function adjust_effects(sv::InferenceState)
-    ipo_effects = Effects(sv)
+    ipo_effects = sv.ipo_effects
 
     # refine :consistent-cy effect using the return type information
     # TODO this adjustment tries to compromise imprecise :consistent-cy information,
@@ -449,12 +451,12 @@ function adjust_effects(sv::InferenceState)
         # always throwing an error counts or never returning both count as consistent
         ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
     end
-    if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv)) do i::Int
+    if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int
             return is_mutation_free_argtype(sv.slottypes[i])
         end
         ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
     end
-    if is_consistent_if_notreturned(ipo_effects) && is_consistent_argtype(rt)
+    if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt)
         # in a case when the :consistent-cy here is only tainted by mutable allocations
         # (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return
         # type guarantees that the allocations are never returned
@@ -558,7 +560,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         doopt = (me.cached || me.parent !== nothing)
         recompute_cfg = type_annotate!(interp, me, doopt)
         if doopt && may_optimize(interp)
-            me.result.src = OptimizationState(me, OptimizationParams(interp), interp, recompute_cfg)
+            me.result.src = OptimizationState(me, interp, recompute_cfg)
         else
             me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection)
         end
@@ -571,23 +573,22 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
 end
 
 # record the backedges
-function store_backedges(frame::InferenceResult, edges::Vector{Any})
-    toplevel = !isa(frame.linfo.def, Method)
-    if !toplevel
-        store_backedges(frame.linfo, edges)
-    end
-    nothing
+function store_backedges(caller::InferenceResult, edges::Vector{Any})
+    isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
+    return store_backedges(caller.linfo, edges)
 end
 
-function store_backedges(frame::MethodInstance, edges::Vector{Any})
-    for (; sig, caller) in BackedgeIterator(edges)
-        if isa(caller, MethodInstance)
-            ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), caller, sig, frame)
+function store_backedges(caller::MethodInstance, edges::Vector{Any})
+    for itr in BackedgeIterator(edges)
+        callee = itr.caller
+        if isa(callee, MethodInstance)
+            ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller)
         else
-            typeassert(caller, Core.MethodTable)
-            ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), caller, sig, frame)
+            typeassert(callee, MethodTable)
+            ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller)
         end
     end
+    return nothing
 end
 
 function record_slot_assign!(sv::InferenceState)
@@ -631,7 +632,7 @@ function record_bestguess!(sv::InferenceState)
     return nothing
 end
 
-function annotate_slot_load!(undefs::Vector{Bool}, idx::Int, sv::InferenceState, @nospecialize x)
+function annotate_slot_load!(interp::AbstractInterpreter, undefs::Vector{Bool}, idx::Int, sv::InferenceState, @nospecialize x)
     if isa(x, SlotNumber)
         id = slot_id(x)
         pc = find_dominating_assignment(id, idx, sv)
@@ -646,7 +647,7 @@ function annotate_slot_load!(undefs::Vector{Bool}, idx::Int, sv::InferenceState,
             @assert typ !== NOT_FOUND "active slot in unreached region"
         end
         # add type annotations where needed
-        if !⊑(typeinf_lattice(sv.interp), sv.slottypes[id], typ)
+        if !⊑(typeinf_lattice(interp), sv.slottypes[id], typ)
             return TypedSlot(id, typ)
         end
         return x
@@ -660,13 +661,13 @@ function annotate_slot_load!(undefs::Vector{Bool}, idx::Int, sv::InferenceState,
             i0 = 2
         end
         for i = i0:length(x.args)
-            x.args[i] = annotate_slot_load!(undefs, idx, sv, x.args[i])
+            x.args[i] = annotate_slot_load!(interp, undefs, idx, sv, x.args[i])
         end
         return x
     elseif isa(x, ReturnNode) && isdefined(x, :val)
-        return ReturnNode(annotate_slot_load!(undefs, idx, sv, x.val))
+        return ReturnNode(annotate_slot_load!(interp, undefs, idx, sv, x.val))
     elseif isa(x, GotoIfNot)
-        return GotoIfNot(annotate_slot_load!(undefs, idx, sv, x.cond), x.dest)
+        return GotoIfNot(annotate_slot_load!(interp, undefs, idx, sv, x.cond), x.dest)
     end
     return x
 end
@@ -739,7 +740,7 @@ function type_annotate!(interp::AbstractInterpreter, sv::InferenceState, run_opt
                     end
                 end
             end
-            body[i] = annotate_slot_load!(undefs, i, sv, expr) # 1&2
+            body[i] = annotate_slot_load!(interp, undefs, i, sv, expr) # 1&2
             ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 4
         else # i.e. any runtime execution will never reach this statement
             any_unreachable = true
@@ -786,62 +787,68 @@ function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState,
     # then add all backedges of parent <- parent.parent
     # and merge all of the callers into ancestor.callers_in_cycle
     # and ensure that walking the parent list will get the same result (DAG) from everywhere
-    # Also taint the termination effect, because we can no longer guarantee the absence
-    # of recursion.
-    merge_effects!(interp, parent, Effects(EFFECTS_TOTAL; terminates=false))
     while true
         add_cycle_backedge!(parent, child, parent.currpc)
         union_caller_cycle!(ancestor, child)
-        merge_effects!(interp, child, Effects(EFFECTS_TOTAL; terminates=false))
         child = parent
         child === ancestor && break
-        parent = child.parent::InferenceState
+        parent = frame_parent(child)
+        while !isa(parent, InferenceState)
+            # XXX we may miss some edges here?
+            parent = frame_parent(parent::IRInterpretationState)
+        end
+        parent = parent::InferenceState
     end
 end
 
-function is_same_frame(interp::AbstractInterpreter, linfo::MethodInstance, frame::InferenceState)
-    return linfo === frame.linfo
+function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState)
+    return mi === frame_instance(frame)
 end
 
-function poison_callstack(infstate::InferenceState, topmost::InferenceState)
+function poison_callstack!(infstate::InferenceState, topmost::InferenceState)
     push!(infstate.pclimitations, topmost)
     nothing
 end
 
-# Walk through `linfo`'s upstream call chain, starting at `parent`. If a parent
-# frame matching `linfo` is encountered, then there is a cycle in the call graph
-# (i.e. `linfo` is a descendant callee of itself). Upon encountering this cycle,
+# Walk through `mi`'s upstream call chain, starting at `parent`. If a parent
+# frame matching `mi` is encountered, then there is a cycle in the call graph
+# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle,
 # we "resolve" it by merging the call chain, which entails unioning each intermediary
 # frame's `callers_in_cycle` field and adding the appropriate backedges. Finally,
-# we return `linfo`'s pre-existing frame. If no cycles are found, `nothing` is
+# we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is
 # returned instead.
-function resolve_call_cycle!(interp::AbstractInterpreter, linfo::MethodInstance, parent::InferenceState)
+function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
+    # TODO (#48913) implement a proper recursion handling for irinterp:
+    # This works just because currently the `:terminate` condition guarantees that
+    # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+    # We should revisit this once we have a better story for handling cycles in irinterp.
+    isa(parent, InferenceState) || return false
     frame = parent
     uncached = false
     while isa(frame, InferenceState)
-        uncached |= !frame.cached # ensure we never add an uncached frame to a cycle
-        if is_same_frame(interp, linfo, frame)
+        uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle
+        if is_same_frame(interp, mi, frame)
             if uncached
                 # our attempt to speculate into a constant call lead to an undesired self-cycle
                 # that cannot be converged: poison our call-stack (up to the discovered duplicate frame)
                 # with the limited flag and abort (set return type to Any) now
-                poison_callstack(parent, frame)
+                poison_callstack!(parent, frame)
                 return true
             end
             merge_call_chain!(interp, parent, frame, frame)
             return frame
         end
-        for caller in frame.callers_in_cycle
-            if is_same_frame(interp, linfo, caller)
+        for caller in callers_in_cycle(frame)
+            if is_same_frame(interp, mi, caller)
                 if uncached
-                    poison_callstack(parent, frame)
+                    poison_callstack!(parent, frame)
                     return true
                 end
                 merge_call_chain!(interp, parent, frame, caller)
                 return caller
             end
         end
-        frame = frame.parent
+        frame = frame_parent(frame)
     end
     return false
 end
@@ -862,7 +869,7 @@ struct EdgeCallResult
 end
 
 # compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::InferenceState)
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
     mi = specialize_method(method, atype, sparams)::MethodInstance
     code = get(code_cache(interp), mi, nothing)
     if code isa CodeInstance # return existing rettype if the code is already inferred
@@ -901,9 +908,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         add_remark!(interp, caller, "Inference is disabled for the target module")
         return EdgeCallResult(Any, nothing, Effects())
     end
-    if !caller.cached && caller.parent === nothing
+    if !is_cached(caller) && frame_parent(caller) === nothing
         # this caller exists to return to the user
-        # (if we asked resolve_call_cyle, it might instead detect that there is a cycle that it can't merge)
+        # (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge)
         frame = false
     else
         frame = resolve_call_cycle!(interp, mi, caller)
@@ -911,7 +918,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
     if frame === false
         # completely new
         lock_mi_inference(interp, mi)
-        result = InferenceResult(mi)
+        result = InferenceResult(mi, typeinf_lattice(interp))
         frame = InferenceState(result, cache, interp) # always use the cache for edge targets
         if frame === nothing
             add_remark!(interp, caller, "Failed to retrieve source")
@@ -919,20 +926,20 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
             unlock_mi_inference(interp, mi)
             return EdgeCallResult(Any, nothing, Effects())
         end
-        if caller.cached || caller.parent !== nothing # don't involve uncached functions in cycle resolution
+        if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution
             frame.parent = caller
         end
         typeinf(interp, frame)
-        update_valid_age!(frame, caller)
-        edge = frame.inferred ? mi : nothing
-        return EdgeCallResult(frame.bestguess, edge, Effects(frame)) # effects are adjusted already within `finish`
+        update_valid_age!(caller, frame.valid_worlds)
+        edge = is_inferred(frame) ? mi : nothing
+        return EdgeCallResult(frame.bestguess, edge, frame.ipo_effects) # effects are adjusted already within `finish`
     elseif frame === true
         # unresolvable cycle
         return EdgeCallResult(Any, nothing, Effects())
     end
     # return the current knowledge about this cycle
     frame = frame::InferenceState
-    update_valid_age!(frame, caller)
+    update_valid_age!(caller, frame.valid_worlds)
     return EdgeCallResult(frame.bestguess, nothing, adjust_effects(frame))
 end
 
@@ -942,7 +949,7 @@ end
 function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
     frame = typeinf_frame(interp, method, atype, sparams, run_optimizer)
     frame === nothing && return nothing, Any
-    frame.inferred || return nothing, Any
+    is_inferred(frame) || return nothing, Any
     code = frame.src
     rt = widenconst(ignorelimited(frame.result.result))
     return code, rt
@@ -966,89 +973,85 @@ function typeinf_ircode(
     sparams::SimpleVector,
     optimize_until::Union{Integer,AbstractString,Nothing},
 )
-    ccall(:jl_typeinf_timing_begin, Cvoid, ())
+    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
     frame = typeinf_frame(interp, method, atype, sparams, false)
     if frame === nothing
-        ccall(:jl_typeinf_timing_end, Cvoid, ())
+        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
         return nothing, Any
     end
     (; result) = frame
-    opt_params = OptimizationParams(interp)
-    opt = OptimizationState(frame, opt_params, interp)
+    opt = OptimizationState(frame, interp)
     ir = run_passes(opt.src, opt, result, optimize_until)
     rt = widenconst(ignorelimited(result.result))
-    ccall(:jl_typeinf_timing_end, Cvoid, ())
+    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
     return ir, rt
 end
 
 # compute an inferred frame
 function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
     mi = specialize_method(method, atype, sparams)::MethodInstance
-    ccall(:jl_typeinf_timing_begin, Cvoid, ())
-    result = InferenceResult(mi)
+    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
+    result = InferenceResult(mi, typeinf_lattice(interp))
     frame = InferenceState(result, run_optimizer ? :global : :no, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
-    ccall(:jl_typeinf_timing_end, Cvoid, ())
+    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
     return frame
 end
 
 # compute (and cache) an inferred AST and return type
 function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
     method = mi.def::Method
-    for i = 1:2 # test-and-lock-and-test
-        i == 2 && ccall(:jl_typeinf_timing_begin, Cvoid, ())
-        code = get(code_cache(interp), mi, nothing)
-        if code isa CodeInstance
-            # see if this code already exists in the cache
-            inf = @atomic :monotonic code.inferred
-            if use_const_api(code)
-                i == 2 && ccall(:jl_typeinf_timing_end, Cvoid, ())
-                tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
-                rettype_const = code.rettype_const
-                tree.code = Any[ ReturnNode(quoted(rettype_const)) ]
-                nargs = Int(method.nargs)
-                tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
-                tree.slotflags = fill(IR_FLAG_NULL, nargs)
-                tree.ssavaluetypes = 1
-                tree.codelocs = Int32[1]
-                tree.linetable = [LineInfoNode(method.module, method.name, method.file, method.line, Int32(0))]
-                tree.inferred = true
-                tree.ssaflags = UInt8[0]
-                tree.pure = true
-                set_inlineable!(tree, true)
-                tree.parent = mi
-                tree.rettype = Core.Typeof(rettype_const)
-                tree.min_world = code.min_world
-                tree.max_world = code.max_world
-                return tree
-            elseif isa(inf, CodeInfo)
-                i == 2 && ccall(:jl_typeinf_timing_end, Cvoid, ())
-                if !(inf.min_world == code.min_world &&
-                     inf.max_world == code.max_world &&
-                     inf.rettype === code.rettype)
-                    inf = copy(inf)
-                    inf.min_world = code.min_world
-                    inf.max_world = code.max_world
-                    inf.rettype = code.rettype
-                end
-                return inf
-            elseif isa(inf, Vector{UInt8})
-                i == 2 && ccall(:jl_typeinf_timing_end, Cvoid, ())
-                inf = _uncompressed_ir(code, inf)
-                return inf
+    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
+    code = get(code_cache(interp), mi, nothing)
+    if code isa CodeInstance
+        # see if this code already exists in the cache
+        inf = @atomic :monotonic code.inferred
+        if use_const_api(code)
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+            tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+            rettype_const = code.rettype_const
+            tree.code = Any[ ReturnNode(quoted(rettype_const)) ]
+            nargs = Int(method.nargs)
+            tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
+            tree.slotflags = fill(IR_FLAG_NULL, nargs)
+            tree.ssavaluetypes = 1
+            tree.codelocs = Int32[1]
+            tree.linetable = LineInfoNode[LineInfoNode(method.module, mi, method.file, method.line, Int32(0))]
+            tree.ssaflags = UInt8[0]
+            set_inlineable!(tree, true)
+            tree.parent = mi
+            tree.rettype = Core.Typeof(rettype_const)
+            tree.min_world = code.min_world
+            tree.max_world = code.max_world
+            tree.inferred = true
+            return tree
+        elseif isa(inf, CodeInfo)
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+            if !(inf.min_world == code.min_world &&
+                    inf.max_world == code.max_world &&
+                    inf.rettype === code.rettype)
+                inf = copy(inf)
+                inf.min_world = code.min_world
+                inf.max_world = code.max_world
+                inf.rettype = code.rettype
             end
+            return inf
+        elseif isa(inf, String)
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+            inf = _uncompressed_ir(code, inf)
+            return inf
         end
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg()
-        return retrieve_code_info(mi)
+        return retrieve_code_info(mi, get_world_counter(interp))
     end
     lock_mi_inference(interp, mi)
-    result = InferenceResult(mi)
+    result = InferenceResult(mi, typeinf_lattice(interp))
     frame = InferenceState(result, #=cache=#:global, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
-    ccall(:jl_typeinf_timing_end, Cvoid, ())
+    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
     frame.src.inferred || return nothing
     return frame.src
 end
@@ -1059,19 +1062,17 @@ function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize
         return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap
     end
     mi = specialize_method(method, atype, sparams)::MethodInstance
-    for i = 1:2 # test-and-lock-and-test
-        i == 2 && ccall(:jl_typeinf_timing_begin, Cvoid, ())
-        code = get(code_cache(interp), mi, nothing)
-        if code isa CodeInstance
-            # see if this rettype already exists in the cache
-            i == 2 && ccall(:jl_typeinf_timing_end, Cvoid, ())
-            return code.rettype
-        end
+    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
+    code = get(code_cache(interp), mi, nothing)
+    if code isa CodeInstance
+        # see if this rettype already exists in the cache
+        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+        return code.rettype
     end
-    result = InferenceResult(mi)
+    result = InferenceResult(mi, typeinf_lattice(interp))
     typeinf(interp, result, :global)
-    ccall(:jl_typeinf_timing_end, Cvoid, ())
-    result.result isa InferenceState && return nothing
+    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+    is_inferred(result) || return nothing
     return widenconst(ignorelimited(result.result))
 end
 
@@ -1085,15 +1086,15 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
         src = linfo.uninferred::CodeInfo
         if !src.inferred
             # toplevel lambda - infer directly
-            ccall(:jl_typeinf_timing_begin, Cvoid, ())
+            start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
             if !src.inferred
-                result = InferenceResult(linfo)
+                result = InferenceResult(linfo, typeinf_lattice(interp))
                 frame = InferenceState(result, src, #=cache=#:global, interp)
                 typeinf(interp, frame)
-                @assert frame.inferred # TODO: deal with this better
+                @assert is_inferred(frame) # TODO: deal with this better
                 src = frame.src
             end
-            ccall(:jl_typeinf_timing_end, Cvoid, ())
+            ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
         end
     end
     return src
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 33d4d37e9c936..700a6d333cbc4 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -120,6 +120,8 @@ end
 MustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) =
     MustAlias(slot_id(var), vartyp, fldidx, fldtyp)
 
+_uniontypes(x::MustAlias, ts) = _uniontypes(widenconst(x), ts)
+
 """
     alias::InterMustAlias
 
@@ -734,22 +736,15 @@ widenconst(::LimitedAccuracy) = error("unhandled LimitedAccuracy")
 # state management #
 ####################
 
-issubstate(lattice::AbstractLattice, a::VarState, b::VarState) =
-    ⊑(lattice, a.typ, b.typ) && a.undef <= b.undef
-
 function smerge(lattice::AbstractLattice, sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState})
     sa === sb && return sa
     sa === NOT_FOUND && return sb
     sb === NOT_FOUND && return sa
-    issubstate(lattice, sa, sb) && return sb
-    issubstate(lattice, sb, sa) && return sa
     return VarState(tmerge(lattice, sa.typ, sb.typ), sa.undef | sb.undef)
 end
 
-@inline tchanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) =
-    o === NOT_FOUND || (n !== NOT_FOUND && !⊑(lattice, n, o))
 @inline schanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) =
-    (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(lattice, n::VarState, o::VarState)))
+    (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !(n.undef <= o.undef && ⊑(lattice, n.typ, o.typ))))
 
 # remove any lattice elements that wrap the reassigned slot object from the vartable
 function invalidate_slotwrapper(vt::VarState, changeid::Int, ignore_conditional::Bool)
diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index ed9db007bdbc8..191820951fae1 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -35,6 +35,12 @@ end
 # try to find `type` somewhere in `comparison` type
 # at a minimum nesting depth of `mindepth`
 function is_derived_type(@nospecialize(t), @nospecialize(c), mindepth::Int)
+    if has_free_typevars(t) || has_free_typevars(c)
+        # Don't allow finding types with free typevars. These strongly depend
+        # on identity and we do not make any effort to make sure this returns
+        # sensible results in that case.
+        return false
+    end
     if t === c
         return mindepth <= 1
     end
@@ -87,10 +93,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
         return t # fast path: unparameterized are always simple
     else
         ut = unwrap_unionall(t)
-        if isa(ut, DataType) && isa(c, Type) && c !== Union{} && c <: t
-            # TODO: need to check that the UnionAll bounds on t are limited enough too
-            return t # t is already wider than the comparison in the type lattice
-        elseif is_derived_type_from_any(ut, sources, depth)
+        if is_derived_type_from_any(ut, sources, depth)
             return t # t isn't something new
         end
     end
@@ -135,7 +138,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
                     Q = Any[ tP[i] for i in 1:np ]
                     if ltP > np
                         # combine tp[np:end] into tP[np] using Vararg
-                        Q[np] = tuple_tail_elem(Bottom, Any[ tP[i] for i in np:ltP ])
+                        Q[np] = tuple_tail_elem(fallback_lattice, Bottom, Any[ tP[i] for i in np:ltP ])
                     end
                     for i = 1:np
                         # now apply limit element-wise to Q
@@ -208,9 +211,6 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         return false # Bottom is as simple as they come
     elseif isa(t, DataType) && isempty(t.parameters)
         return false # fastpath: unparameterized types are always finite
-    elseif tupledepth > 0 && isa(unwrap_unionall(t), DataType) && isa(c, Type) && c !== Union{} && c <: t
-        # TODO: need to check that the UnionAll bounds on t are limited enough too
-        return false # t is already wider than the comparison in the type lattice
     elseif tupledepth > 0 && is_derived_type_from_any(unwrap_unionall(t), sources, depth)
         return false # t isn't something new
     end
@@ -227,7 +227,7 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
     end
     # rules for various comparison types
     if isa(c, TypeVar)
-        tupledepth = 1 # allow replacing a TypeVar with a concrete value (since we know the UnionAll must be in covariant position)
+        tupledepth = 1
         if isa(t, TypeVar)
             return !(t.lb === Union{} || t.lb === c.lb) || # simplify lb towards Union{}
                    type_more_complex(t.ub, c.ub, sources, depth + 1, tupledepth, 0)
@@ -261,7 +261,8 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         elseif isa(c, DataType) && t.name === c.name
             cP = c.parameters
             length(cP) < length(tP) && return true
-            length(cP) > length(tP) && !isvarargtype(tP[end]) && depth == 1 && return false
+            isempty(tP) && return false
+            length(cP) > length(tP) && !isvarargtype(tP[end]) && depth == 1 && return false # is this line necessary?
             ntail = length(cP) - length(tP) # assume parameters were dropped from the tuple head
             # allow creating variation within a nested tuple, but only so deep
             if t.name === Tuple.name && tupledepth > 0
@@ -306,6 +307,7 @@ end
 function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb))
     typea isa MaybeUndef && (typea = typea.typ) # n.b. does not appear in inference
     typeb isa MaybeUndef && (typeb = typeb.typ) # n.b. does not appear in inference
+    @assert !isa(typea, LimitedAccuracy) && !isa(typeb, LimitedAccuracy) "LimitedAccuracy not supported by simplertype lattice" # n.b. the caller was supposed to handle these
     typea === typeb && return true
     if typea isa PartialStruct
         aty = widenconst(typea)
@@ -327,7 +329,7 @@ function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecializ
         end
     elseif typea isa Type
         return issimpleenoughtype(typea)
-    # elseif typea isa Const # fall-through good
+    # elseif typea isa Const # fall-through to true is good
     elseif typea isa Conditional # follow issubconditional query
         typeb isa Const && return true
         typeb isa Conditional || return false
@@ -352,6 +354,13 @@ function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecializ
         issimplertype(𝕃, typea.fldtyp, typeb.fldtyp) || return false
     elseif typea isa PartialOpaque
         # TODO
+        typeb isa PartialOpaque || return false
+        aty = widenconst(typea)
+        bty = widenconst(typeb)
+        if typea.source === typeb.source && typea.parent === typeb.parent && aty == bty && typea.env == typeb.env
+            return false
+        end
+        return false
     end
     return true
 end
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 4d5a77f4ee70d..4a4f27c9c27c2 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -32,21 +32,44 @@ struct StmtInfo
     used::Bool
 end
 
+struct MethodInfo
+    propagate_inbounds::Bool
+    method_for_inference_limit_heuristics::Union{Nothing,Method}
+end
+MethodInfo(src::CodeInfo) = MethodInfo(
+    src.propagate_inbounds,
+    src.method_for_inference_limit_heuristics::Union{Nothing,Method})
+
+"""
+    v::VarState
+
+A special wrapper that represents a local variable of a method being analyzed.
+This does not participate in the native type system nor the inference lattice, and it thus
+should be always unwrapped to `v.typ` when performing any type or lattice operations on it.
+`v.undef` represents undefined-ness of this static parameter. If `true`, it means that the
+variable _may_ be undefined at runtime, otherwise it is guaranteed to be defined.
+If `v.typ === Bottom` it means that the variable is strictly undefined.
+"""
+struct VarState
+    typ
+    undef::Bool
+    VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
+end
+
 abstract type ForwardableArgtypes end
 
 """
-    InferenceResult(linfo::MethodInstance)
-    InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes)
+    InferenceResult(linfo::MethodInstance, [argtypes::ForwardableArgtypes, 𝕃::AbstractLattice])
 
 A type that represents the result of running type inference on a chunk of code.
 
 See also [`matching_cache_argtypes`](@ref).
 """
 mutable struct InferenceResult
-    linfo::MethodInstance
-    argtypes::Vector{Any}
-    overridden_by_const::BitVector
-    result                   # ::Type, or InferenceState if WIP
+    const linfo::MethodInstance
+    const argtypes::Vector{Any}
+    const overridden_by_const::BitVector
+    result                   # extended lattice element if inferred, nothing otherwise
     src                      # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise
     valid_worlds::WorldRange # if inference and optimization is finished
     ipo_effects::Effects     # if inference is finished
@@ -54,16 +77,17 @@ mutable struct InferenceResult
     argescapes               # ::ArgEscapeCache if optimized, nothing otherwise
     must_be_codeinf::Bool    # if this must come out as CodeInfo or leaving it as IRCode is ok
     function InferenceResult(linfo::MethodInstance, cache_argtypes::Vector{Any}, overridden_by_const::BitVector)
-        return new(linfo, cache_argtypes, overridden_by_const, Any, nothing,
+        # def = linfo.def
+        # nargs = def isa Method ? Int(def.nargs) : 0
+        # @assert length(cache_argtypes) == nargs
+        return new(linfo, cache_argtypes, overridden_by_const, nothing, nothing,
             WorldRange(), Effects(), Effects(), nothing, true)
     end
 end
-function InferenceResult(linfo::MethodInstance; lattice::AbstractLattice=fallback_lattice)
-    return InferenceResult(linfo, matching_cache_argtypes(lattice, linfo)...)
-end
-function InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes; lattice::AbstractLattice=fallback_lattice)
-    return InferenceResult(linfo, matching_cache_argtypes(lattice, linfo, argtypes)...)
-end
+InferenceResult(linfo::MethodInstance, 𝕃::AbstractLattice=fallback_lattice) =
+    InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo)...)
+InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes, 𝕃::AbstractLattice=fallback_lattice) =
+    InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo, argtypes)...)
 
 """
     inf_params::InferenceParams
@@ -128,6 +152,7 @@ struct InferenceParams
     aggressive_constant_propagation::Bool
     unoptimize_throw_blocks::Bool
     assume_bindings_static::Bool
+    ignore_recursion_hardlimit::Bool
 
     function InferenceParams(
         max_methods::Int,
@@ -138,7 +163,8 @@ struct InferenceParams
         ipo_constant_propagation::Bool,
         aggressive_constant_propagation::Bool,
         unoptimize_throw_blocks::Bool,
-        assume_bindings_static::Bool)
+        assume_bindings_static::Bool,
+        ignore_recursion_hardlimit::Bool)
         return new(
             max_methods,
             max_union_splitting,
@@ -148,7 +174,8 @@ struct InferenceParams
             ipo_constant_propagation,
             aggressive_constant_propagation,
             unoptimize_throw_blocks,
-            assume_bindings_static)
+            assume_bindings_static,
+            ignore_recursion_hardlimit)
     end
 end
 function InferenceParams(
@@ -161,7 +188,8 @@ function InferenceParams(
         #=ipo_constant_propagation::Bool=# true,
         #=aggressive_constant_propagation::Bool=# false,
         #=unoptimize_throw_blocks::Bool=# true,
-        #=assume_bindings_static::Bool=# false);
+        #=assume_bindings_static::Bool=# false,
+        #=ignore_recursion_hardlimit::Bool=# false);
     max_methods::Int = params.max_methods,
     max_union_splitting::Int = params.max_union_splitting,
     max_apply_union_enum::Int = params.max_apply_union_enum,
@@ -170,7 +198,8 @@ function InferenceParams(
     ipo_constant_propagation::Bool = params.ipo_constant_propagation,
     aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
     unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
-    assume_bindings_static::Bool = params.assume_bindings_static)
+    assume_bindings_static::Bool = params.assume_bindings_static,
+    ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
     return InferenceParams(
         max_methods,
         max_union_splitting,
@@ -180,7 +209,8 @@ function InferenceParams(
         ipo_constant_propagation,
         aggressive_constant_propagation,
         unoptimize_throw_blocks,
-        assume_bindings_static)
+        assume_bindings_static,
+        ignore_recursion_hardlimit)
 end
 
 """
@@ -293,51 +323,64 @@ function OptimizationParams(
 end
 
 """
-    NativeInterpreter
+    NativeInterpreter <: AbstractInterpreter
 
 This represents Julia's native type inference algorithm and the Julia-LLVM codegen backend.
-It contains many parameters used by the compilation pipeline.
 """
 struct NativeInterpreter <: AbstractInterpreter
-    # Cache of inference results for this particular interpreter
-    cache::Vector{InferenceResult}
     # The world age we're working inside of
     world::UInt
+
     # method table to lookup for during inference on this world age
     method_table::CachedMethodTable{InternalMethodTable}
 
+    # Cache of inference results for this particular interpreter
+    inf_cache::Vector{InferenceResult}
+
     # Parameters for inference and optimization
     inf_params::InferenceParams
     opt_params::OptimizationParams
 
-    function NativeInterpreter(world::UInt = get_world_counter();
-                               inf_params = InferenceParams(),
-                               opt_params = OptimizationParams(),
-                               )
-        cache = Vector{InferenceResult}() # Initially empty cache
+    # a boolean flag to indicate if this interpreter is performing semi concrete interpretation
+    irinterp::Bool
+end
+
+function NativeInterpreter(world::UInt = get_world_counter();
+                           inf_params::InferenceParams = InferenceParams(),
+                           opt_params::OptimizationParams = OptimizationParams())
+    # Sometimes the caller is lazy and passes typemax(UInt).
+    # we cap it to the current world age for correctness
+    if world == typemax(UInt)
+        world = get_world_counter()
+    end
 
-        # Sometimes the caller is lazy and passes typemax(UInt).
-        # we cap it to the current world age
-        if world == typemax(UInt)
-            world = get_world_counter()
-        end
+    # If they didn't pass typemax(UInt) but passed something more subtly
+    # incorrect, fail out loudly.
+    @assert world <= get_world_counter()
 
-        method_table = CachedMethodTable(InternalMethodTable(world))
+    method_table = CachedMethodTable(InternalMethodTable(world))
 
-        # If they didn't pass typemax(UInt) but passed something more subtly
-        # incorrect, fail out loudly.
-        @assert world <= get_world_counter()
+    inf_cache = Vector{InferenceResult}() # Initially empty cache
 
-        return new(cache, world, method_table, inf_params, opt_params)
-    end
+    return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params, #=irinterp=#false)
+end
+
+function NativeInterpreter(interp::NativeInterpreter;
+                           world::UInt = interp.world,
+                           method_table::CachedMethodTable{InternalMethodTable} = interp.method_table,
+                           inf_cache::Vector{InferenceResult} = interp.inf_cache,
+                           inf_params::InferenceParams = interp.inf_params,
+                           opt_params::OptimizationParams = interp.opt_params,
+                           irinterp::Bool = interp.irinterp)
+    return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params, irinterp)
 end
 
 # Quickly and easily satisfy the AbstractInterpreter API contract
-InferenceParams(ni::NativeInterpreter) = ni.inf_params
-OptimizationParams(ni::NativeInterpreter) = ni.opt_params
-get_world_counter(ni::NativeInterpreter) = ni.world
-get_inference_cache(ni::NativeInterpreter) = ni.cache
-code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(ni))
+InferenceParams(interp::NativeInterpreter) = interp.inf_params
+OptimizationParams(interp::NativeInterpreter) = interp.opt_params
+get_world_counter(interp::NativeInterpreter) = interp.world
+get_inference_cache(interp::NativeInterpreter) = interp.inf_cache
+code_cache(interp::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(interp))
 
 """
     already_inferred_quick_test(::AbstractInterpreter, ::MethodInstance)
@@ -423,6 +466,34 @@ typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.i
 ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance)
 optimizer_lattice(::AbstractInterpreter) = OptimizerLattice(SimpleInferenceLattice.instance)
 
+typeinf_lattice(interp::NativeInterpreter) = interp.irinterp ?
+    OptimizerLattice(InferenceLattice(SimpleInferenceLattice.instance)) :
+    InferenceLattice(BaseInferenceLattice.instance)
+ipo_lattice(interp::NativeInterpreter) = interp.irinterp ?
+    InferenceLattice(SimpleInferenceLattice.instance) :
+    InferenceLattice(IPOResultLattice.instance)
+optimizer_lattice(interp::NativeInterpreter) = OptimizerLattice(SimpleInferenceLattice.instance)
+
+"""
+    switch_to_irinterp(interp::AbstractInterpreter) -> irinterp::AbstractInterpreter
+
+This interface allows `ir_abstract_constant_propagation` to convert `interp` to a new
+`irinterp::AbstractInterpreter` to perform semi-concrete interpretation.
+`NativeInterpreter` uses this interface to switch its lattice to `optimizer_lattice` during
+semi-concrete interpretation on `IRCode`.
+"""
+switch_to_irinterp(interp::AbstractInterpreter) = interp
+switch_to_irinterp(interp::NativeInterpreter) = NativeInterpreter(interp; irinterp=true)
+
+"""
+    switch_from_irinterp(irinterp::AbstractInterpreter) -> interp::AbstractInterpreter
+
+The inverse operation of `switch_to_irinterp`, allowing `typeinf` to convert `irinterp` back
+to a new `interp::AbstractInterpreter` to perform ordinary abstract interpretation.
+"""
+switch_from_irinterp(irinterp::AbstractInterpreter) = irinterp
+switch_from_irinterp(irinterp::NativeInterpreter) = NativeInterpreter(irinterp; irinterp=false)
+
 abstract type CallInfo end
 
 @nospecialize
diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl
index 8f256ea88b78f..cff10b02ceafc 100644
--- a/base/compiler/typeutils.jl
+++ b/base/compiler/typeutils.jl
@@ -165,7 +165,7 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I
         if ub isa DataType
             if a.name === ub.name === Tuple.name &&
                     length(a.parameters) == length(ub.parameters)
-                if 1 < unionsplitcost(a.parameters) <= max_union_splitting
+                if 1 < unionsplitcost(JLTypeLattice(), a.parameters) <= max_union_splitting
                     ta = switchtupleunion(a)
                     return typesubtract(Union{ta...}, b, 0)
                 elseif b isa DataType
@@ -187,6 +187,7 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I
                             bp = b.parameters[i]
                             (isvarargtype(ap) || isvarargtype(bp)) && return a
                             ta[i] = typesubtract(ap, bp, min(2, max_union_splitting))
+                            ta[i] === Union{} && return Union{}
                             return Tuple{ta...}
                         end
                     end
@@ -212,11 +213,11 @@ end
 _typename(union::UnionAll) = _typename(union.body)
 _typename(a::DataType) = Const(a.name)
 
-function tuple_tail_elem(@nospecialize(init), ct::Vector{Any})
+function tuple_tail_elem(𝕃::AbstractLattice, @nospecialize(init), ct::Vector{Any})
     t = init
     for x in ct
         # FIXME: this is broken: it violates subtyping relations and creates invalid types with free typevars
-        t = tmerge(t, unwraptv(unwrapva(x)))
+        t = tmerge(𝕃, t, unwraptv(unwrapva(x)))
     end
     return Vararg{widenconst(t)}
 end
@@ -227,12 +228,11 @@ end
 # or outside of the Tuple/Union nesting, though somewhat more expensive to be
 # outside than inside because the representation is larger (because and it
 # informs the callee whether any splitting is possible).
-function unionsplitcost(argtypes::Union{SimpleVector,Vector{Any}})
+function unionsplitcost(𝕃::AbstractLattice, argtypes::Union{SimpleVector,Vector{Any}})
     nu = 1
     max = 2
     for ti in argtypes
-        # TODO remove this to implement callsite refinement of MustAlias
-        if isa(ti, MustAlias) && isa(widenconst(ti), Union)
+        if has_extended_unionsplit(𝕃) && !isvarargtype(ti)
             ti = widenconst(ti)
         end
         if isa(ti, Union)
@@ -252,12 +252,12 @@ end
 # and `Union{return...} == ty`
 function switchtupleunion(@nospecialize(ty))
     tparams = (unwrap_unionall(ty)::DataType).parameters
-    return _switchtupleunion(Any[tparams...], length(tparams), [], ty)
+    return _switchtupleunion(JLTypeLattice(), Any[tparams...], length(tparams), [], ty)
 end
 
-switchtupleunion(argtypes::Vector{Any}) = _switchtupleunion(argtypes, length(argtypes), [], nothing)
+switchtupleunion(𝕃::AbstractLattice, argtypes::Vector{Any}) = _switchtupleunion(𝕃, argtypes, length(argtypes), [], nothing)
 
-function _switchtupleunion(t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospecialize(origt))
+function _switchtupleunion(𝕃::AbstractLattice, t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospecialize(origt))
     if i == 0
         if origt === nothing
             push!(tunion, copy(t))
@@ -268,17 +268,20 @@ function _switchtupleunion(t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospeci
     else
         origti = ti = t[i]
         # TODO remove this to implement callsite refinement of MustAlias
-        if isa(ti, MustAlias) && isa(widenconst(ti), Union)
-            ti = widenconst(ti)
-        end
         if isa(ti, Union)
-            for ty in uniontypes(ti::Union)
+            for ty in uniontypes(ti)
+                t[i] = ty
+                _switchtupleunion(𝕃, t, i - 1, tunion, origt)
+            end
+            t[i] = origti
+        elseif has_extended_unionsplit(𝕃) && !isa(ti, Const) && !isvarargtype(ti) && isa(widenconst(ti), Union)
+            for ty in uniontypes(ti)
                 t[i] = ty
-                _switchtupleunion(t, i - 1, tunion, origt)
+                _switchtupleunion(𝕃, t, i - 1, tunion, origt)
             end
             t[i] = origti
         else
-            _switchtupleunion(t, i - 1, tunion, origt)
+            _switchtupleunion(𝕃, t, i - 1, tunion, origt)
         end
     end
     return tunion
@@ -306,6 +309,15 @@ function _unioncomplexity(@nospecialize x)
     end
 end
 
+function unionall_depth(@nospecialize ua) # aka subtype_env_size
+    depth = 0
+    while ua isa UnionAll
+        depth += 1
+        ua = ua.body
+    end
+    return depth
+end
+
 # convert a Union of Tuple types to a Tuple of Unions
 function unswitchtupleunion(u::Union)
     ts = uniontypes(u)
@@ -324,20 +336,43 @@ function unswitchtupleunion(u::Union)
     Tuple{Any[ Union{Any[(t::DataType).parameters[i] for t in ts]...} for i in 1:n ]...}
 end
 
-function unwraptv(@nospecialize t)
+function unwraptv_ub(@nospecialize t)
     while isa(t, TypeVar)
         t = t.ub
     end
     return t
 end
+function unwraptv_lb(@nospecialize t)
+    while isa(t, TypeVar)
+        t = t.lb
+    end
+    return t
+end
+const unwraptv = unwraptv_ub
+
+"""
+    is_identity_free_argtype(argtype) -> Bool
 
-# this query is specially written for `adjust_effects` and returns true if a value of this type
-# never involves inconsistency of mutable objects that are allocated somewhere within a call graph
-is_consistent_argtype(@nospecialize ty) =
-    is_consistent_type(widenconst(ignorelimited(ty)))
-is_consistent_type(@nospecialize ty) = isidentityfree(ty)
+Return `true` if the `argtype` object is identity free in the sense that this type or any
+reachable through its fields has non-content-based identity (see `Base.isidentityfree`).
+This query is specifically designed for `adjust_effects`, enabling it to refine the
+`:consistent` effect property tainted by mutable allocation(s) within the analyzed call
+graph when the return value type is `is_identity_free_argtype`, ensuring that the allocated
+mutable objects are never returned.
+"""
+is_identity_free_argtype(@nospecialize ty) = is_identity_free_type(widenconst(ignorelimited(ty)))
+is_identity_free_type(@nospecialize ty) = isidentityfree(ty)
 
-is_immutable_argtype(@nospecialize ty) = is_immutable_type(widenconst(ignorelimited(ty)))
+"""
+    is_immutable_argtype(argtype) -> Bool
+
+Return `true` if the `argtype` object is known to be immutable.
+This query is specifically designed for `getfield_effects` and `isdefined_effects`, allowing
+them to prove `:consistent`-cy of `getfield` / `isdefined` calls when applied to immutable
+objects. Otherwise, we need to additionally prove that the non-immutable object is not a
+global object to prove the `:consistent`-cy.
+"""
+is_immutable_argtype(@nospecialize argtype) = is_immutable_type(widenconst(ignorelimited(argtype)))
 is_immutable_type(@nospecialize ty) = _is_immutable_type(unwrap_unionall(ty))
 function _is_immutable_type(@nospecialize ty)
     if isa(ty, Union)
@@ -346,6 +381,16 @@ function _is_immutable_type(@nospecialize ty)
     return !isabstracttype(ty) && !ismutabletype(ty)
 end
 
-is_mutation_free_argtype(@nospecialize argtype) =
+"""
+    is_mutation_free_argtype(argtype) -> Bool
+
+Return `true` if `argtype` object is mutation free in the sense that no mutable memory
+is reachable from this type (either in the type itself) or through any fields
+(see `Base.ismutationfree`).
+This query is specifically written for analyzing the `:inaccessiblememonly` effect property
+and is supposed to improve the analysis accuracy by not tainting the `:inaccessiblememonly`
+property when there is access to mutation-free global object.
+"""
+is_mutation_free_argtype(@nospecialize(argtype)) =
     is_mutation_free_type(widenconst(ignorelimited(argtype)))
 is_mutation_free_type(@nospecialize ty) = ismutationfree(ty)
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index 0c263931d8fd2..836c370b98bd4 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -84,7 +84,7 @@ end
 const MAX_INLINE_CONST_SIZE = 256
 
 function count_const_size(@nospecialize(x), count_self::Bool = true)
-    (x isa Type || x isa Symbol) && return 0
+    (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0
     ismutable(x) && return MAX_INLINE_CONST_SIZE + 1
     isbits(x) && return Core.sizeof(x)
     dt = typeof(x)
@@ -114,30 +114,30 @@ end
 invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li)
 use_const_api(li::CodeInstance) = invoke_api(li) == 2
 
-function get_staged(mi::MethodInstance)
+function get_staged(mi::MethodInstance, world::UInt)
     may_invoke_generator(mi) || return nothing
     try
         # user code might throw errors – ignore them
-        ci = ccall(:jl_code_for_staged, Any, (Any,), mi)::CodeInfo
+        ci = ccall(:jl_code_for_staged, Any, (Any, UInt), mi, world)::CodeInfo
         return ci
     catch
         return nothing
     end
 end
 
-function retrieve_code_info(linfo::MethodInstance)
+function retrieve_code_info(linfo::MethodInstance, world::UInt)
     m = linfo.def::Method
     c = nothing
     if isdefined(m, :generator)
         # user code might throw errors – ignore them
-        c = get_staged(linfo)
+        c = get_staged(linfo, world)
     end
     if c === nothing && isdefined(m, :source)
         src = m.source
         if src === nothing
             # can happen in images built with --strip-ir
             return nothing
-        elseif isa(src, Array{UInt8,1})
+        elseif isa(src, String)
             c = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, src)
         else
             c = copy(src::CodeInfo)
@@ -199,20 +199,10 @@ function normalize_typevars(method::Method, @nospecialize(atype), sparams::Simpl
 end
 
 # get a handle to the unique specialization object representing a particular instantiation of a call
-function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false, compilesig::Bool=false)
+function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false)
     if isa(atype, UnionAll)
         atype, sparams = normalize_typevars(method, atype, sparams)
     end
-    if compilesig
-        new_atype = get_compileable_sig(method, atype, sparams)
-        new_atype === nothing && return nothing
-        if atype !== new_atype
-            sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), new_atype, method.sig)::SimpleVector
-            if sparams === sp_[2]::SimpleVector
-                atype = new_atype
-            end
-        end
-    end
     if preexisting
         # check cached specializations
         # for an existing result stored there
@@ -271,8 +261,8 @@ Return an iterator over a list of backedges. Iteration returns `(sig, caller)` e
 which will be one of the following:
 
 - `BackedgePair(nothing, caller::MethodInstance)`: a call made by ordinary inferable dispatch
-- `BackedgePair(invokesig, caller::MethodInstance)`: a call made by `invoke(f, invokesig, args...)`
-- `BackedgePair(specsig, mt::MethodTable)`: an abstract call
+- `BackedgePair(invokesig::Type, caller::MethodInstance)`: a call made by `invoke(f, invokesig, args...)`
+- `BackedgePair(specsig::Type, mt::MethodTable)`: an abstract call
 
 # Examples
 
@@ -286,7 +276,7 @@ callyou (generic function with 1 method)
 julia> callyou(2.0)
 3.0
 
-julia> mi = first(which(callme, (Any,)).specializations)
+julia> mi = which(callme, (Any,)).specializations
 MethodInstance for callme(::Float64)
 
 julia> @eval Core.Compiler for (; sig, caller) in BackedgeIterator(Main.mi.backedges)
@@ -305,17 +295,17 @@ const empty_backedge_iter = BackedgeIterator(Any[])
 
 struct BackedgePair
     sig # ::Union{Nothing,Type}
-    caller::Union{MethodInstance,Core.MethodTable}
-    BackedgePair(@nospecialize(sig), caller::Union{MethodInstance,Core.MethodTable}) = new(sig, caller)
+    caller::Union{MethodInstance,MethodTable}
+    BackedgePair(@nospecialize(sig), caller::Union{MethodInstance,MethodTable}) = new(sig, caller)
 end
 
 function iterate(iter::BackedgeIterator, i::Int=1)
     backedges = iter.backedges
     i > length(backedges) && return nothing
     item = backedges[i]
-    isa(item, MethodInstance) && return BackedgePair(nothing, item), i+1           # regular dispatch
-    isa(item, Core.MethodTable) && return BackedgePair(backedges[i+1], item), i+2  # abstract dispatch
-    return BackedgePair(item, backedges[i+1]::MethodInstance), i+2                 # `invoke` calls
+    isa(item, MethodInstance) && return BackedgePair(nothing, item), i+1      # regular dispatch
+    isa(item, MethodTable) && return BackedgePair(backedges[i+1], item), i+2  # abstract dispatch
+    return BackedgePair(item, backedges[i+1]::MethodInstance), i+2            # `invoke` calls
 end
 
 #########
@@ -482,8 +472,11 @@ function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int})
 end
 
 # using a function to ensure we can infer this
-@inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id :
-    isa(s, Argument) ? (s::Argument).n : (s::TypedSlot).id
+@inline function slot_id(s)
+    isa(s, SlotNumber) && return s.id
+    isa(s, Argument) && return s.n
+    return (s::TypedSlot).id
+end
 
 ###########
 # options #
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index 0931686184a2e..68eb2ab15c59d 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -103,11 +103,11 @@ function _validate_val!(@nospecialize(x), errors, ssavals::BitSet)
 end
 
 """
-    validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo)
+    validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo)
 
 Validate `c`, logging any violation by pushing an `InvalidCodeError` into `errors`.
 """
-function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_level::Bool = false)
+function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_level::Bool = false)
     ssavals = BitSet()
     lhs_slotnums = BitSet()
 
@@ -199,16 +199,15 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
 end
 
 """
-    validate_code!(errors::Vector{>:InvalidCodeError}, mi::MethodInstance,
-                   c::Union{Nothing,CodeInfo} = Core.Compiler.retrieve_code_info(mi))
+    validate_code!(errors::Vector{InvalidCodeError}, mi::MethodInstance,
+                   c::Union{Nothing,CodeInfo})
 
 Validate `mi`, logging any violation by pushing an `InvalidCodeError` into `errors`.
 
 If `isa(c, CodeInfo)`, also call `validate_code!(errors, c)`. It is assumed that `c` is
-the `CodeInfo` instance associated with `mi`.
+a `CodeInfo` instance associated with `mi`.
 """
-function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInstance,
-                        c::Union{Nothing,CodeInfo} = Core.Compiler.retrieve_code_info(mi))
+function validate_code!(errors::Vector{InvalidCodeError}, mi::Core.MethodInstance, c::Union{Nothing,CodeInfo})
     is_top_level = mi.def isa Module
     if is_top_level
         mnargs = 0
@@ -231,13 +230,13 @@ end
 
 validate_code(args...) = validate_code!(Vector{InvalidCodeError}(), args...)
 
-is_valid_lvalue(@nospecialize(x)) = isa(x, Slot) || isa(x, GlobalRef)
+is_valid_lvalue(@nospecialize(x)) = isa(x, UnoptSlot) || isa(x, GlobalRef)
 
 function is_valid_argument(@nospecialize(x))
-    if isa(x, Slot) || isa(x, Argument) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) ||
-        (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck))) ||
-        isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
-        isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
+    if isa(x, UnoptSlot) || isa(x, Argument) || isa(x, SSAValue) ||
+       isa(x, GlobalRef) || isa(x, QuoteNode) || isexpr(x, (:static_parameter, :boundscheck)) ||
+       isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
+       isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
         return true
     end
     # TODO: consider being stricter about what needs to be wrapped with QuoteNode
diff --git a/base/complex.jl b/base/complex.jl
index a32ccaa5219a6..a0473c90d5c17 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -120,6 +120,7 @@ Float64
 real(T::Type) = typeof(real(zero(T)))
 real(::Type{T}) where {T<:Real} = T
 real(C::Type{<:Complex}) = fieldtype(C, 1)
+real(::Type{Union{}}, slurp...) = Union{}(im)
 
 """
     isreal(x) -> Bool
@@ -245,6 +246,8 @@ bswap(z::Complex) = Complex(bswap(real(z)), bswap(imag(z)))
 ==(x::Real, z::Complex) = isreal(z) && real(z) == x
 
 isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w))
+isequal(z::Complex, w::Real) = isequal(real(z),w) & isequal(imag(z),zero(w))
+isequal(z::Real, w::Complex) = isequal(z,real(w)) & isequal(zero(z),imag(w))
 
 in(x::Complex, r::AbstractRange{<:Real}) = isreal(x) && real(x) in r
 
diff --git a/base/condition.jl b/base/condition.jl
index ca39b6ea148a4..20481c98ee805 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -87,7 +87,7 @@ function _wait2(c::GenericCondition, waiter::Task, first::Bool=false)
         push!(c.waitq, waiter)
     end
     # since _wait2 is similar to schedule, we should observe the sticky bit now
-    if waiter.sticky && Threads.threadid(waiter) == 0
+    if waiter.sticky && Threads.threadid(waiter) == 0 && !GC.in_finalizer()
         # Issue #41324
         # t.sticky && tid == 0 is a task that needs to be co-scheduled with
         # the parent task. If the parent (current_task) is not sticky we must
@@ -129,7 +129,7 @@ function wait(c::GenericCondition; first::Bool=false)
     try
         return wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
         rethrow()
     finally
         relockall(c.lock, token)
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 79ae852ff22b1..1b661716cc2d9 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -48,7 +48,7 @@ arguments of type `Any`.
 
 To restrict deprecation to a specific signature, annotate the
 arguments of `old`. For example,
-```jldoctest; filter = r"@ .*"
+```jldoctest; filter = r"@ .*"a
 julia> new(x::Int) = x;
 
 julia> new(x::Float64) = 2x;
@@ -362,3 +362,25 @@ end
 end
 
 # END 1.9 deprecations
+
+# BEGIN 1.10 deprecations
+
+"""
+    @pure ex
+
+`@pure` gives the compiler a hint for the definition of a pure function,
+helping for type inference.
+
+!!! warning
+    This macro is intended for internal compiler use and may be subject to changes.
+
+!!! warning
+    In Julia 1.8 and higher, it is favorable to use [`@assume_effects`](@ref) instead of `@pure`.
+    This is because `@assume_effects` allows a finer grained control over Julia's purity
+    modeling and the effect system enables a wider range of optimizations.
+"""
+macro pure(ex)
+    return esc(:(Base.@assume_effects :foldable $ex))
+end
+
+# END 1.10 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index 57598522224dd..8a78c1fa8da45 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -147,8 +147,7 @@ end
 empty(a::AbstractDict, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}()
 
 # Gets 7 most significant bits from the hash (hsh), first bit is 1
-_shorthash7(hsh::UInt32) = (hsh >> UInt(25))%UInt8 | 0x80
-_shorthash7(hsh::UInt64) = (hsh >> UInt(57))%UInt8 | 0x80
+_shorthash7(hsh::UInt) = (hsh >> (8sizeof(UInt)-7))%UInt8 | 0x80
 
 # hashindex (key, sz) - computes optimal position and shorthash7
 #     idx - optimal position in the hash table
@@ -258,11 +257,12 @@ function empty!(h::Dict{K,V}) where V where K
 end
 
 # get the index where a key is stored, or -1 if not present
-function ht_keyindex(h::Dict{K,V}, key) where V where K
+@assume_effects :terminates_locally function ht_keyindex(h::Dict{K,V}, key) where V where K
     isempty(h) && return -1
     sz = length(h.keys)
     iter = 0
     maxprobe = h.maxprobe
+    maxprobe < sz || throw(AssertionError()) # This error will never trigger, but is needed for terminates_locally to be valid
     index, sh = hashindex(key, sz)
     keys = h.keys
 
@@ -351,23 +351,27 @@ ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1]
 
     sz = length(h.keys)
     # Rehash now if necessary
-    if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2
-        # > 3/4 deleted or > 2/3 full
+    if (h.count + h.ndel)*3 > sz*2
+        # > 2/3 full (including tombstones)
         rehash!(h, h.count > 64000 ? h.count*2 : h.count*4)
     end
     nothing
 end
 
 function setindex!(h::Dict{K,V}, v0, key0) where V where K
-    key = convert(K, key0)
-    if !(isequal(key, key0)::Bool)
-        throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K"))
+    if key0 isa K
+        key = key0
+    else
+        key = convert(K, key0)::K
+        if !(isequal(key, key0)::Bool)
+            throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K"))
+        end
     end
     setindex!(h, v0, key)
 end
 
 function setindex!(h::Dict{K,V}, v0, key::K) where V where K
-    v = convert(V, v0)
+    v = v0 isa V ? v0 : convert(V, v0)::V
     index, sh = ht_keyindex2_shorthash!(h, key)
 
     if index > 0
@@ -424,7 +428,7 @@ Dict{String, Int64} with 4 entries:
 get!(collection, key, default)
 
 """
-    get!(f::Function, collection, key)
+    get!(f::Union{Function, Type}, collection, key)
 
 Return the value stored for the given key, or if no mapping for the key is present, store
 `key => f()`, and return `f()`.
@@ -450,12 +454,16 @@ Dict{Int64, Int64} with 1 entry:
   2 => 4
 ```
 """
-get!(f::Function, collection, key)
+get!(f::Callable, collection, key)
 
 function get!(default::Callable, h::Dict{K,V}, key0) where V where K
-    key = convert(K, key0)
-    if !isequal(key, key0)
-        throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K"))
+    if key0 isa K
+        key = key0
+    else
+        key = convert(K, key0)::K
+        if !isequal(key, key0)
+            throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K"))
+        end
     end
     return get!(default, h, key)
 end
@@ -466,7 +474,10 @@ function get!(default::Callable, h::Dict{K,V}, key::K) where V where K
     index > 0 && return h.vals[index]
 
     age0 = h.age
-    v = convert(V, default())
+    v = default()
+    if !isa(v, V)
+        v = convert(V, v)::V
+    end
     if h.age != age0
         index, sh = ht_keyindex2_shorthash!(h, key)
     end
@@ -513,7 +524,7 @@ function get(h::Dict{K,V}, key, default) where V where K
 end
 
 """
-    get(f::Function, collection, key)
+    get(f::Union{Function, Type}, collection, key)
 
 Return the value stored for the given key, or if no mapping for the key is present, return
 `f()`.  Use [`get!`](@ref) to also store the default value in the dictionary.
@@ -527,7 +538,7 @@ get(dict, key) do
 end
 ```
 """
-get(::Function, collection, key)
+get(::Callable, collection, key)
 
 function get(default::Callable, h::Dict{K,V}, key) where V where K
     index = ht_keyindex(h, key)
@@ -756,10 +767,17 @@ function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
         if i > 0
             d1.vals[i] = combine(d1.vals[i], v)
         else
-            if !isequal(k, convert(K, k))
-                throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
+            if !(k isa K)
+                k1 = convert(K, k)::K
+                if !isequal(k, k1)
+                    throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
+                end
+                k = k1
+            end
+            if !isa(v, V)
+                v = convert(V, v)::V
             end
-            @inbounds _setindex!(d1, convert(V, v), k, -i, sh)
+            @inbounds _setindex!(d1, v, k, -i, sh)
         end
     end
     return d1
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index 61b5786298475..e0d21715c2147 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -517,11 +517,12 @@ function docm(source::LineNumberNode, mod::Module, ex)
     @nospecialize ex
     if isexpr(ex, :->) && length(ex.args) > 1
         return docm(source, mod, ex.args...)
-    else
+    elseif isassigned(Base.REPL_MODULE_REF)
         # TODO: this is a shim to continue to allow `@doc` for looking up docstrings
         REPL = Base.REPL_MODULE_REF[]
         return REPL.lookup_doc(ex)
     end
+    return nothing
 end
 # Drop incorrect line numbers produced by nested macro calls.
 docm(source::LineNumberNode, mod::Module, _, _, x...) = docm(source, mod, x...)
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index 956dc9987e2d8..684ed8b48f734 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -59,6 +59,27 @@ See the [manual section about modules](@ref modules) for details.
 """
 kw"export"
 
+"""
+    as
+
+`as` is used as a keyword to rename an identifier brought into scope by
+`import` or `using`, for the purpose of working around name conflicts as
+well as for shortening names.  (Outside of `import` or `using` statements,
+`as` is not a keyword and can be used as an ordinary identifier.)
+
+`import LinearAlgebra as LA` brings the imported `LinearAlgebra` standard library
+into scope as `LA`.
+
+`import LinearAlgebra: eigen as eig, cholesky as chol` brings the `eigen` and `cholesky` methods
+from `LinearAlgebra` into scope as `eig` and `chol` respectively.
+
+`as` works with `using` only when individual identifiers are brought into scope.
+For example, `using LinearAlgebra: eigen as eig` or `using LinearAlgebra: eigen as eig, cholesky as chol` works,
+but `using LinearAlgebra as LA` is invalid syntax, since it is nonsensical to
+rename *all* exported names from `LinearAlgebra` to `LA`.
+"""
+kw"as"
+
 """
     abstract type
 
@@ -1449,21 +1470,6 @@ parser rather than being implemented as a normal string macro `@var_str`.
 """
 kw"var\"name\"", kw"@var_str"
 
-"""
-    ans
-
-A variable referring to the last computed value, automatically set at the interactive prompt.
-"""
-kw"ans"
-
-"""
-    err
-
-A variable referring to the last thrown errors, automatically set at the interactive prompt.
-The thrown errors are collected in a stack of exceptions.
-"""
-kw"err"
-
 """
     devnull
 
@@ -1724,7 +1730,7 @@ The argument `val` to a function or constructor is outside the valid domain.
 ```jldoctest
 julia> sqrt(-1)
 ERROR: DomainError with -1.0:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
 ```
@@ -3102,7 +3108,7 @@ with elements of type `T` and `N` dimensions.
 If `A` is a `StridedArray`, then its elements are stored in memory with offsets, which may
 vary between dimensions but are constant within a dimension. For example, `A` could
 have stride 2 in dimension 1, and stride 3 in dimension 2. Incrementing `A` along
-dimension `d` jumps in memory by [`strides(A, d)`] slots. Strided arrays are
+dimension `d` jumps in memory by [`stride(A, d)`] slots. Strided arrays are
 particularly important and useful because they can sometimes be passed directly
 as pointers to foreign language libraries like BLAS.
 """
diff --git a/base/env.jl b/base/env.jl
index 10f57f3fb9dc7..a4a55d9dad013 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -74,7 +74,7 @@ all keys to uppercase for display, iteration, and copying. Portable code should
 ability to distinguish variables by case, and should beware that setting an ostensibly lowercase
 variable may result in an uppercase `ENV` key.)
 
-    !!! warning
+!!! warning
     Mutating the environment is not thread-safe.
 
 # Examples
@@ -222,7 +222,7 @@ by zero or more `"var"=>val` arguments `kv`. `withenv` is generally used via the
 environment variable (if it is set). When `withenv` returns, the original environment has
 been restored.
 
-    !!! warning
+!!! warning
     Changing the environment is not thread-safe. For running external commands with a different
     environment from the parent process, prefer using [`addenv`](@ref) over `withenv`.
 """
diff --git a/base/errorshow.jl b/base/errorshow.jl
index e99253656d4e4..03650920aae57 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -181,6 +181,7 @@ function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b:
     if a.name === b.name
         Base.show_type_name(io, a.name)
         n = length(a.parameters)
+        n > 0 || return
         print(io, '{')
         for i = 1:n
             if i > length(b.parameters)
@@ -416,17 +417,17 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
             end
             sig0 = sig0::DataType
             s1 = sig0.parameters[1]
-            sig = sig0.parameters[2:end]
-            print(iob, "  ")
-            if !isa(func, rewrap_unionall(s1, method.sig))
-                # function itself doesn't match
+            if sig0 === Tuple || !isa(func, rewrap_unionall(s1, method.sig))
+                # function itself doesn't match or is a builtin
                 continue
             else
+                print(iob, "  ")
                 show_signature_function(iob, s1)
             end
             print(iob, "(")
             t_i = copy(arg_types_param)
             right_matches = 0
+            sig = sig0.parameters[2:end]
             for i = 1 : min(length(t_i), length(sig))
                 i > 1 && print(iob, ", ")
                 # If isvarargtype then it checks whether the rest of the input arguments matches
@@ -823,6 +824,72 @@ function _simplify_include_frames(trace)
     return trace[kept_frames]
 end
 
+# Collapse frames that have the same location (in some cases)
+function _collapse_repeated_frames(trace)
+    kept_frames = trues(length(trace))
+    last_frame = nothing
+    for i in 1:length(trace)
+        frame::StackFrame, _ = trace[i]
+        if last_frame !== nothing && frame.file == last_frame.file && frame.line == last_frame.line
+            #=
+            Handles this case:
+
+            f(g, a; kw...) = error();
+            @inline f(a; kw...) = f(identity, a; kw...);
+            f(1)
+
+            which otherwise ends up as:
+
+            [4] #f#4 <-- useless
+            @ ./REPL[2]:1 [inlined]
+            [5] f(a::Int64)
+            @ Main ./REPL[2]:1
+            =#
+            if startswith(sprint(show, last_frame), "#")
+                kept_frames[i-1] = false
+            end
+
+            #= Handles this case
+            g(x, y=1, z=2) = error();
+            g(1)
+
+            which otherwise ends up as:
+
+            [2] g(x::Int64, y::Int64, z::Int64)
+            @ Main ./REPL[1]:1
+            [3] g(x::Int64) <-- useless
+            @ Main ./REPL[1]:1
+            =#
+            if frame.linfo isa MethodInstance && last_frame.linfo isa MethodInstance &&
+                frame.linfo.def isa Method && last_frame.linfo.def isa Method
+                m, last_m = frame.linfo.def::Method, last_frame.linfo.def::Method
+                params, last_params = Base.unwrap_unionall(m.sig).parameters, Base.unwrap_unionall(last_m.sig).parameters
+                if last_m.nkw != 0
+                    pos_sig_params = last_params[(last_m.nkw+2):end]
+                    issame = true
+                    if pos_sig_params == params
+                        kept_frames[i] = false
+                    end
+                end
+                if length(last_params) > length(params)
+                    issame = true
+                    for i = 1:length(params)
+                        issame &= params[i] == last_params[i]
+                    end
+                    if issame
+                        kept_frames[i] = false
+                    end
+                end
+            end
+
+            # TODO: Detect more cases that can be collapsed
+        end
+        last_frame = frame
+    end
+    return trace[kept_frames]
+end
+
+
 function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true)
     n = 0
     last_frame = StackTraces.UNKNOWN
@@ -846,7 +913,7 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true)
             code = lkup.linfo
             if code isa MethodInstance
                 def = code.def
-                if def isa Method && def.name !== :kwcall && def.sig <: Tuple{typeof(Core.kwcall),Any,Any,Vararg}
+                if def isa Method && def.name !== :kwcall && def.sig <: Tuple{typeof(Core.kwcall),NamedTuple,Any,Vararg}
                     # hide kwcall() methods, which are probably internal keyword sorter methods
                     # (we print the internal method instead, after demangling
                     # the argument list, since it has the right line number info)
@@ -875,7 +942,9 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true)
     if n > 0
         push!(ret, (last_frame, n))
     end
-    return _simplify_include_frames(ret)
+    trace = _simplify_include_frames(ret)
+    trace = _collapse_repeated_frames(trace)
+    return trace
 end
 
 function show_exception_stack(io::IO, stack)
@@ -921,7 +990,7 @@ Experimental.register_error_hint(noncallable_number_hint_handler, MethodError)
 # (probably attempting concatenation)
 function string_concatenation_hint_handler(io, ex, arg_types, kwargs)
     @nospecialize
-    if (ex.f == +) && all(i -> i <: AbstractString, arg_types)
+    if (ex.f === +) && all(i -> i <: AbstractString, arg_types)
         print(io, "\nString concatenation is performed with ")
         printstyled(io, "*", color=:cyan)
         print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).")
diff --git a/base/essentials.jl b/base/essentials.jl
index a9794f372a0d5..06e2c3ea2ec87 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -177,6 +177,19 @@ macro isdefined(s::Symbol)
     return Expr(:escape, Expr(:isdefined, s))
 end
 
+"""
+    nameof(m::Module) -> Symbol
+
+Get the name of a `Module` as a [`Symbol`](@ref).
+
+# Examples
+```jldoctest
+julia> nameof(Base.Broadcast)
+:Broadcast
+```
+"""
+nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
+
 function _is_internal(__module__)
     if ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module === Core.Compiler ||
        nameof(__module__) === :Base
@@ -185,10 +198,6 @@ function _is_internal(__module__)
     return false
 end
 
-# can be used in place of `@pure` (supposed to be used for bootstrapping)
-macro _pure_meta()
-    return _is_internal(__module__) && Expr(:meta, :pure)
-end
 # can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping)
 macro _total_meta()
     return _is_internal(__module__) && Expr(:meta, Expr(:purity,
@@ -211,6 +220,39 @@ macro _foldable_meta()
         #=:notaskstate=#false,
         #=:inaccessiblememonly=#true))
 end
+# can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
+macro _nothrow_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#true,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#false,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false))
+end
+# can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping)
+macro _terminates_locally_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#false,
+        #=:nothrow=#false,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#true,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false))
+end
+# can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping)
+macro _effect_free_terminates_locally_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#false,
+        #=:effect_free=#true,
+        #=:nothrow=#false,
+        #=:terminates_globally=#false,
+        #=:terminates_locally=#true,
+        #=:notaskstate=#false,
+        #=:inaccessiblememonly=#false))
+end
 
 # another version of inlining that propagates an inbounds context
 macro _propagate_inbounds_meta()
@@ -268,13 +310,8 @@ See also: [`round`](@ref), [`trunc`](@ref), [`oftype`](@ref), [`reinterpret`](@r
 """
 function convert end
 
-# make convert(::Type{<:Union{}}, x::T) intentionally ambiguous for all T
-# so it will never get called or invalidated by loading packages
-# with carefully chosen types that won't have any other convert methods defined
-convert(T::Type{<:Core.IntrinsicFunction}, x) = throw(MethodError(convert, (T, x)))
-convert(T::Type{<:Nothing}, x) = throw(MethodError(convert, (Nothing, x)))
-convert(::Type{T}, x::T) where {T<:Core.IntrinsicFunction} = x
-convert(::Type{T}, x::T) where {T<:Nothing} = x
+# ensure this is never ambiguous, and therefore fast for lookup
+convert(T::Type{Union{}}, x...) = throw(ArgumentError("cannot convert a value to Union{} for assignment"))
 
 convert(::Type{Type}, x::Type) = x # the ssair optimizer is strongly dependent on this method existing to avoid over-specialization
                                    # in the absence of inlining-enabled
@@ -300,7 +337,7 @@ end
 @eval struct Pairs{K, V, I, A} <: AbstractDict{K, V}
     data::A
     itr::I
-    Pairs{K, V, I, A}(data, itr) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :(convert(A, data)), :(convert(I, itr))))
+    Pairs{K, V, I, A}(data, itr) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :(data isa A ? data : convert(A, data)), :(itr isa I ? itr : convert(I, itr))))
     Pairs{K, V}(data::A, itr::I) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :data, :itr))
     Pairs{K}(data::A, itr::I) where {K, I, A} = $(Expr(:new, :(Pairs{K, eltype(A), I, A}), :data, :itr))
     Pairs(data::A, itr::I) where  {I, A} = $(Expr(:new, :(Pairs{eltype(I), eltype(A), I, A}), :data, :itr))
@@ -367,13 +404,8 @@ function rename_unionall(@nospecialize(u))
     if !isa(u, UnionAll)
         return u
     end
-    body = rename_unionall(u.body)
-    if body === u.body
-        body = u
-    else
-        body = UnionAll(u.var, body)
-    end
     var = u.var::TypeVar
+    body = UnionAll(var, rename_unionall(u.body))
     nv = TypeVar(var.name, var.lb, var.ub)
     return UnionAll(nv, body{nv})
 end
@@ -422,7 +454,13 @@ function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
     if typeintersect(NTuple{N,Any}, T) === Union{}
         _tuple_error(T, x)
     end
-    cvt1(n) = (@inline; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
+    function cvt1(n)
+        @inline
+        Tn = fieldtype(T, n)
+        xn = getfield(x, n, #=boundscheck=#false)
+        xn isa Tn && return xn
+        return convert(Tn, xn)
+    end
     return ntuple(cvt1, Val(N))::NTuple{N,Any}
 end
 
@@ -475,7 +513,7 @@ julia> oftype(y, x)
 4.0
 ```
 """
-oftype(x, y) = convert(typeof(x), y)
+oftype(x, y) = y isa typeof(x) ? y : convert(typeof(x), y)::typeof(x)
 
 unsigned(x::Int) = reinterpret(UInt, x)
 signed(x::UInt) = reinterpret(Int, x)
@@ -496,18 +534,19 @@ Neither `convert` nor `cconvert` should take a Julia object and turn it into a `
 """
 function cconvert end
 
-cconvert(T::Type, x) = convert(T, x) # do the conversion eagerly in most cases
+cconvert(T::Type, x) = x isa T ? x : convert(T, x) # do the conversion eagerly in most cases
+cconvert(::Type{Union{}}, x...) = convert(Union{}, x...)
 cconvert(::Type{<:Ptr}, x) = x # but defer the conversion to Ptr to unsafe_convert
 unsafe_convert(::Type{T}, x::T) where {T} = x # unsafe_convert (like convert) defaults to assuming the convert occurred
 unsafe_convert(::Type{T}, x::T) where {T<:Ptr} = x  # to resolve ambiguity with the next method
 unsafe_convert(::Type{P}, x::Ptr) where {P<:Ptr} = convert(P, x)
 
 """
-    reinterpret(type, A)
+    reinterpret(type, x)
 
-Change the type-interpretation of the binary data in the primitive type `A`
+Change the type-interpretation of the binary data in the primitive value `x`
 to that of the primitive type `type`.
-The size of `type` has to be the same as that of the type of `A`.
+The size of `type` has to be the same as that of the type of `x`.
 For example, `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a
 [`Float32`](@ref).
 
@@ -701,8 +740,9 @@ end
 
 # SimpleVector
 
-@eval getindex(v::SimpleVector, i::Int) = Core._svec_ref($(Expr(:boundscheck)), v, i)
+@eval getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref($(Expr(:boundscheck)), v, i))
 function length(v::SimpleVector)
+    @_total_meta
     t = @_gc_preserve_begin v
     len = unsafe_load(Ptr{Int}(pointer_from_objref(v)))
     @_gc_preserve_end t
@@ -946,7 +986,7 @@ function popfirst! end
     peek(stream[, T=UInt8])
 
 Read and return a value of type `T` from a stream without advancing the current position
-in the stream.
+in the stream.   See also [`startswith(stream, char_or_string)`](@ref).
 
 # Examples
 
diff --git a/base/exports.jl b/base/exports.jl
index 600b36b6c37c6..ec151df0bfde2 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -238,6 +238,7 @@ export
     bitrotate,
     bswap,
     cbrt,
+    fourthroot,
     ceil,
     cis,
     cispi,
@@ -352,6 +353,7 @@ export
     tan,
     tand,
     tanh,
+    tanpi,
     trailing_ones,
     trailing_zeros,
     trunc,
@@ -363,6 +365,7 @@ export
     zero,
     √,
     ∛,
+    ∜,
     ≈,
     ≉,
 
diff --git a/base/expr.jl b/base/expr.jl
index a0a9a5676c760..e45684f95a34f 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -75,6 +75,9 @@ function copy(c::CodeInfo)
     cnew.code = copy_exprargs(cnew.code)
     cnew.slotnames = copy(cnew.slotnames)
     cnew.slotflags = copy(cnew.slotflags)
+    if cnew.slottypes !== nothing
+        cnew.slottypes = copy(cnew.slottypes)
+    end
     cnew.codelocs  = copy(cnew.codelocs)
     cnew.linetable = copy(cnew.linetable::Union{Vector{Any},Vector{Core.LineInfoNode}})
     cnew.ssaflags  = copy(cnew.ssaflags)
@@ -193,7 +196,7 @@ Small functions typically do not need the `@inline` annotation,
 as the compiler does it automatically. By using `@inline` on bigger functions,
 an extra nudge can be given to the compiler to inline it.
 
-`@inline` can be applied immediately before the definition or in its function body.
+`@inline` can be applied immediately before a function definition or within a function body.
 
 ```julia
 # annotate long-form definition
@@ -271,7 +274,7 @@ Small functions are typically inlined automatically.
 By using `@noinline` on small functions, auto-inlining can be
 prevented.
 
-`@noinline` can be applied immediately before the definition or in its function body.
+`@noinline` can be applied immediately before a function definition or within a function body.
 
 ```julia
 # annotate long-form definition
@@ -339,51 +342,68 @@ macro noinline(x)
     return annotate_meta_def_or_block(x, :noinline)
 end
 
-"""
-    @pure ex
-
-`@pure` gives the compiler a hint for the definition of a pure function,
-helping for type inference.
 
-!!! warning
-    This macro is intended for internal compiler use and may be subject to changes.
-
-!!! warning
-    In Julia 1.8 and higher, it is favorable to use [`@assume_effects`](@ref) instead of `@pure`.
-    This is because `@assume_effects` allows a finer grained control over Julia's purity
-    modeling and the effect system enables a wider range of optimizations.
 """
-macro pure(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :pure) : ex)
-end
+    @constprop setting [ex]
 
-"""
-    @constprop setting ex
+Control the mode of interprocedural constant propagation for the annotated function.
 
-`@constprop` controls the mode of interprocedural constant propagation for the
-annotated function. Two `setting`s are supported:
+Two `setting`s are supported:
 
-- `@constprop :aggressive ex`: apply constant propagation aggressively.
+- `@constprop :aggressive [ex]`: apply constant propagation aggressively.
   For a method where the return type depends on the value of the arguments,
   this can yield improved inference results at the cost of additional compile time.
-- `@constprop :none ex`: disable constant propagation. This can reduce compile
+- `@constprop :none [ex]`: disable constant propagation. This can reduce compile
   times for functions that Julia might otherwise deem worthy of constant-propagation.
   Common cases are for functions with `Bool`- or `Symbol`-valued arguments or keyword arguments.
+
+`@constprop` can be applied immediately before a function definition or within a function body.
+
+```julia
+# annotate long-form definition
+@constprop :aggressive function longdef(x)
+  ...
+end
+
+# annotate short-form definition
+@constprop :aggressive shortdef(x) = ...
+
+# annotate anonymous function that a `do` block creates
+f() do
+    @constprop :aggressive
+    ...
+end
+```
+
+!!! compat "Julia 1.10"
+  The usage within a function body requires at least Julia 1.10.
 """
 macro constprop(setting, ex)
-    if isa(setting, QuoteNode)
-        setting = setting.value
+    sym = constprop_setting(setting)
+    isa(ex, Expr) && return esc(pushmeta!(ex, sym))
+    throw(ArgumentError(LazyString("Bad expression `", ex, "` in `@constprop settings ex`")))
+end
+macro constprop(setting)
+    sym = constprop_setting(setting)
+    return Expr(:meta, sym)
+end
+
+function constprop_setting(@nospecialize setting)
+    isa(setting, QuoteNode) && (setting = setting.value)
+    if setting === :aggressive
+        return :aggressive_constprop
+    elseif setting === :none
+        return :no_constprop
     end
-    setting === :aggressive && return esc(isa(ex, Expr) ? pushmeta!(ex, :aggressive_constprop) : ex)
-    setting === :none && return esc(isa(ex, Expr) ? pushmeta!(ex, :no_constprop) : ex)
-    throw(ArgumentError("@constprop $setting not supported"))
+    throw(ArgumentError(LazyString("@constprop "), setting, "not supported"))
 end
 
 """
-    @assume_effects setting... ex
+    @assume_effects setting... [ex]
 
-`@assume_effects` overrides the compiler's effect modeling for the given method.
-`ex` must be a method definition or `@ccall` expression.
+Override the compiler's effect modeling for the given method or foreign call.
+`@assume_effects` can be applied immediately before a function definition or within a function body.
+It can also be applied immediately before a `@ccall` expression.
 
 !!! compat "Julia 1.8"
     Using `Base.@assume_effects` requires Julia version 1.8.
@@ -410,10 +430,31 @@ julia> code_typed() do
 1 ─     return 479001600
 ) => Int64
 
+julia> code_typed() do
+           map((2,3,4)) do x
+               # this :terminates_locally allows this anonymous function to be constant-folded
+               Base.@assume_effects :terminates_locally
+               res = 1
+               1 < x < 20 || error("bad pow")
+               while x > 1
+                   res *= x
+                   x -= 1
+               end
+               return res
+           end
+       end
+1-element Vector{Any}:
+ CodeInfo(
+1 ─     return (2, 6, 24)
+) => Tuple{Int64, Int64, Int64}
+
 julia> Base.@assume_effects :total !:nothrow @ccall jl_type_intersection(Vector{Int}::Any, Vector{<:Integer}::Any)::Any
 Vector{Int64} (alias for Array{Int64, 1})
 ```
 
+!!! compat "Julia 1.10"
+  The usage within a function body requires at least Julia 1.10.
+
 !!! warning
     Improper use of this macro causes undefined behavior (including crashes,
     incorrect answers, or other hard to track bugs). Use with care and only as a
@@ -472,6 +513,13 @@ The `:consistent` setting asserts that for egal (`===`) inputs:
     even for the same world age (e.g. because one ran in the interpreter, while
     the other was optimized).
 
+!!! note
+    The `:consistent`-cy assertion currrently includes the assertion that the function
+    will not execute any undefined behavior (for any input). Note that undefined behavior
+    may technically cause the function to violate other effect assertions (such as
+    `:nothrow` or `:effect_free`) as well, but we do not model this, and all effects
+    except `:consistent` assume the absence of undefined behavior.
+
 !!! note
     If `:consistent` functions terminate by throwing an exception, that exception
     itself is not required to meet the egality requirement specified above.
@@ -648,21 +696,23 @@ the following other `setting`s:
 Effect names may be prefixed by `!` to indicate that the effect should be removed
 from an earlier meta effect. For example, `:total !:nothrow` indicates that while
 the call is generally total, it may however throw.
-
----
-## Comparison to `@pure`
-
-`@assume_effects :foldable` is similar to [`@pure`](@ref) with the primary
-distinction that the `:consistent`-cy requirement applies world-age wise rather
-than globally as described above. However, in particular, a method annotated
-`@pure` should always be at least `:foldable`.
-Another advantage is that effects introduced by `@assume_effects` are propagated to
-callers interprocedurally while a purity defined by `@pure` is not.
 """
 macro assume_effects(args...)
+    lastex = args[end]
+    inner = unwrap_macrocalls(lastex)
+    if is_function_def(inner)
+        ex = lastex
+        idx = length(args)-1
+    elseif isexpr(lastex, :macrocall) && lastex.args[1] === Symbol("@ccall")
+        ex = lastex
+        idx = length(args)-1
+    else # anonymous function case
+        ex = nothing
+        idx = length(args)
+    end
     (consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly) =
         (false, false, false, false, false, false, false, false)
-    for org_setting in args[1:end-1]
+    for org_setting in args[1:idx]
         (setting, val) = compute_assumed_setting(org_setting)
         if setting === :consistent
             consistent = val
@@ -688,16 +738,19 @@ macro assume_effects(args...)
             throw(ArgumentError("@assume_effects $org_setting not supported"))
         end
     end
-    ex = args[end]
-    isa(ex, Expr) || throw(ArgumentError("Bad expression `$ex` in `@assume_effects [settings] ex`"))
-    if ex.head === :macrocall && ex.args[1] === Symbol("@ccall")
+    if is_function_def(inner)
+        return esc(pushmeta!(ex, :purity,
+            consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly))
+    elseif isexpr(ex, :macrocall) && ex.args[1] === Symbol("@ccall")
         ex.args[1] = GlobalRef(Base, Symbol("@ccall_effects"))
         insert!(ex.args, 3, Core.Compiler.encode_effects_override(Core.Compiler.EffectsOverride(
             consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly,
         )))
         return esc(ex)
+    else # anonymous function case
+        return Expr(:meta, Expr(:purity,
+            consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly))
     end
-    return esc(pushmeta!(ex, :purity, consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate, inaccessiblememonly))
 end
 
 function compute_assumed_setting(@nospecialize(setting), val::Bool=true)
@@ -876,6 +929,26 @@ function remove_linenums!(src::CodeInfo)
     return src
 end
 
+replace_linenums!(ex, ln::LineNumberNode) = ex
+function replace_linenums!(ex::Expr, ln::LineNumberNode)
+    if ex.head === :block || ex.head === :quote
+        # replace line number expressions from metadata (not argument literal or inert) position
+        map!(ex.args, ex.args) do @nospecialize(x)
+            isa(x, Expr) && x.head === :line && length(x.args) == 1 && return Expr(:line, ln.line)
+            isa(x, Expr) && x.head === :line && length(x.args) == 2 && return Expr(:line, ln.line, ln.file)
+            isa(x, LineNumberNode) && return ln
+            return x
+        end
+    end
+    # preserve any linenums inside `esc(...)` guards
+    if ex.head !== :escape
+        for subex in ex.args
+            subex isa Expr && replace_linenums!(subex, ln)
+        end
+    end
+    return ex
+end
+
 macro generated()
     return Expr(:generated)
 end
@@ -919,10 +992,7 @@ macro generated(f)
                          Expr(:block,
                               lno,
                               Expr(:if, Expr(:generated),
-                                   # https://github.com/JuliaLang/julia/issues/25678
-                                   Expr(:block,
-                                        :(local $tmp = $body),
-                                        :(if $tmp isa $(GlobalRef(Core, :CodeInfo)); return $tmp; else $tmp; end)),
+                                   body,
                                    Expr(:block,
                                         Expr(:meta, :generated_only),
                                         Expr(:return, nothing))))))
diff --git a/base/fastmath.jl b/base/fastmath.jl
index a969bcaaa6ae0..7865736f8a776 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -28,7 +28,7 @@ module FastMath
 export @fastmath
 
 import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
-    add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
+    add_float_fast, sub_float_fast, mul_float_fast, div_float_fast,
     eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
 
 const fast_op =
@@ -173,7 +173,6 @@ add_fast(x::T, y::T) where {T<:FloatTypes} = add_float_fast(x, y)
 sub_fast(x::T, y::T) where {T<:FloatTypes} = sub_float_fast(x, y)
 mul_fast(x::T, y::T) where {T<:FloatTypes} = mul_float_fast(x, y)
 div_fast(x::T, y::T) where {T<:FloatTypes} = div_float_fast(x, y)
-rem_fast(x::T, y::T) where {T<:FloatTypes} = rem_float_fast(x, y)
 
 add_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} =
     add_fast(add_fast(x, y), zs...)
@@ -304,6 +303,11 @@ sincos_fast(v::AbstractFloat) = (sin_fast(v), cos_fast(v))
 sincos_fast(v::Real) = sincos_fast(float(v)::AbstractFloat)
 sincos_fast(v) = (sin_fast(v), cos_fast(v))
 
+
+function rem_fast(x::T, y::T) where {T<:FloatTypes}
+    return @fastmath copysign(Base.rem_internal(abs(x), abs(y)), x)
+end
+
 @fastmath begin
     hypot_fast(x::T, y::T) where {T<:FloatTypes} = sqrt(x*x + y*y)
 
diff --git a/base/file.jl b/base/file.jl
index b761e1d65ccb5..866e82b6e39c2 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -32,7 +32,7 @@ export
 # get and set current directory
 
 """
-    pwd() -> AbstractString
+    pwd() -> String
 
 Get the current working directory.
 
@@ -105,7 +105,7 @@ if Sys.iswindows()
     end
 else
     function cd(f::Function, dir::AbstractString)
-        fd = ccall(:open, Int32, (Cstring, Int32), :., 0)
+        fd = ccall(:open, Int32, (Cstring, Int32, UInt32...), :., 0)
         systemerror(:open, fd == -1)
         try
             cd(dir)
@@ -544,7 +544,10 @@ function temp_cleanup_purge(; force::Bool=false)
             end
             !ispath(path) && delete!(TEMP_CLEANUP, path)
         catch ex
-            @warn "temp cleanup" _group=:file exception=(ex, catch_backtrace())
+            @warn """
+                Failed to clean up temporary path $(repr(path))
+                $ex
+                """ _group=:file
         end
     end
 end
@@ -675,8 +678,9 @@ mktemp(parent)
     mktempdir(parent=tempdir(); prefix=$(repr(temp_prefix)), cleanup=true) -> path
 
 Create a temporary directory in the `parent` directory with a name
-constructed from the given prefix and a random suffix, and return its path.
-Additionally, any trailing `X` characters may be replaced with random characters.
+constructed from the given `prefix` and a random suffix, and return its path.
+Additionally, on some platforms, any trailing `'X'` characters in `prefix` may be replaced
+with random characters.
 If `parent` does not exist, throw an error. The `cleanup` option controls whether
 the temporary directory is automatically deleted when the process exits.
 
@@ -840,7 +844,7 @@ julia> readdir("base", join=true)
  ⋮
  "base/version_git.sh"
  "base/views.jl"
- "base/weakkeydict.jl"```
+ "base/weakkeydict.jl"
 
 julia> readdir(abspath("base"), join=true)
 145-element Array{String,1}:
@@ -1109,7 +1113,7 @@ function symlink(target::AbstractString, link::AbstractString;
 end
 
 """
-    readlink(path::AbstractString) -> AbstractString
+    readlink(path::AbstractString) -> String
 
 Return the target location a symbolic link `path` points to.
 """
diff --git a/base/float.jl b/base/float.jl
index 2677fd5dfba38..fad7146655ade 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -310,6 +310,7 @@ Float64
 """
 float(::Type{T}) where {T<:Number} = typeof(float(zero(T)))
 float(::Type{T}) where {T<:AbstractFloat} = T
+float(::Type{Union{}}, slurp...) = Union{}(0.0)
 
 """
     unsafe_trunc(T, x)
@@ -619,7 +620,7 @@ See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref
 isnan(x::AbstractFloat) = (x != x)::Bool
 isnan(x::Number) = false
 
-isfinite(x::AbstractFloat) = x - x == 0
+isfinite(x::AbstractFloat) = !isnan(x - x)
 isfinite(x::Real) = decompose(x)[3] != 0
 isfinite(x::Integer) = true
 
@@ -924,6 +925,7 @@ false
 
 julia> issubnormal(1.0f-38)
 true
+```
 """
 function issubnormal(x::T) where {T<:IEEEFloat}
     y = reinterpret(Unsigned, x)
diff --git a/base/gcutils.jl b/base/gcutils.jl
index 0e5d4c16e550a..fed30befd7d5c 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -1,5 +1,36 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+
+"""
+    WeakRef(x)
+
+`w = WeakRef(x)` constructs a [weak reference](https://en.wikipedia.org/wiki/Weak_reference)
+to the Julia value `x`: although `w` contains a reference to `x`, it does not prevent `x` from being
+garbage collected. `w.value` is either `x` (if `x` has not been garbage-collected yet) or `nothing`
+(if `x` has been garbage-collected).
+
+```jldoctest
+julia> x = "a string"
+"a string"
+
+julia> w = WeakRef(x)
+WeakRef("a string")
+
+julia> GC.gc()
+
+julia> w           # a reference is maintained via `x`
+WeakRef("a string")
+
+julia> x = nothing # clear reference
+
+julia> GC.gc()
+
+julia> w
+WeakRef(nothing)
+```
+"""
+WeakRef
+
 ==(w::WeakRef, v::WeakRef) = isequal(w.value, v.value)
 ==(w::WeakRef, v) = isequal(w.value, v)
 ==(w, v::WeakRef) = isequal(w, v.value)
@@ -131,6 +162,23 @@ function disable_finalizers() @inline
     ccall(:jl_gc_disable_finalizers_internal, Cvoid, ())
 end
 
+"""
+    GC.in_finalizer()::Bool
+
+Returns `true` if the current task is running a finalizer, returns `false`
+otherwise. Will also return `false` within a finalizer which was inlined by the
+compiler's eager finalization optimization, or if `finalize` is called on the
+finalizer directly.
+
+The result of this function may be useful, for example, when a finalizer must
+wait on a resource to become available; instead of polling the resource in a
+`yield` loop (which is not legal to execute within a task running finalizers),
+busy polling or an `@async` continuation could be used instead.
+"""
+function in_finalizer() @inline
+    ccall(:jl_gc_is_in_finalizer, Int8, ()) > 0
+end
+
 """
     GC.@preserve x1 x2 ... xn expr
 
diff --git a/base/generator.jl b/base/generator.jl
index d11742fe5b72f..aa4b7f67cba95 100644
--- a/base/generator.jl
+++ b/base/generator.jl
@@ -92,13 +92,13 @@ Base.HasLength()
 """
 IteratorSize(x) = IteratorSize(typeof(x))
 IteratorSize(::Type) = HasLength()  # HasLength is the default
+IteratorSize(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements"))
+IteratorSize(::Type{Any}) = SizeUnknown()
 
 IteratorSize(::Type{<:Tuple}) = HasLength()
 IteratorSize(::Type{<:AbstractArray{<:Any,N}})  where {N} = HasShape{N}()
 IteratorSize(::Type{Generator{I,F}}) where {I,F} = IteratorSize(I)
 
-IteratorSize(::Type{Any}) = SizeUnknown()
-
 haslength(iter) = IteratorSize(iter) isa Union{HasShape, HasLength}
 
 abstract type IteratorEltype end
@@ -126,7 +126,7 @@ Base.HasEltype()
 """
 IteratorEltype(x) = IteratorEltype(typeof(x))
 IteratorEltype(::Type) = HasEltype()  # HasEltype is the default
+IteratorEltype(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements"))
+IteratorEltype(::Type{Any}) = EltypeUnknown()
 
 IteratorEltype(::Type{Generator{I,T}}) where {I,T} = EltypeUnknown()
-
-IteratorEltype(::Type{Any}) = EltypeUnknown()
diff --git a/base/gmp.jl b/base/gmp.jl
index 0a71de28fffe9..69926f4ad0d06 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -247,6 +247,17 @@ get_str!(x, a, b::BigInt) = (ccall((:__gmpz_get_str,libgmp), Ptr{Cchar}, (Ptr{Cc
 set_str!(x::BigInt, a, b) = Int(ccall((:__gmpz_set_str, libgmp), Cint, (mpz_t, Ptr{UInt8}, Cint), x, a, b))
 get_d(a::BigInt) = ccall((:__gmpz_get_d, libgmp), Cdouble, (mpz_t,), a)
 
+function export!(a::AbstractVector{T}, n::BigInt; order::Integer=-1, nails::Integer=0, endian::Integer=0) where {T<:Base.BitInteger}
+    stride(a, 1) == 1 || throw(ArgumentError("a must have stride 1"))
+    ndigits = cld(sizeinbase(n, 2), 8*sizeof(T) - nails)
+    length(a) < ndigits && resize!(a, ndigits)
+    count = Ref{Csize_t}()
+    ccall((:__gmpz_export, libgmp), Ptr{T}, (Ptr{T}, Ref{Csize_t}, Cint, Csize_t, Cint, Csize_t, mpz_t),
+        a, count, order, sizeof(T), endian, nails, n)
+    @assert count[] ≤ length(a)
+    return a, Int(count[])
+end
+
 limbs_write!(x::BigInt, a) = ccall((:__gmpz_limbs_write, libgmp), Ptr{Limb}, (mpz_t, Clong), x, a)
 limbs_finish!(x::BigInt, a) = ccall((:__gmpz_limbs_finish, libgmp), Cvoid, (mpz_t, Clong), x, a)
 import!(x::BigInt, a, b, c, d, e, f) = ccall((:__gmpz_import, libgmp), Cvoid,
@@ -696,8 +707,16 @@ end
 
 factorial(x::BigInt) = isneg(x) ? BigInt(0) : MPZ.fac_ui(x)
 
-binomial(n::BigInt, k::UInt) = MPZ.bin_ui(n, k)
-binomial(n::BigInt, k::Integer) = k < 0 ? BigInt(0) : binomial(n, UInt(k))
+function binomial(n::BigInt, k::Integer)
+    k < 0 && return BigInt(0)
+    k <= typemax(Culong) && return binomial(n, Culong(k))
+    n < 0 && return isodd(k) ? -binomial(k - n - 1, k) : binomial(k - n - 1, k)
+    κ = n - k
+    κ < 0 && return BigInt(0)
+    κ <= typemax(Culong) && return binomial(n, Culong(κ))
+    throw(OverflowError("Computation would exceed memory"))
+end
+binomial(n::BigInt, k::Culong) = MPZ.bin_ui(n, k)
 
 ==(x::BigInt, y::BigInt) = cmp(x,y) == 0
 ==(x::BigInt, i::Integer) = cmp(x,i) == 0
@@ -750,19 +769,29 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1)
 end
 
 function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer}
-    if 2 ≤ base ≤ 62
-        s = codeunits(string(n; base))
-        i, j = firstindex(a)-1, length(s)+1
-        lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isneg(n))
-        while i < lasti
-            # base ≤ 36: 0-9, plus a-z for 10-35
-            # base > 36: 0-9, plus A-Z for 10-35 and a-z for 36..61
-            x = s[j -= 1]
-            a[i += 1] = base ≤ 36 ? (x>0x39 ? x-0x57 : x-0x30) : (x>0x39 ? (x>0x60 ? x-0x3d : x-0x37) : x-0x30)
+    if base ≥ 2
+        if base ≤ 62
+            # fast path using mpz_get_str via string(n; base)
+            s = codeunits(string(n; base))
+            i, j = firstindex(a)-1, length(s)+1
+            lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isneg(n))
+            while i < lasti
+                # base ≤ 36: 0-9, plus a-z for 10-35
+                # base > 36: 0-9, plus A-Z for 10-35 and a-z for 36..61
+                x = s[j -= 1]
+                a[i += 1] = base ≤ 36 ? (x>0x39 ? x-0x57 : x-0x30) : (x>0x39 ? (x>0x60 ? x-0x3d : x-0x37) : x-0x30)
+            end
+            lasti = lastindex(a)
+            while i < lasti; a[i+=1] = zero(T); end
+            return isneg(n) ? map!(-,a,a) : a
+        elseif a isa StridedVector{<:Base.BitInteger} && stride(a,1) == 1 && ispow2(base) && base-1 ≤ typemax(T)
+            # fast path using mpz_export
+            origlen = length(a)
+            _, writelen = MPZ.export!(a, n; nails = 8sizeof(T) - trailing_zeros(base))
+            length(a) != origlen && resize!(a, origlen) # truncate to least-significant digits
+            a[begin+writelen:end] .= zero(T)
+            return isneg(n) ? map!(-,a,a) : a
         end
-        lasti = lastindex(a)
-        while i < lasti; a[i+=1] = zero(T); end
-        return isneg(n) ? map!(-,a,a) : a
     end
     return invoke(digits!, Tuple{typeof(a), Integer}, a, n; base) # slow generic fallback
 end
diff --git a/base/hashing.jl b/base/hashing.jl
index 0989fecb29839..5dbae09123bd6 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -15,6 +15,8 @@ Typically, any type that implements `hash` should also implement its own [`==`](
 (operator `-`) should also implement [`widen`](@ref), which is required to hash
 values inside heterogeneous arrays.
 
+The hash value may change when a new Julia process is started.
+
 ```jldoctest
 julia> a = hash(10)
 0x95ea2955abd45275
@@ -27,6 +29,7 @@ See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref).
 """
 hash(x::Any) = hash(x, zero(UInt))
 hash(w::WeakRef, h::UInt) = hash(w.value, h)
+hash(T::Type, h::UInt) = hash_uint(3h - ccall(:jl_type_hash, UInt, (Any,), T))
 
 ## hashing general objects ##
 
@@ -110,7 +113,7 @@ end
 const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
 const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
 
-function hash(s::String, h::UInt)
+@assume_effects :total function hash(s::String, h::UInt)
     h += memhash_seed
     ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h
 end
diff --git a/base/iddict.jl b/base/iddict.jl
index dc7af461b09ec..99710fbb3491e 100644
--- a/base/iddict.jl
+++ b/base/iddict.jl
@@ -86,7 +86,7 @@ end
 function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where {K, V}
     !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K"))
     if !(val isa V) # avoid a dynamic call
-        val = convert(V, val)
+        val = convert(V, val)::V
     end
     if d.ndel >= ((3*length(d.ht))>>2)
         rehash!(d, max((length(d.ht)%UInt)>>1, 32))
@@ -155,7 +155,7 @@ copy(d::IdDict) = typeof(d)(d)
 function get!(d::IdDict{K,V}, @nospecialize(key), @nospecialize(default)) where {K, V}
     val = ccall(:jl_eqtable_get, Any, (Any, Any, Any), d.ht, key, secret_table_token)
     if val === secret_table_token
-        val = isa(default, V) ? default : convert(V, default)
+        val = isa(default, V) ? default : convert(V, default)::V
         setindex!(d, val, key)
         return val
     else
diff --git a/base/indices.jl b/base/indices.jl
index 0584b32941132..a9189865048cd 100644
--- a/base/indices.jl
+++ b/base/indices.jl
@@ -30,7 +30,7 @@ to implement indexing (and indexed assignment) with a single `Int` index;
 all other indexing expressions — including multidimensional accesses — will
 be recomputed to the linear index.  For example, if `A` were a `2×3` custom
 matrix with linear indexing, and we referenced `A[1, 3]`, this would be
-recomputed to the equivalent linear index and call `A[5]` since `2*1 + 3 = 5`.
+recomputed to the equivalent linear index and call `A[5]` since `1 + 2*(3 - 1) = 5`.
 
 See also [`IndexCartesian`](@ref).
 """
@@ -53,7 +53,7 @@ to implement indexing (and indexed assignment) with exactly `N` `Int` indices;
 all other indexing expressions — including linear indexing — will
 be recomputed to the equivalent Cartesian location.  For example, if `A` were a `2×3` custom
 matrix with cartesian indexing, and we referenced `A[5]`, this would be
-recomputed to the equivalent Cartesian index and call `A[1, 3]` since `5 = 2*1 + 3`.
+recomputed to the equivalent Cartesian index and call `A[1, 3]` since `5 = 1 + 2*(3 - 1)`.
 
 It is significantly more expensive to compute Cartesian indices from a linear index than it is
 to go the other way.  The former operation requires division — a very costly operation — whereas
@@ -92,7 +92,7 @@ particular, [`eachindex`](@ref) creates an iterator whose type depends
 on the setting of this trait.
 """
 IndexStyle(A::AbstractArray) = IndexStyle(typeof(A))
-IndexStyle(::Type{Union{}}) = IndexLinear()
+IndexStyle(::Type{Union{}}, slurp...) = IndexLinear()
 IndexStyle(::Type{<:AbstractArray}) = IndexCartesian()
 IndexStyle(::Type{<:Array}) = IndexLinear()
 IndexStyle(::Type{<:AbstractRange}) = IndexLinear()
diff --git a/base/initdefs.jl b/base/initdefs.jl
index 97a67c88fe713..002984b83dd97 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -315,6 +315,9 @@ end
     set_active_project(projfile::Union{AbstractString,Nothing})
 
 Set the active `Project.toml` file to `projfile`. See also [`Base.active_project`](@ref).
+
+!!! compat "Julia 1.8"
+    This function requires at least Julia 1.8.
 """
 function set_active_project(projfile::Union{AbstractString,Nothing})
     ACTIVE_PROJECT[] = projfile
@@ -350,6 +353,7 @@ end
 const atexit_hooks = Callable[
     () -> Filesystem.temp_cleanup_purge(force=true)
 ]
+const _atexit_hooks_lock = ReentrantLock()
 
 """
     atexit(f)
@@ -371,7 +375,7 @@ calls `exit(n)`, then Julia will exit with the exit code corresponding to the
 last called exit hook that calls `exit(n)`. (Because exit hooks are called in
 LIFO order, "last called" is equivalent to "first registered".)
 """
-atexit(f::Function) = (pushfirst!(atexit_hooks, f); nothing)
+atexit(f::Function) = Base.@lock _atexit_hooks_lock (pushfirst!(atexit_hooks, f); nothing)
 
 function _atexit(exitcode::Cint)
     while !isempty(atexit_hooks)
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 98a7098196500..1b007700f4331 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -48,33 +48,46 @@ function gcd(a::T, b::T) where T<:Integer
 end
 
 function gcd(a::T, b::T) where T<:BitInteger
-    a == 0 && return checked_abs(b)
-    b == 0 && return checked_abs(a)
-    r = _gcd(a, b)
-    signbit(r) && __throw_gcd_overflow(a, b)
-    return r
+    a == 0 && return Base.checked_abs(b)
+    b == 0 && return Base.checked_abs(a)
+    if a isa Signed && a == typemin(T)
+        if a == b
+            Base.__throw_gcd_overflow(a, b)
+        else
+            a, b = b, a
+        end
+    end
+    return _gcd(a, b)
 end
 @noinline __throw_gcd_overflow(a, b) =
     throw(OverflowError(LazyString("gcd(", a, ", ", b, ") overflows")))
 
+function absdiff(x::T,y::T) where {T<:Unsigned}
+    d = max(x,y) - min(x,y)
+    d, d
+end
+function absdiff(x::T,y::T) where {T<:Signed}
+    d = x - y
+    abs(d), d
+end
 # binary GCD (aka Stein's) algorithm
 # about 1.7x (2.1x) faster for random Int64s (Int128s)
 # Unfortunately, we need to manually annotate this as `@assume_effects :terminates_locally` to work around #41694.
 # Since this is used in the Rational constructor, constant folding is something we do care about here.
-@assume_effects :terminates_locally function _gcd(a::T, b::T) where T<:BitInteger
-    za = trailing_zeros(a)
-    zb = trailing_zeros(b)
+@assume_effects :terminates_locally function _gcd(ain::T, bin::T) where T<:BitInteger
+    zb = trailing_zeros(bin)
+    za = trailing_zeros(ain)
+    a = abs(ain)
+    b = abs(bin >> zb)
     k = min(za, zb)
-    u = unsigned(abs(a >> za))
-    v = unsigned(abs(b >> zb))
-    while u != v
-        if u > v
-            u, v = v, u
-        end
-        v -= u
-        v >>= trailing_zeros(v)
+    while a != 0
+        a >>= za
+        absd, diff = absdiff(a, b)
+        za = trailing_zeros(diff)
+        b = min(a, b)
+        a = absd
     end
-    r = u << k
+    r = b << k
     return r % T
 end
 
@@ -375,18 +388,20 @@ function powermod(x::Integer, p::Integer, m::T) where T<:Integer
     # but will work for integer types like `BigInt` that don't have `typemin` defined
     # It needs special handling otherwise will cause overflow problem.
     if p == -p
-        t = powermod(invmod(x, m), -(p÷2), m)
-        t = mod(widemul(t, t), m)
-        iseven(p) && return t
+        imod = invmod(x, m)
+        rhalf = powermod(imod, -(p÷2), m)
+        r::T = mod(widemul(rhalf, rhalf), m)
+        isodd(p) && (r = mod(widemul(r, imod), m))
         #else odd
-        return mod(widemul(t, invmod(x, m)), m)
+        return r
+    elseif p < 0
+        return powermod(invmod(x, m), -p, m)
     end
-    p < 0 && return powermod(invmod(x, m), -p, m)
     (m == 1 || m == -1) && return zero(m)
     b = oftype(m,mod(x,m))  # this also checks for divide by zero
 
     t = prevpow(2, p)
-    r::T = 1
+    r = 1
     while true
         if p >= t
             r = mod(widemul(r,b),m)
@@ -1096,7 +1111,7 @@ Base.@assume_effects :terminates_locally function binomial(n::T, k::T) where T<:
     while rr <= k
         xt = div(widemul(x, nn), rr)
         x = xt % T
-        x == xt || throw(OverflowError(LazyString("binomial(", n0, ", ", k0, " overflows")))
+        x == xt || throw(OverflowError(LazyString("binomial(", n0, ", ", k0, ") overflows")))
         rr += one(T)
         nn += one(T)
     end
diff --git a/base/io.jl b/base/io.jl
index c2d6ad592bf0c..9c00c57576bac 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -219,6 +219,8 @@ julia> read(io, String)
 ```
 """
 read(stream, t)
+read(stream, ::Type{Union{}}, slurp...; kwargs...) = error("cannot read a value of type Union{}")
+
 
 """
     write(io::IO, x)
@@ -1304,6 +1306,7 @@ end
 
 """
     countlines(io::IO; eol::AbstractChar = '\\n')
+    countlines(filename::AbstractString; eol::AbstractChar = '\\n')
 
 Read `io` until the end of the stream/file and count the number of lines. To specify a file
 pass the filename as the first argument. EOL markers other than `'\\n'` are supported by
@@ -1331,6 +1334,19 @@ julia> io = IOBuffer("JuliaLang is a GitHub organization.");
 
 julia> countlines(io, eol = '.')
 1
+```
+```jldoctest
+julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\n")
+36
+
+julia> countlines("my_file.txt")
+1
+
+julia> countlines("my_file.txt", eol = 'n')
+4
+
+julia> rm("my_file.txt")
+
 ```
 """
 function countlines(io::IO; eol::AbstractChar='\n')
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index e08a019d84a2c..6c95285f232f2 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -5,6 +5,7 @@
 # Stateful string
 mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
     data::T # T should support: getindex, setindex!, length, copyto!, and resize!
+    reinit::Bool # if true, data needs to be re-allocated (after take!)
     readable::Bool
     writable::Bool
     seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data
@@ -17,7 +18,7 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
     function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
                                 maxsize::Integer) where T<:AbstractVector{UInt8}
         require_one_based_indexing(data)
-        new(data,readable,writable,seekable,append,length(data),maxsize,1,-1)
+        new(data,false,readable,writable,seekable,append,length(data),maxsize,1,-1)
     end
 end
 const IOBuffer = GenericIOBuffer{Vector{UInt8}}
@@ -137,8 +138,12 @@ PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) =
     GenericIOBuffer(data,true,true,false,true,maxsize)
 PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringVector(maxsize), maxsize = maxsize); x.size=0; x)
 
+_similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len)
+_similar_data(b::IOBuffer, len::Int) = StringVector(len)
+
 function copy(b::GenericIOBuffer)
-    ret = typeof(b)(b.writable ? copy(b.data) : b.data,
+    ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ?
+                    copyto!(_similar_data(b, length(b.data)), b.data) : b.data,
                     b.readable, b.writable, b.seekable, b.append, b.maxsize)
     ret.size = b.size
     ret.ptr  = b.ptr
@@ -203,7 +208,7 @@ function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T
         GC.@preserve a unsafe_read(from, pointer(a, offs), nb)
     else
         for i = offs:offs+nel-1
-            a[i] = read(to, T)
+            a[i] = read(from, T)
         end
     end
     return a
@@ -270,7 +275,10 @@ function truncate(io::GenericIOBuffer, n::Integer)
     io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable"))
     n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n"))
     n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)"))
-    if n > length(io.data)
+    if io.reinit
+        io.data = _similar_data(io, n)
+        io.reinit = false
+    elseif n > length(io.data)
         resize!(io.data, n)
     end
     io.data[io.size+1:n] .= 0
@@ -325,9 +333,14 @@ end
         ensureroom_slowpath(io, nshort)
     end
     n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize)
-    l = length(io.data)
-    if n > l
-        _growend!(io.data, (n - l) % UInt)
+    if io.reinit
+        io.data = _similar_data(io, n)
+        io.reinit = false
+    else
+        l = length(io.data)
+        if n > l
+            _growend!(io.data, (n - l) % UInt)
+        end
     end
     return io
 end
@@ -390,18 +403,26 @@ end
 function take!(io::IOBuffer)
     ismarked(io) && unmark(io)
     if io.seekable
-        data = io.data
         if io.writable
-            maxsize = (io.maxsize == typemax(Int) ? 0 : min(length(io.data),io.maxsize))
-            io.data = StringVector(maxsize)
+            if io.reinit
+                data = StringVector(0)
+            else
+                data = resize!(io.data, io.size)
+                io.reinit = true
+            end
         else
-            data = copy(data)
+            data = copyto!(StringVector(io.size), 1, io.data, 1, io.size)
         end
-        resize!(data,io.size)
     else
         nbytes = bytesavailable(io)
-        a = StringVector(nbytes)
-        data = read!(io, a)
+        if io.writable
+            data = io.data
+            io.reinit = true
+            _deletebeg!(data, io.ptr-1)
+            resize!(data, nbytes)
+        else
+            data = read!(io, StringVector(nbytes))
+        end
     end
     if io.writable
         io.ptr = 1
@@ -410,6 +431,19 @@ function take!(io::IOBuffer)
     return data
 end
 
+"""
+    _unsafe_take!(io::IOBuffer)
+
+This simply returns the raw resized `io.data`, with no checks to be
+sure that `io` is readable etcetera, and leaves `io` in an inconsistent
+state.  This should only be used internally for performance-critical
+`String` routines that immediately discard `io` afterwards, and it
+*assumes* that `io` is writable and seekable.
+
+It saves no allocations compared to `take!`, it just omits some checks.
+"""
+_unsafe_take!(io::IOBuffer) = resize!(io.data, io.size)
+
 function write(to::IO, from::GenericIOBuffer)
     if to === from
         from.ptr = from.size + 1
diff --git a/base/irrationals.jl b/base/irrationals.jl
index 72341fea71690..6513e3269a4d7 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -165,11 +165,13 @@ end
 round(x::Irrational, r::RoundingMode) = round(float(x), r)
 
 """
-    @irrational sym val def
-    @irrational(sym, val, def)
+    @irrational sym [val] def
 
-Define a new `Irrational` value, `sym`, with pre-computed `Float64` value `val`,
-and arbitrary-precision definition in terms of `BigFloat`s given by the expression `def`.
+Define a new `Irrational` value, `sym`, with arbitrary-precision definition in terms
+of `BigFloat`s given by the expression `def`.
+
+Optionally provide a pre-computed `Float64` value `val` which must equal `Float64(def)`.
+`val` will be computed automatically if omitted.
 
 An `AssertionError` is thrown when either `big(def) isa BigFloat` or `Float64(val) == Float64(def)`
 returns `false`.
@@ -184,24 +186,30 @@ returns `false`.
 
 # Examples
 ```jldoctest
-julia> Base.@irrational(twoπ, 6.2831853071795864769, 2*big(π))
+julia> Base.@irrational twoπ 2*big(π)
 
 julia> twoπ
 twoπ = 6.2831853071795...
 
-julia> Base.@irrational sqrt2  1.4142135623730950488  √big(2)
+julia> Base.@irrational sqrt2 1.4142135623730950488 √big(2)
 
 julia> sqrt2
 sqrt2 = 1.4142135623730...
 
-julia> Base.@irrational sqrt2  1.4142135623730950488  big(2)
+julia> Base.@irrational sqrt2 1.4142135623730950488 big(2)
 ERROR: AssertionError: big($(Expr(:escape, :sqrt2))) isa BigFloat
 
-julia> Base.@irrational sqrt2  1.41421356237309  √big(2)
+julia> Base.@irrational sqrt2 1.41421356237309 √big(2)
 ERROR: AssertionError: Float64($(Expr(:escape, :sqrt2))) == Float64(big($(Expr(:escape, :sqrt2))))
 ```
 """
 macro irrational(sym, val, def)
+    irrational(sym, val, def)
+end
+macro irrational(sym, def)
+    irrational(sym, :(big($(esc(sym)))), def)
+end
+function irrational(sym, val, def)
     esym = esc(sym)
     qsym = esc(Expr(:quote, sym))
     bigconvert = isa(def,Symbol) ? quote
@@ -221,8 +229,10 @@ macro irrational(sym, val, def)
     quote
         const $esym = Irrational{$qsym}()
         $bigconvert
-        Base.Float64(::Irrational{$qsym}) = $val
-        Base.Float32(::Irrational{$qsym}) = $(Float32(val))
+        let v = $val, v64 = Float64(v), v32 = Float32(v)
+            Base.Float64(::Irrational{$qsym}) = v64
+            Base.Float32(::Irrational{$qsym}) = v32
+        end
         @assert isa(big($esym), BigFloat)
         @assert Float64($esym) == Float64(big($esym))
         @assert Float32($esym) == Float32(big($esym))
diff --git a/base/iterators.jl b/base/iterators.jl
index f2a9f23c9d094..11e94d3384de8 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -12,7 +12,7 @@ using .Base:
     @inline, Pair, Pairs, AbstractDict, IndexLinear, IndexStyle, AbstractVector, Vector,
     SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo,
     @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator,
-    AbstractRange, AbstractUnitRange, UnitRange, LinearIndices,
+    AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom,
     (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing,
     any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex,
     tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape
@@ -209,7 +209,7 @@ size(e::Enumerate) = size(e.itr)
 end
 last(e::Enumerate) = (length(e.itr), e.itr[end])
 
-eltype(::Type{Enumerate{I}}) where {I} = Tuple{Int, eltype(I)}
+eltype(::Type{Enumerate{I}}) where {I} = TupleOrBottom(Int, eltype(I))
 
 IteratorSize(::Type{Enumerate{I}}) where {I} = IteratorSize(I)
 IteratorEltype(::Type{Enumerate{I}}) where {I} = IteratorEltype(I)
@@ -394,7 +394,7 @@ _promote_tuple_shape((m,)::Tuple{Integer}, (n,)::Tuple{Integer}) = (min(m, n),)
 _promote_tuple_shape(a, b) = promote_shape(a, b)
 _promote_tuple_shape(a, b...) = _promote_tuple_shape(a, _promote_tuple_shape(b...))
 _promote_tuple_shape(a) = a
-eltype(::Type{Zip{Is}}) where {Is<:Tuple} = Tuple{map(eltype, fieldtypes(Is))...}
+eltype(::Type{Zip{Is}}) where {Is<:Tuple} = TupleOrBottom(map(eltype, fieldtypes(Is))...)
 #eltype(::Type{Zip{Tuple{}}}) = Tuple{}
 #eltype(::Type{Zip{Tuple{A}}}) where {A} = Tuple{eltype(A)}
 #eltype(::Type{Zip{Tuple{A, B}}}) where {A, B} = Tuple{eltype(A), eltype(B)}
@@ -1072,8 +1072,7 @@ end
 
 eltype(::Type{ProductIterator{I}}) where {I} = _prod_eltype(I)
 _prod_eltype(::Type{Tuple{}}) = Tuple{}
-_prod_eltype(::Type{I}) where {I<:Tuple} =
-    Tuple{ntuple(n -> eltype(fieldtype(I, n)), _counttuple(I)::Int)...}
+_prod_eltype(::Type{I}) where {I<:Tuple} = TupleOrBottom(ntuple(n -> eltype(fieldtype(I, n)), _counttuple(I)::Int)...)
 
 iterate(::ProductIterator{Tuple{}}) = (), true
 iterate(::ProductIterator{Tuple{}}, state) = nothing
@@ -1171,6 +1170,7 @@ IteratorEltype(::Type{Flatten{Tuple{}}}) = IteratorEltype(Tuple{})
 _flatteneltype(I, ::HasEltype) = IteratorEltype(eltype(I))
 _flatteneltype(I, et) = EltypeUnknown()
 
+flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{Union{}}, slurp...) = HasLength() # length==0
 flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{<:NTuple{N,Any}}) where {N} = HasLength()
 flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{<:Tuple}) = SizeUnknown()
 flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{<:Number}) = HasLength()
@@ -1182,6 +1182,7 @@ _flatten_iteratorsize(sz, ::HasEltype, ::Type{Tuple{}}) = HasLength()
 
 IteratorSize(::Type{Flatten{I}}) where {I} = _flatten_iteratorsize(IteratorSize(I), IteratorEltype(I), I)
 
+flatten_length(f, T::Type{Union{}}, slurp...) = 0
 function flatten_length(f, T::Type{<:NTuple{N,Any}}) where {N}
     return N * length(f.it)
 end
@@ -1442,6 +1443,7 @@ end
 function _approx_iter_type(itrT::Type, vstate::Type)
     vstate <: Union{Nothing, Tuple{Any, Any}} || return Any
     vstate <: Union{} && return Union{}
+    itrT <: Union{} && return Union{}
     nextvstate = Base._return_type(doiterate, Tuple{itrT, vstate})
     return (nextvstate <: vstate ? vstate : Any)
 end
diff --git a/base/libc.jl b/base/libc.jl
index 7d88e89bf605a..82286fbf01af6 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -225,7 +225,7 @@ function strptime(fmt::AbstractString, timestr::AbstractString)
     @static if Sys.isapple()
         # if we didn't explicitly parse the weekday or year day, use mktime
         # to fill them in automatically.
-        if !occursin(r"([^%]|^)%(a|A|j|w|Ow)", fmt)
+        if !occursin(r"([^%]|^)%(a|A|j|w|Ow)"a, fmt)
             ccall(:mktime, Int, (Ref{TmStruct},), tm)
         end
     end
@@ -235,14 +235,14 @@ end
 # system date in seconds
 
 """
-    time(t::TmStruct)
+    time(t::TmStruct) -> Float64
 
 Converts a `TmStruct` struct to a number of seconds since the epoch.
 """
 time(tm::TmStruct) = Float64(ccall(:mktime, Int, (Ref{TmStruct},), tm))
 
 """
-    time()
+    time() -> Float64
 
 Get the system time in seconds since the epoch, with fairly high (typically, microsecond) resolution.
 """
@@ -260,7 +260,7 @@ getpid() = ccall(:uv_os_getpid, Int32, ())
 ## network functions ##
 
 """
-    gethostname() -> AbstractString
+    gethostname() -> String
 
 Get the local machine's host name.
 """
diff --git a/base/linking.jl b/base/linking.jl
index fb9f6d087a2d0..fd21ce74c9268 100644
--- a/base/linking.jl
+++ b/base/linking.jl
@@ -11,6 +11,8 @@ const PATH_list = String[]
 const LIBPATH_list = String[]
 const lld_path = Ref{String}()
 const lld_exe = Sys.iswindows() ? "lld.exe" : "lld"
+const dsymutil_path = Ref{String}()
+const dsymutil_exe = Sys.iswindows() ? "dsymutil.exe" : "dsymutil"
 
 if Sys.iswindows()
     const LIBPATH_env = "PATH"
@@ -47,8 +49,8 @@ end
 
 function __init_lld_path()
     # Prefer our own bundled lld, but if we don't have one, pick it up off of the PATH
-    # If this is an in-tree build, `lld` will live in `tools`.  Otherwise, it'll be in `libexec`
-    for bundled_lld_path in (joinpath(Sys.BINDIR, Base.LIBEXECDIR, lld_exe),
+    # If this is an in-tree build, `lld` will live in `tools`.  Otherwise, it'll be in `private_libexecdir`
+    for bundled_lld_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, lld_exe),
                              joinpath(Sys.BINDIR, "..", "tools", lld_exe),
                              joinpath(Sys.BINDIR, lld_exe))
         if isfile(bundled_lld_path)
@@ -60,12 +62,27 @@ function __init_lld_path()
     return
 end
 
+function __init_dsymutil_path()
+    #Same as with lld but for dsymutil
+    for bundled_dsymutil_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, dsymutil_exe),
+                             joinpath(Sys.BINDIR, "..", "tools", dsymutil_exe),
+                             joinpath(Sys.BINDIR, dsymutil_exe))
+        if isfile(bundled_dsymutil_path)
+            dsymutil_path[] = abspath(bundled_dsymutil_path)
+            return
+        end
+    end
+    dsymutil_path[] = something(Sys.which(dsymutil_exe), dsymutil_exe)
+    return
+end
+
 const VERBOSE = Ref{Bool}(false)
 
 function __init__()
     VERBOSE[] = Base.get_bool_env("JULIA_VERBOSE_LINKING", false)
 
     __init_lld_path()
+    __init_dsymutil_path()
     PATH[] = dirname(lld_path[])
     if Sys.iswindows()
         # On windows, the dynamic libraries (.dll) are in Sys.BINDIR ("usr\\bin")
@@ -82,6 +99,11 @@ function lld(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
     return Cmd(Cmd([lld_path[]]); env)
 end
 
+function dsymutil(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
+    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
+    return Cmd(Cmd([dsymutil_path[]]); env)
+end
+
 function ld()
     default_args = ``
     @static if Sys.iswindows()
@@ -140,8 +162,8 @@ function link_image_cmd(path, out)
     `$(ld()) $V $SHARED -o $out $WHOLE_ARCHIVE $path $NO_WHOLE_ARCHIVE $LIBDIR $PRIVATE_LIBDIR $SHLIBDIR $LIBS`
 end
 
-function link_image(path, out, internal_stderr::IO = stderr, internal_stdout::IO = stdout)
-    run(link_image_cmd(path, out), Base.DevNull(), stderr, stdout)
+function link_image(path, out, internal_stderr::IO=stderr, internal_stdout::IO=stdout)
+    run(link_image_cmd(path, out), Base.DevNull(), internal_stderr, internal_stdout)
 end
 
 end # module Linking
diff --git a/base/loading.jl b/base/loading.jl
index e22142e0abe88..9cc2f13752dfb 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -258,7 +258,7 @@ struct LoadingCache
     require_parsed::Set{String}
     identified_where::Dict{Tuple{PkgId, String}, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}}
     identified::Dict{String, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}}
-    located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{String, Nothing}}
+    located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{Tuple{Union{String, Nothing}, Union{String, Nothing}}, Nothing}}
 end
 const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
 LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set(), Dict(), Dict(), Dict())
@@ -390,30 +390,17 @@ identify_package(where::Module, name::String) = _nothing_or_first(identify_packa
 identify_package(where::PkgId, name::String)  = _nothing_or_first(identify_package_env(where, name))
 identify_package(name::String)                = _nothing_or_first(identify_package_env(name))
 
-
-"""
-    Base.locate_package(pkg::PkgId)::Union{String, Nothing}
-
-The path to the entry-point file for the package corresponding to the identifier
-`pkg`, or `nothing` if not found. See also [`identify_package`](@ref).
-
-```julia-repl
-julia> pkg = Base.identify_package("Pkg")
-Pkg [44cfe95a-1eb2-52ea-b672-e2afdf69b78f]
-
-julia> Base.locate_package(pkg)
-"/path/to/julia/stdlib/v$(VERSION.major).$(VERSION.minor)/Pkg/src/Pkg.jl"
-```
-"""
-function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,String}
+function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)
     cache = LOADING_CACHE[]
     if cache !== nothing
-        path = get(cache.located, (pkg, stopenv), nothing)
-        path === nothing || return path
+        pathenv = get(cache.located, (pkg, stopenv), nothing)
+        pathenv === nothing || return pathenv
     end
     path = nothing
+    env′ = nothing
     if pkg.uuid === nothing
         for env in load_path()
+            env′ = env
             # look for the toplevel pkg `pkg.name` in this entry
             found = project_deps_get(env, pkg.name)
             if found !== nothing
@@ -426,10 +413,13 @@ function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Un
                     @goto done
                 end
             end
-            stopenv == env && @goto done
+            if !(loading_extension || precompiling_extension)
+                stopenv == env && @goto done
+            end
         end
     else
         for env in load_path()
+            env′ = env
             path = manifest_uuid_path(env, pkg)
             # missing is used as a sentinel to stop looking further down in envs
             if path === missing
@@ -440,7 +430,9 @@ function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Un
                 path = entry_path(path, pkg.name)
                 @goto done
             end
-            stopenv == env && break
+            if !(loading_extension || precompiling_extension)
+                stopenv == env && break
+            end
         end
         # Allow loading of stdlibs if the name/uuid are given
         # e.g. if they have been explicitly added to the project/manifest
@@ -452,9 +444,27 @@ function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Un
     end
     @label done
     if cache !== nothing
-        cache.located[(pkg, stopenv)] = path
+        cache.located[(pkg, stopenv)] = path, env′
     end
-    return path
+    return path, env′
+end
+
+"""
+    Base.locate_package(pkg::PkgId)::Union{String, Nothing}
+
+The path to the entry-point file for the package corresponding to the identifier
+`pkg`, or `nothing` if not found. See also [`identify_package`](@ref).
+
+```julia-repl
+julia> pkg = Base.identify_package("Pkg")
+Pkg [44cfe95a-1eb2-52ea-b672-e2afdf69b78f]
+
+julia> Base.locate_package(pkg)
+"/path/to/julia/stdlib/v$(VERSION.major).$(VERSION.minor)/Pkg/src/Pkg.jl"
+```
+"""
+function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,String}
+    _nothing_or_first(locate_package_env(pkg, stopenv))
 end
 
 """
@@ -613,6 +623,24 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi
             pkg_uuid = explicit_project_deps_get(project_file, name)
             return PkgId(pkg_uuid, name)
         end
+        d = parsed_toml(project_file)
+        exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
+        if exts !== nothing
+            # Check if `where` is an extension of the project
+            if where.name in keys(exts) && where.uuid == uuid5(proj.uuid, where.name)
+                # Extensions can load weak deps...
+                weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing}
+                if weakdeps !== nothing
+                    wuuid = get(weakdeps, name, nothing)::Union{String, Nothing}
+                    if wuuid !== nothing
+                        return PkgId(UUID(wuuid), name)
+                    end
+                end
+                # ... and they can load same deps as the project itself
+                mby_uuid = explicit_project_deps_get(project_file, name)
+                mby_uuid === nothing || return PkgId(mby_uuid, name)
+            end
+        end
         # look for manifest file and `where` stanza
         return explicit_manifest_deps_get(project_file, where, name)
     elseif project_file
@@ -630,6 +658,8 @@ function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missi
             # if `pkg` matches the project, return the project itself
             return project_file_path(project_file)
         end
+        mby_ext = project_file_ext_path(project_file, pkg.name)
+        mby_ext === nothing || return mby_ext
         # look for manifest file and `where` stanza
         return explicit_manifest_uuid_path(project_file, pkg)
     elseif project_file
@@ -639,6 +669,25 @@ function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missi
     return nothing
 end
 
+
+function find_ext_path(project_path::String, extname::String)
+    extfiledir = joinpath(project_path, "ext", extname, extname * ".jl")
+    isfile(extfiledir) && return extfiledir
+    return joinpath(project_path, "ext", extname * ".jl")
+end
+
+function project_file_ext_path(project_file::String, name::String)
+    d = parsed_toml(project_file)
+    p = project_file_path(project_file)
+    exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing}
+    if exts !== nothing
+        if name in keys(exts)
+            return find_ext_path(p, name)
+        end
+    end
+    return nothing
+end
+
 # find project file's top-level UUID entry (or nothing)
 function project_file_name_uuid(project_file::String, name::String)::PkgId
     d = parsed_toml(project_file)
@@ -741,10 +790,10 @@ function explicit_project_deps_get(project_file::String, name::String)::Union{No
     return nothing
 end
 
-function is_v1_format_manifest(raw_manifest::Dict)
+function is_v1_format_manifest(raw_manifest::Dict{String})
     if haskey(raw_manifest, "manifest_format")
         mf = raw_manifest["manifest_format"]
-        if mf isa Dict && haskey(mf, "uuid")
+        if mf isa Dict{String} && haskey(mf, "uuid")
             # the off-chance where an old format manifest has a dep called "manifest_format"
             return true
         end
@@ -870,9 +919,7 @@ function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{No
                     error("failed to find source of parent package: \"$name\"")
                 end
                 p = normpath(dirname(parent_path), "..")
-                extfiledir = joinpath(p, "ext", pkg.name, pkg.name * ".jl")
-                isfile(extfiledir) && return extfiledir
-                return joinpath(p, "ext", pkg.name * ".jl")
+                return find_ext_path(p, pkg.name)
             end
         end
     end
@@ -1002,10 +1049,10 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
 
     if ocachepath !== nothing
         @debug "Loading object cache file $ocachepath for $pkg"
-        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint), ocachepath, depmods, false)
+        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring), ocachepath, depmods, false, pkg.name)
     else
         @debug "Loading cache file $path for $pkg"
-        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false)
+        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), path, depmods, false, pkg.name)
     end
     if isa(sv, Exception)
         return sv
@@ -1020,11 +1067,9 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
                 elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
                 comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before
                 print(lpad(elapsed, 9), " ms  ")
-                for extid in EXT_DORMITORY
-                    if extid.id == pkg
-                        print(extid.parentid.name, " → ")
-                        break
-                    end
+                parentid = get(EXT_PRIMED, pkg, nothing)
+                if parentid !== nothing
+                    print(parentid.name, " → ")
                 end
                 print(pkg.name)
                 if comp_time > 0
@@ -1067,7 +1112,9 @@ function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
     if !isempty(inits)
         unlock(require_lock) # temporarily _unlock_ during these callbacks
         try
-            ccall(:jl_init_restored_modules, Cvoid, (Any,), inits)
+            for (i, mod) in pairs(inits)
+                run_module_init(mod, i)
+            end
         finally
             lock(require_lock)
         end
@@ -1075,10 +1122,42 @@ function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
     return restored
 end
 
+function run_module_init(mod::Module, i::Int=1)
+    # `i` informs ordering for the `@time_imports` report formatting
+    if TIMING_IMPORTS[] == 0
+        ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
+    else
+        if isdefined(mod, :__init__)
+            connector = i > 1 ? "├" : "┌"
+            printstyled("               $connector ", color = :light_black)
+
+            elapsedtime = time_ns()
+            cumulative_compile_timing(true)
+            compile_elapsedtimes = cumulative_compile_time_ns()
+
+            ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
+
+            elapsedtime = (time_ns() - elapsedtime) / 1e6
+            cumulative_compile_timing(false);
+            comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6
+
+            print(round(elapsedtime, digits=1), " ms $mod.__init__() ")
+            if comp_time > 0
+                printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color())
+            end
+            if recomp_time > 0
+                perc = Float64(100 * recomp_time / comp_time)
+                printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color())
+            end
+            println()
+        end
+    end
+end
+
 function run_package_callbacks(modkey::PkgId)
+    run_extension_callbacks(modkey)
     assert_havelock(require_lock)
     unlock(require_lock)
-    run_extension_callbacks()
     try
         for callback in package_callbacks
             invokelatest(callback, modkey)
@@ -1099,25 +1178,41 @@ end
 ##############
 
 mutable struct ExtensionId
-    const id::PkgId # Could be symbol?
-    const parentid::PkgId
-    const triggers::Vector{PkgId} # What packages have to be loaded for the extension to get loaded
-    triggered::Bool
-    succeeded::Bool
+    const id::PkgId
+    const parentid::PkgId # just need the name, for printing
+    ntriggers::Int # how many more packages must be defined until this is loaded
 end
 
-const EXT_DORMITORY = ExtensionId[]
+const EXT_PRIMED = Dict{PkgId, PkgId}() # Extension -> Parent
+const EXT_DORMITORY = Dict{PkgId,Vector{ExtensionId}}() # Trigger -> Extensions that can be triggered by it
+const EXT_DORMITORY_FAILED = ExtensionId[]
 
 function insert_extension_triggers(pkg::PkgId)
     pkg.uuid === nothing && return
-    for env in load_path()
-        insert_extension_triggers(env, pkg)
+    path_env_loc = locate_package_env(pkg)
+    path_env_loc === nothing && return
+    path, env_loc = path_env_loc
+    if path === nothing || env_loc === nothing
+        return
     end
+    insert_extension_triggers(env_loc, pkg)
 end
 
 function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missing}
     project_file = env_project_file(env)
     if project_file isa String
+        # Look in project for extensions to insert
+        proj_pkg = project_file_name_uuid(project_file, pkg.name)
+        if pkg == proj_pkg
+            d_proj = parsed_toml(project_file)
+            weakdeps = get(d_proj, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}}
+            extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}}
+            extensions === nothing && return
+            weakdeps === nothing && return
+            return _insert_extension_triggers(pkg, extensions, weakdeps)
+        end
+
+        # Now look in manifest
         manifest_file = project_file_manifest_path(project_file)
         manifest_file === nothing && return
         d = get_deps(parsed_toml(manifest_file))
@@ -1159,72 +1254,98 @@ end
 function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, <:Any}, weakdeps::Dict{String, <:Any})
     for (ext::String, triggers::Union{String, Vector{String}}) in extensions
         triggers isa String && (triggers = [triggers])
-        triggers_id = PkgId[]
         id = PkgId(uuid5(parent.uuid, ext), ext)
+        if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id)
+            continue  # extension is already primed or loaded, don't add it again
+        end
+        EXT_PRIMED[id] = parent
+        gid = ExtensionId(id, parent, 1 + length(triggers))
+        trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, parent)
+        push!(trigger1, gid)
         for trigger in triggers
             # TODO: Better error message if this lookup fails?
             uuid_trigger = UUID(weakdeps[trigger]::String)
-            push!(triggers_id, PkgId(uuid_trigger, trigger))
+            trigger_id = PkgId(uuid_trigger, trigger)
+            if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id)
+                trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id)
+                push!(trigger1, gid)
+            else
+                gid.ntriggers -= 1
+            end
         end
-        gid = ExtensionId(id, parent, triggers_id, false, false)
-        push!(EXT_DORMITORY, gid)
     end
 end
 
-function run_extension_callbacks(; force::Bool=false)
-    try
-        # TODO, if `EXT_DORMITORY` becomes very long, do something smarter
-        for extid in EXT_DORMITORY
-            extid.succeeded && continue
-            !force && extid.triggered && continue
-            if all(x -> haskey(Base.loaded_modules, x) && !haskey(package_locks, x), extid.triggers)
-                ext_not_allowed_load = nothing
-                extid.triggered = true
-                # It is possible that some of the triggers were loaded in an environment
-                # below the one of the parent. This will cause a load failure when the
-                # pkg ext tries to load the triggers. Therefore, check this first
-                # before loading the pkg ext.
-                for trigger in extid.triggers
-                    pkgenv = Base.identify_package_env(extid.id, trigger.name)
-                    if pkgenv === nothing
-                        ext_not_allowed_load = trigger
-                        break
-                    else
-                        pkg, env = pkgenv
-                        path = Base.locate_package(pkg, env)
-                        if path === nothing
-                            ext_not_allowed_load = trigger
-                            break
-                        end
-                    end
-                end
-                if ext_not_allowed_load !== nothing
-                    @debug "Extension $(extid.id.name) of $(extid.parentid.name) not loaded due to \
-                            $(ext_not_allowed_load.name) loaded in environment lower in load path"
-                else
-                    require(extid.id)
-                    @debug "Extension $(extid.id.name) of $(extid.parentid.name) loaded"
-                end
-                extid.succeeded = true
-            end
-        end
+loading_extension::Bool = false
+precompiling_extension::Bool = false
+function run_extension_callbacks(extid::ExtensionId)
+    assert_havelock(require_lock)
+    succeeded = try
+        # Used by Distributed to now load extensions in the package callback
+        global loading_extension = true
+        _require_prelocked(extid.id)
+        @debug "Extension $(extid.id.name) of $(extid.parentid.name) loaded"
+        true
     catch
         # Try to continue loading if loading an extension errors
         errs = current_exceptions()
-        @error "Error during loading of extension" exception=errs
+        @error "Error during loading of extension $(extid.id.name) of $(extid.parentid.name), \
+                use `Base.retry_load_extensions()` to retry." exception=errs
+        false
+    finally
+        global loading_extension = false
     end
-    nothing
+    return succeeded
+end
+
+function run_extension_callbacks(pkgid::PkgId)
+    assert_havelock(require_lock)
+    # take ownership of extids that depend on this pkgid
+    extids = pop!(EXT_DORMITORY, pkgid, nothing)
+    extids === nothing && return
+    for extid in extids
+        if extid.ntriggers > 0
+            # indicate pkgid is loaded
+            extid.ntriggers -= 1
+        end
+        if extid.ntriggers < 0
+            # indicate pkgid is loaded
+            extid.ntriggers += 1
+            succeeded = false
+        else
+            succeeded = true
+        end
+        if extid.ntriggers == 0
+            # actually load extid, now that all dependencies are met,
+            # and record the result
+            succeeded = succeeded && run_extension_callbacks(extid)
+            succeeded || push!(EXT_DORMITORY_FAILED, extid)
+        end
+    end
+    return
 end
 
 """
-    load_extensions()
+    retry_load_extensions()
 
 Loads all the (not yet loaded) extensions that have their extension-dependencies loaded.
 This is used in cases where the automatic loading of an extension failed
 due to some problem with the extension. Instead of restarting the Julia session,
 the extension can be fixed, and this function run.
 """
-retry_load_extensions() = run_extension_callbacks(; force=true)
+function retry_load_extensions()
+    @lock require_lock begin
+    # this copy is desired since run_extension_callbacks will release this lock
+    # so this can still mutate the list to drop successful ones
+    failed = copy(EXT_DORMITORY_FAILED)
+    empty!(EXT_DORMITORY_FAILED)
+    filter!(failed) do extid
+        return !run_extension_callbacks(extid)
+    end
+    prepend!(EXT_DORMITORY_FAILED, failed)
+    end
+    return
+end
 
 """
     get_extension(parent::Module, extension::Symbol)
@@ -1247,20 +1368,15 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
     if root_module_exists(modkey)
         loaded = root_module(modkey)
     else
-        loading = get(package_locks, modkey, false)
-        if loading !== false
-            # load already in progress for this module
-            loaded = wait(loading::Threads.Condition)
-        else
-            package_locks[modkey] = Threads.Condition(require_lock)
+        loaded = start_loading(modkey)
+        if loaded === nothing
             try
                 modpath = locate_package(modkey)
                 modpath === nothing && return nothing
                 set_pkgorigin_version_path(modkey, String(modpath))
                 loaded = _require_search_from_serialized(modkey, String(modpath), build_id)
             finally
-                loading = pop!(package_locks, modkey)
-                notify(loading, loaded, all=true)
+                end_loading(modkey, loaded)
             end
             if loaded isa Module
                 insert_extension_triggers(modkey)
@@ -1282,26 +1398,21 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, ocachepath::Un
     if root_module_exists(modkey)
         loaded = root_module(modkey)
     else
-        loading = get(package_locks, modkey, false)
-        if loading !== false
-            # load already in progress for this module
-            loaded = wait(loading::Threads.Condition)
-        else
-            for i in 1:length(depmods)
-                dep = depmods[i]
-                dep isa Module && continue
-                _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
-                @assert root_module_exists(depkey)
-                dep = root_module(depkey)
-                depmods[i] = dep
-            end
-            package_locks[modkey] = Threads.Condition(require_lock)
+        loaded = start_loading(modkey)
+        if loaded === nothing
             try
+                for i in 1:length(depmods)
+                    dep = depmods[i]
+                    dep isa Module && continue
+                    _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
+                    @assert root_module_exists(depkey)
+                    dep = root_module(depkey)
+                    depmods[i] = dep
+                end
                 set_pkgorigin_version_path(modkey, sourcepath)
                 loaded = _include_from_serialized(modkey, path, ocachepath, depmods)
             finally
-                loading = pop!(package_locks, modkey)
-                notify(loading, loaded, all=true)
+                end_loading(modkey, loaded)
             end
             if loaded isa Module
                 insert_extension_triggers(modkey)
@@ -1357,7 +1468,6 @@ end
 @constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128)
     assert_havelock(require_lock)
     paths = find_all_in_cache_path(pkg)
-    ocachefile = nothing
     for path_to_try in paths::Vector{String}
         staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try)
         if staledeps === true
@@ -1370,26 +1480,18 @@ end
             dep isa Module && continue
             modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
             modpaths = find_all_in_cache_path(modkey)
-            modfound = false
-            for modpath_to_try in modpaths::Vector{String}
+            for modpath_to_try in modpaths
                 modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try)
                 if modstaledeps === true
                     continue
                 end
                 modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}}
                 staledeps[i] = (modpath, modkey, modpath_to_try, modstaledeps, modocachepath)
-                modfound = true
-                break
-            end
-            if !modfound
-                @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
-                staledeps = true
-                break
+                @goto check_next_dep
             end
-        end
-        if staledeps === true
-            ocachefile = nothing
-            continue
+            @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
+            @goto check_next_path
+            @label check_next_dep
         end
         try
             touch(path_to_try) # update timestamp of precompilation file
@@ -1404,27 +1506,79 @@ end
             dep = _tryrequire_from_serialized(modkey, modcachepath, modocachepath, modpath, modstaledeps)
             if !isa(dep, Module)
                 @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep
-                staledeps = true
-                break
+                @goto check_next_path
             end
-            (staledeps::Vector{Any})[i] = dep
+            staledeps[i] = dep
         end
-        if staledeps === true
-            ocachefile = nothing
-            continue
+        restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps)
+        isa(restored, Module) && return restored
+        @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
+        continue
+        @label check_next_path
+    end
+    return nothing
+end
+
+# to synchronize multiple tasks trying to import/using something
+const package_locks = Dict{PkgId,Pair{Task,Threads.Condition}}()
+
+debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but more complete algorithm that can handle simultaneous tasks.
+                               # This only triggers if you have multiple tasks trying to load the same package at the same time,
+                               # so it is unlikely to make a difference normally.
+function start_loading(modkey::PkgId)
+    # handle recursive calls to require
+    assert_havelock(require_lock)
+    loading = get(package_locks, modkey, nothing)
+    if loading !== nothing
+        # load already in progress for this module on the task
+        task, cond = loading
+        deps = String[modkey.name]
+        pkgid = modkey
+        assert_havelock(cond.lock)
+        if debug_loading_deadlocks && current_task() !== task
+            waiters = Dict{Task,Pair{Task,PkgId}}() # invert to track waiting tasks => loading tasks
+            for each in package_locks
+                cond2 = each[2][2]
+                assert_havelock(cond2.lock)
+                for waiting in cond2.waitq
+                    push!(waiters, waiting => (each[2][1] => each[1]))
+                end
+            end
+            while true
+                running = get(waiters, task, nothing)
+                running === nothing && break
+                task, pkgid = running
+                push!(deps, pkgid.name)
+                task === current_task() && break
+            end
         end
-        restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps::Vector{Any})
-        if !isa(restored, Module)
-            @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
-        else
-            return restored
+        if current_task() === task
+            others = String[modkey.name] # repeat this to emphasize the cycle here
+            for each in package_locks # list the rest of the packages being loaded too
+                if each[2][1] === task
+                    other = each[1].name
+                    other == modkey.name || other == pkgid.name || push!(others, other)
+                end
+            end
+            msg = sprint(deps, others) do io, deps, others
+                print(io, "deadlock detected in loading ")
+                join(io, deps, " -> ")
+                print(io, " -> ")
+                join(io, others, " && ")
+            end
+            throw(ConcurrencyViolationError(msg))
         end
+        return wait(cond)
     end
+    package_locks[modkey] = current_task() => Threads.Condition(require_lock)
     return
 end
 
-# to synchronize multiple tasks trying to import/using something
-const package_locks = Dict{PkgId,Threads.Condition}()
+function end_loading(modkey::PkgId, @nospecialize loaded)
+    loading = pop!(package_locks, modkey)
+    notify(loading[2], loaded, all=true)
+    nothing
+end
 
 # to notify downstream consumers that a module was successfully loaded
 # Callbacks take the form (mod::Base.PkgId) -> nothing.
@@ -1457,11 +1611,11 @@ end
 """
     include_dependency(path::AbstractString)
 
-In a module, declare that the file specified by `path` (relative or absolute) is a
-dependency for precompilation; that is, the module will need to be recompiled if this file
-changes.
+In a module, declare that the file, directory, or symbolic link specified by `path`
+(relative or absolute) is a dependency for precompilation; that is, the module will need
+to be recompiled if the modification time of `path` changes.
 
-This is only needed if your module depends on a file that is not used via [`include`](@ref). It has
+This is only needed if your module depends on a path that is not used via [`include`](@ref). It has
 no effect outside of compilation.
 """
 function include_dependency(path::AbstractString)
@@ -1564,6 +1718,8 @@ end
 
 require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey)
 
+const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL")
+
 function _require_prelocked(uuidkey::PkgId, env=nothing)
     assert_havelock(require_lock)
     if !root_module_exists(uuidkey)
@@ -1575,6 +1731,9 @@ function _require_prelocked(uuidkey::PkgId, env=nothing)
         insert_extension_triggers(uuidkey)
         # After successfully loading, notify downstream consumers
         run_package_callbacks(uuidkey)
+        if uuidkey == REPL_PKGID
+            REPL_MODULE_REF[] = newm
+        end
     else
         newm = root_module(uuidkey)
     end
@@ -1663,19 +1822,16 @@ function set_pkgorigin_version_path(pkg::PkgId, path::Union{String,Nothing})
     nothing
 end
 
+# A hook to allow code load to use Pkg.precompile
+const PKG_PRECOMPILE_HOOK = Ref{Function}()
+
 # Returns `nothing` or the new(ish) module
 function _require(pkg::PkgId, env=nothing)
     assert_havelock(require_lock)
-    # handle recursive calls to require
-    loading = get(package_locks, pkg, false)
-    if loading !== false
-        # load already in progress for this module
-        return wait(loading::Threads.Condition)
-    end
-    package_locks[pkg] = Threads.Condition(require_lock)
+    loaded = start_loading(pkg)
+    loaded === nothing || return loaded
 
     last = toplevel_load[]
-    loaded = nothing
     try
         toplevel_load[] = false
         # perform the search operation to select the module file require intends to load
@@ -1688,8 +1844,11 @@ function _require(pkg::PkgId, env=nothing)
         end
         set_pkgorigin_version_path(pkg, path)
 
+        pkg_precompile_attempted = false # being safe to avoid getting stuck in a Pkg.precompile loop
+
         # attempt to load the module file via the precompile cache locations
         if JLOptions().use_compiled_modules != 0
+            @label load_from_cache
             m = _require_search_from_serialized(pkg, path, UInt128(0))
             if m isa Module
                 return m
@@ -1711,6 +1870,16 @@ function _require(pkg::PkgId, env=nothing)
 
         if JLOptions().use_compiled_modules != 0
             if (0 == ccall(:jl_generating_output, Cint, ())) || (JLOptions().incremental != 0)
+                if !pkg_precompile_attempted && isinteractive() && isassigned(PKG_PRECOMPILE_HOOK)
+                    pkg_precompile_attempted = true
+                    unlock(require_lock)
+                    try
+                        PKG_PRECOMPILE_HOOK[](pkg.name, _from_loading = true)
+                    finally
+                        lock(require_lock)
+                    end
+                    @goto load_from_cache
+                end
                 # spawn off a new incremental pre-compile task for recursive `require` calls
                 cachefile = compilecache(pkg, path)
                 if isa(cachefile, Exception)
@@ -1753,8 +1922,7 @@ function _require(pkg::PkgId, env=nothing)
         end
     finally
         toplevel_load[] = last
-        loading = pop!(package_locks, pkg)
-        notify(loading, loaded, all=true)
+        end_loading(pkg, loaded)
     end
     return loaded
 end
@@ -2004,12 +2172,14 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::
                               --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no")
                               $trace
                               -`,
-                              "OPENBLAS_NUM_THREADS" => 1),
+                              "OPENBLAS_NUM_THREADS" => 1,
+                              "JULIA_NUM_THREADS" => 1),
                        stderr = internal_stderr, stdout = internal_stdout),
               "w", stdout)
     # write data over stdin to avoid the (unlikely) case of exceeding max command line size
     write(io.in, """
         empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated
+        Base.precompiling_extension = $(loading_extension)
         Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)),
             $(repr(load_path)), $deps, $(repr(source_path(nothing))))
         """)
@@ -2153,7 +2323,11 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
                     rm(evicted_cachefile; force=true)
                     try
                         rm(ocachefile_from_cachefile(evicted_cachefile); force=true)
-                    catch
+                        @static if Sys.isapple()
+                            rm(ocachefile_from_cachefile(evicted_cachefile) * ".dSYM"; force=true, recursive=true)
+                        end
+                    catch e
+                        e isa IOError || rethrow()
                     end
                 end
             end
@@ -2180,6 +2354,9 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
                     cachefile = cachefile_from_ocachefile(ocachefile)
                     rename(tmppath_so, ocachefile::String; force=true)
                 end
+                @static if Sys.isapple()
+                    run(`$(Linking.dsymutil()) $ocachefile`, Base.DevNull(), Base.DevNull(), Base.DevNull())
+                end
             end
             # this is atomic according to POSIX (not Win32):
             rename(tmppath, cachefile; force=true)
@@ -2407,7 +2584,7 @@ end
 
 # Test to see if this UUID is mentioned in this `Project.toml`; either as
 # the top-level UUID (e.g. that of the project itself), as a dependency,
-# or as an extra for Preferences.
+# or as an extra/weakdep for Preferences.
 function get_uuid_name(project::Dict{String, Any}, uuid::UUID)
     uuid_p = get(project, "uuid", nothing)::Union{Nothing, String}
     name = get(project, "name", nothing)::Union{Nothing, String}
@@ -2422,7 +2599,7 @@ function get_uuid_name(project::Dict{String, Any}, uuid::UUID)
             end
         end
     end
-    for subkey in ("deps", "extras")
+    for subkey in ("deps", "extras", "weakdeps")
         subsection = get(project, subkey, nothing)::Union{Nothing, Dict{String, Any}}
         if subsection !== nothing
             for (k, v) in subsection
@@ -2726,7 +2903,7 @@ end
             end
             for chi in includes
                 f, ftime_req = chi.filename, chi.mtime
-                if !isfile(f)
+                if !ispath(f)
                     _f = fixup_stdlib_path(f)
                     if isfile(_f) && startswith(_f, Sys.STDLIB)
                         # mtime is changed by extraction
@@ -2775,7 +2952,7 @@ end
 end
 
 """
-    @__FILE__ -> AbstractString
+    @__FILE__ -> String
 
 Expand to a string with the path to the file containing the
 macrocall, or an empty string if evaluated by `julia -e <expr>`.
@@ -2788,7 +2965,7 @@ macro __FILE__()
 end
 
 """
-    @__DIR__ -> AbstractString
+    @__DIR__ -> String
 
 Expand to a string with the absolute path to the directory of the file
 containing the macrocall.
diff --git a/base/lock.jl b/base/lock.jl
index 1321b0c0f48c7..1663a765111bb 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -481,8 +481,8 @@ end
 """
     reset(::Event)
 
-Reset an Event back into an un-set state. Then any future calls to `wait` will
-block until `notify` is called again.
+Reset an [`Event`](@ref) back into an un-set state. Then any future calls to `wait` will
+block until [`notify`](@ref) is called again.
 """
 function reset(e::Event)
     @atomic e.set = false # full barrier
diff --git a/base/locks-mt.jl b/base/locks-mt.jl
index bfa3ac1b8352e..5d355b9ed200c 100644
--- a/base/locks-mt.jl
+++ b/base/locks-mt.jl
@@ -43,7 +43,7 @@ function lock(l::SpinLock)
         if @inline trylock(l)
             return
         end
-        ccall(:jl_cpu_pause, Cvoid, ())
+        ccall(:jl_cpu_suspend, Cvoid, ())
         # Temporary solution before we have gc transition support in codegen.
         ccall(:jl_gc_safepoint, Cvoid, ())
     end
diff --git a/base/logging.jl b/base/logging.jl
index dd45d05a084af..c42af08d8f4ae 100644
--- a/base/logging.jl
+++ b/base/logging.jl
@@ -369,7 +369,8 @@ function logmsg_code(_module, file, line, level, message, exs...)
     return quote
         let
             level = $level
-            std_level = convert(LogLevel, level)
+            # simplify std_level code emitted, if we know it is one of our global constants
+            std_level = $(level isa Symbol ? :level : :(level isa LogLevel ? level : convert(LogLevel, level)::LogLevel))
             if std_level >= _min_enabled_level[]
                 group = $(log_data._group)
                 _module = $(log_data._module)
@@ -445,7 +446,7 @@ function default_group_code(file)
         QuoteNode(default_group(file))  # precompute if we can
     else
         ref = Ref{Symbol}()  # memoized run-time execution
-        :(isassigned($ref) ? $ref[] : $ref[] = default_group(something($file, "")))
+        :(isassigned($ref) ? $ref[] : $ref[] = default_group(something($file, ""))::Symbol)
     end
 end
 
diff --git a/base/math.jl b/base/math.jl
index f41057c76cfc2..71bd4949498b5 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -5,12 +5,12 @@ module Math
 export sin, cos, sincos, tan, sinh, cosh, tanh, asin, acos, atan,
        asinh, acosh, atanh, sec, csc, cot, asec, acsc, acot,
        sech, csch, coth, asech, acsch, acoth,
-       sinpi, cospi, sincospi, sinc, cosc,
+       sinpi, cospi, sincospi, tanpi, sinc, cosc,
        cosd, cotd, cscd, secd, sind, tand, sincosd,
        acosd, acotd, acscd, asecd, asind, atand,
        rad2deg, deg2rad,
        log, log2, log10, log1p, exponent, exp, exp2, exp10, expm1,
-       cbrt, sqrt, significand,
+       cbrt, sqrt, fourthroot, significand,
        hypot, max, min, minmax, ldexp, frexp,
        clamp, clamp!, modf, ^, mod2pi, rem2pi,
        @evalpoly, evalpoly
@@ -31,7 +31,11 @@ using .Base: IEEEFloat
 
 @noinline function throw_complex_domainerror(f::Symbol, x)
     throw(DomainError(x,
-        LazyString(f," will only return a complex result if called with a complex argument. Try ", f,"(Complex(x)).")))
+        LazyString(f," was called with a negative real argument but will only return a complex result if called with a complex argument. Try ", f,"(Complex(x)).")))
+end
+@noinline function throw_complex_domainerror_neg1(f::Symbol, x)
+    throw(DomainError(x,
+        LazyString(f," was called with a real argument < -1 but will only return a complex result if called with a complex argument. Try ", f,"(Complex(x)).")))
 end
 @noinline function throw_exp_domainerror(x)
     throw(DomainError(x, LazyString(
@@ -42,6 +46,11 @@ end
 
 # non-type specific math functions
 
+function two_mul(x::T, y::T) where {T<:Number}
+    xy = x*y
+    xy, fma(x, y, -xy)
+end
+
 @assume_effects :consistent @inline function two_mul(x::Float64, y::Float64)
     if Core.Intrinsics.have_fma(Float64)
         xy = x*y
@@ -358,14 +367,14 @@ julia> log(4,2)
 
 julia> log(-2, 3)
 ERROR: DomainError with -2.0:
-log will only return a complex result if called with a complex argument. Try log(Complex(x)).
+log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)).
 Stacktrace:
  [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31
 [...]
 
 julia> log(2, -3)
 ERROR: DomainError with -3.0:
-log will only return a complex result if called with a complex argument. Try log(Complex(x)).
+log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)).
 Stacktrace:
  [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31
 [...]
@@ -579,7 +588,7 @@ julia> log(2)
 
 julia> log(-3)
 ERROR: DomainError with -3.0:
-log will only return a complex result if called with a complex argument. Try log(Complex(x)).
+log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)).
 Stacktrace:
  [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31
 [...]
@@ -611,7 +620,7 @@ julia> log2(10)
 
 julia> log2(-2)
 ERROR: DomainError with -2.0:
-log2 will only return a complex result if called with a complex argument. Try log2(Complex(x)).
+log2 was called with a negative real argument but will only return a complex result if called with a complex argument. Try log2(Complex(x)).
 Stacktrace:
  [1] throw_complex_domainerror(f::Symbol, x::Float64) at ./math.jl:31
 [...]
@@ -641,7 +650,7 @@ julia> log10(2)
 
 julia> log10(-2)
 ERROR: DomainError with -2.0:
-log10 will only return a complex result if called with a complex argument. Try log10(Complex(x)).
+log10 was called with a negative real argument but will only return a complex result if called with a complex argument. Try log10(Complex(x)).
 Stacktrace:
  [1] throw_complex_domainerror(f::Symbol, x::Float64) at ./math.jl:31
 [...]
@@ -665,7 +674,7 @@ julia> log1p(0)
 
 julia> log1p(-2)
 ERROR: DomainError with -2.0:
-log1p will only return a complex result if called with a complex argument. Try log1p(Complex(x)).
+log1p was called with a real argument < -1 but will only return a complex result if called with a complex argument. Try log1p(Complex(x)).
 Stacktrace:
  [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31
 [...]
@@ -711,6 +720,13 @@ julia> .√(1:4)
 """
 sqrt(x)
 
+"""
+    fourthroot(x)
+
+Return the fourth root of `x` by applying `sqrt` twice successively.
+"""
+fourthroot(x::Number) = sqrt(sqrt(x))
+
 """
     hypot(x, y)
 
@@ -737,7 +753,7 @@ julia> hypot(a, a)
 
 julia> √(a^2 + a^2) # a^2 overflows
 ERROR: DomainError with -2.914184810805068e18:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
 
@@ -841,6 +857,20 @@ function _hypot(x::NTuple{N,<:Number}) where {N}
     end
 end
 
+function _hypot(x::NTuple{N,<:IEEEFloat}) where {N}
+    T = eltype(x)
+    infT = convert(T, Inf)
+    x = abs.(x) # doesn't change result but enables computational shortcuts
+    # note: any() was causing this to not inline for N=3 but mapreduce() was not
+    mapreduce(==(infT), |, x) && return infT # return Inf even if an argument is NaN
+    maxabs = reinterpret(T, maximum(z -> reinterpret(Signed, z), x)) # for abs(::IEEEFloat) values, a ::BitInteger cast does not change the result
+    maxabs > zero(T) || return maxabs # catch NaN before the @fastmath below, but also shortcut 0 since we can (remove if no more @fastmath below)
+    scale,invscale = scaleinv(maxabs)
+     # @fastmath(+) to allow reassociation (see #48129)
+    add_fast(x, y) = Core.Intrinsics.add_float_fast(x, y) # @fastmath is not available during bootstrap
+    return scale * sqrt(mapreduce(y -> abs2(y * invscale), add_fast, x))
+end
+
 atan(y::Real, x::Real) = atan(promote(float(y),float(x))...)
 atan(y::T, x::T) where {T<:AbstractFloat} = Base.no_op_err("atan", T)
 
@@ -1066,6 +1096,40 @@ function frexp(x::T) where T<:IEEEFloat
     return reinterpret(T, xu), k
 end
 
+"""
+    $(@__MODULE__).scaleinv(x)
+
+Compute `(scale, invscale)` where `scale` and `invscale` are non-subnormal
+(https://en.wikipedia.org/wiki/Subnormal_number) finite powers of two such that
+`scale * invscale == 1` and `scale` is roughly on the order of `abs(x)`.
+Inf, NaN, and zero inputs also result in finite nonzero outputs.
+These values are useful to scale computations to prevent overflow and underflow
+without round-off errors or division.
+
+UNSTABLE DETAIL: For `x isa IEEEFLoat`, `scale` is chosen to be the
+`prevpow(2,abs(x))` when possible, but is never less than floatmin(x) or greater
+than inv(floatmin(x)). `Inf` and `NaN` resolve to `inv(floatmin(x))`. This
+behavior is subject to change.
+
+# Examples
+```jldoctest
+julia> $(@__MODULE__).scaleinv(7.5)
+(4.0, 0.25)
+```
+"""
+function scaleinv(x::T) where T<:IEEEFloat
+    # by removing the sign and significand and restricting values to a limited range,
+    # we can invert a number using bit-twiddling instead of division
+    U = uinttype(T)
+    umin = reinterpret(U, floatmin(T))
+    umax = reinterpret(U, inv(floatmin(T)))
+    emask = exponent_mask(T) # used to strip sign and significand
+    u = clamp(reinterpret(U, x) & emask, umin, umax)
+    scale = reinterpret(T, u)
+    invscale = reinterpret(T, umin + umax - u) # inv(scale)
+    return scale, invscale
+end
+
 # NOTE: This `rem` method is adapted from the msun `remainder` and `remainderf`
 # functions, which are under the following license:
 #
@@ -1278,7 +1342,8 @@ julia> rem2pi(7pi/4, RoundDown)
 """
 function rem2pi end
 function rem2pi(x::Float64, ::RoundingMode{:Nearest})
-    isfinite(x) || return NaN
+    isnan(x) && return x
+    isinf(x) && return NaN
 
     abs(x) < pi && return x
 
@@ -1303,7 +1368,8 @@ function rem2pi(x::Float64, ::RoundingMode{:Nearest})
     end
 end
 function rem2pi(x::Float64, ::RoundingMode{:ToZero})
-    isfinite(x) || return NaN
+    isnan(x) && return x
+    isinf(x) && return NaN
 
     ax = abs(x)
     ax <= 2*Float64(pi,RoundDown) && return x
@@ -1330,7 +1396,8 @@ function rem2pi(x::Float64, ::RoundingMode{:ToZero})
     copysign(z,x)
 end
 function rem2pi(x::Float64, ::RoundingMode{:Down})
-    isfinite(x) || return NaN
+    isnan(x) && return x
+    isinf(x) && return NaN
 
     if x < pi4o2_h
         if x >= 0
@@ -1361,7 +1428,8 @@ function rem2pi(x::Float64, ::RoundingMode{:Down})
     end
 end
 function rem2pi(x::Float64, ::RoundingMode{:Up})
-    isfinite(x) || return NaN
+    isnan(x) && return x
+    isinf(x) && return NaN
 
     if x > -pi4o2_h
         if x <= 0
@@ -1485,7 +1553,7 @@ include("special/log.jl")
 # Float16 definitions
 
 for func in (:sin,:cos,:tan,:asin,:acos,:atan,:cosh,:tanh,:asinh,:acosh,
-             :atanh,:log,:log2,:log10,:sqrt,:lgamma,:log1p)
+             :atanh,:log,:log2,:log10,:sqrt,:fourthroot,:log1p)
     @eval begin
         $func(a::Float16) = Float16($func(Float32(a)))
         $func(a::ComplexF16) = ComplexF16($func(ComplexF32(a)))
@@ -1521,5 +1589,6 @@ end
 exp2(x::AbstractFloat) = 2^x
 exp10(x::AbstractFloat) = 10^x
 clamp(::Missing, lo, hi) = missing
+fourthroot(::Missing) = missing
 
 end # module
diff --git a/base/mathconstants.jl b/base/mathconstants.jl
index 3bb4bb52ad07f..4bb8c409acf00 100644
--- a/base/mathconstants.jl
+++ b/base/mathconstants.jl
@@ -10,11 +10,11 @@ module MathConstants
 
 export π, pi, ℯ, e, γ, eulergamma, catalan, φ, golden
 
-Base.@irrational π        3.14159265358979323846  pi
-Base.@irrational ℯ        2.71828182845904523536  exp(big(1))
-Base.@irrational γ        0.57721566490153286061  euler
-Base.@irrational φ        1.61803398874989484820  (1+sqrt(big(5)))/2
-Base.@irrational catalan  0.91596559417721901505  catalan
+Base.@irrational π        pi
+Base.@irrational ℯ        exp(big(1))
+Base.@irrational γ        euler
+Base.@irrational φ        (1+sqrt(big(5)))/2
+Base.@irrational catalan  catalan
 
 # aliases
 """
diff --git a/base/methodshow.jl b/base/methodshow.jl
index 237ee006edce9..0eb99dc88303f 100644
--- a/base/methodshow.jl
+++ b/base/methodshow.jl
@@ -7,7 +7,7 @@ function strip_gensym(sym)
     if sym === :var"#self#" || sym === :var"#unused#"
         return empty_sym
     end
-    return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$" => s"\1"))
+    return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$"sa => s"\1"))
 end
 
 function argtype_decl(env, n, @nospecialize(sig::DataType), i::Int, nargs, isva::Bool) # -> (argname, argtype)
@@ -80,7 +80,7 @@ end
 function kwarg_decl(m::Method, kwtype = nothing)
     if m.sig !== Tuple # OpaqueClosure or Builtin
         kwtype = typeof(Core.kwcall)
-        sig = rewrap_unionall(Tuple{kwtype, Any, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig)
+        sig = rewrap_unionall(Tuple{kwtype, NamedTuple, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig)
         kwli = ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), kwtype.name.mt, sig, get_world_counter())
         if kwli !== nothing
             kwli = kwli::Method
@@ -93,6 +93,7 @@ function kwarg_decl(m::Method, kwtype = nothing)
                 push!(kws, kws[i])
                 deleteat!(kws, i)
             end
+            isempty(kws) && push!(kws,  :var"...")
             return kws
         end
     end
@@ -193,6 +194,9 @@ function functionloc(@nospecialize(f))
 end
 
 function sym_to_string(sym)
+    if sym === :var"..."
+        return "..."
+    end
     s = String(sym)
     if endswith(s, "...")
         return string(sprint(show_sym, Symbol(s[1:end-3])), "...")
@@ -361,7 +365,7 @@ function url(m::Method)
     (m.file === :null || m.file === :string) && return ""
     file = string(m.file)
     line = m.line
-    line <= 0 || occursin(r"In\[[0-9]+\]", file) && return ""
+    line <= 0 || occursin(r"In\[[0-9]+\]"a, file) && return ""
     Sys.iswindows() && (file = replace(file, '\\' => '/'))
     libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2")
     if inbase(M)
diff --git a/base/missing.jl b/base/missing.jl
index e1988064aadc1..4544c2b38c460 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -41,6 +41,7 @@ nonmissingtype(::Type{T}) where {T} = typesplit(T, Missing)
 function nonmissingtype_checked(T::Type)
     R = nonmissingtype(T)
     R >: T && error("could not compute non-missing type")
+    R <: Union{} && error("cannot convert a value to missing for assignment")
     return R
 end
 
@@ -69,7 +70,6 @@ convert(::Type{T}, x::T) where {T>:Union{Missing, Nothing}} = x
 convert(::Type{T}, x) where {T>:Missing} = convert(nonmissingtype_checked(T), x)
 convert(::Type{T}, x) where {T>:Union{Missing, Nothing}} = convert(nonmissingtype_checked(nonnothingtype_checked(T)), x)
 
-
 # Comparison operators
 ==(::Missing, ::Missing) = missing
 ==(::Missing, ::Any) = missing
diff --git a/base/mpfr.jl b/base/mpfr.jl
index d42beb0c59190..ff85fc6155df4 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -8,7 +8,7 @@ export
 
 import
     .Base: *, +, -, /, <, <=, ==, >, >=, ^, ceil, cmp, convert, copysign, div,
-        inv, exp, exp2, exponent, factorial, floor, fma, hypot, isinteger,
+        inv, exp, exp2, exponent, factorial, floor, fma, muladd, hypot, isinteger,
         isfinite, isinf, isnan, ldexp, log, log2, log10, max, min, mod, modf,
         nextfloat, prevfloat, promote_rule, rem, rem2pi, round, show, float,
         sum, sqrt, string, print, trunc, precision, _precision, exp10, expm1, log1p,
@@ -17,7 +17,7 @@ import
         cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
         setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
         isone, big, _string_n, decompose, minmax,
-        sinpi, cospi, sincospi, sind, cosd, tand, asind, acosd, atand
+        sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand
 
 import ..Rounding: rounding_raw, setrounding_raw
 
@@ -536,6 +536,8 @@ function fma(x::BigFloat, y::BigFloat, z::BigFloat)
     return r
 end
 
+muladd(x::BigFloat, y::BigFloat, z::BigFloat) = fma(x, y, z)
+
 # div
 # BigFloat
 function div(x::BigFloat, y::BigFloat)
@@ -722,7 +724,8 @@ end
 for f in (:log, :log2, :log10)
     @eval function $f(x::BigFloat)
         if x < 0
-            throw(DomainError(x, string($f, " will only return a complex result if called ",
+            throw(DomainError(x, string($f, " was called with a negative real argument but ",
+                              "will only return a complex result if called ",
                               "with a complex argument. Try ", $f, "(complex(x)).")))
         end
         z = BigFloat()
@@ -733,7 +736,8 @@ end
 
 function log1p(x::BigFloat)
     if x < -1
-        throw(DomainError(x, string("log1p will only return a complex result if called ",
+        throw(DomainError(x, string("log1p was called with a real argument < -1 but ",
+                          "will only return a complex result if called ",
                           "with a complex argument. Try log1p(complex(x)).")))
     end
     z = BigFloat()
@@ -790,7 +794,7 @@ function sum(arr::AbstractArray{BigFloat})
 end
 
 # Functions for which NaN results are converted to DomainError, following Base
-for f in (:sin, :cos, :tan, :sec, :csc, :acos, :asin, :atan, :acosh, :asinh, :atanh, :sinpi, :cospi)
+for f in (:sin, :cos, :tan, :sec, :csc, :acos, :asin, :atan, :acosh, :asinh, :atanh, :sinpi, :cospi, :tanpi)
     @eval begin
         function ($f)(x::BigFloat)
             isnan(x) && return x
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index cad9b088acf50..ce1b6c39adb43 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -509,8 +509,30 @@ module IteratorsMD
     end
 
     # reversed CartesianIndices iteration
+    @inline function Base._reverse(iter::CartesianIndices, ::Colon)
+        CartesianIndices(reverse.(iter.indices))
+    end
+
+    Base.@constprop :aggressive function Base._reverse(iter::CartesianIndices, dim::Integer)
+        1 <= dim <= ndims(iter) || throw(ArgumentError(Base.LazyString("invalid dimension ", dim, " in reverse")))
+        ndims(iter) == 1 && return Base._reverse(iter, :)
+        indices = iter.indices
+        return CartesianIndices(Base.setindex(indices, reverse(indices[dim]), dim))
+    end
+
+    Base.@constprop :aggressive function Base._reverse(iter::CartesianIndices, dims::Tuple{Vararg{Integer}})
+        indices = iter.indices
+        # use `sum` to force const fold
+        dimrev = ntuple(i -> sum(==(i), dims; init = 0) == 1, Val(length(indices)))
+        length(dims) == sum(dimrev) || throw(ArgumentError(Base.LazyString("invalid dimensions ", dims, " in reverse")))
+        length(dims) == length(indices) && return Base._reverse(iter, :)
+        indices′ = map((i, f) -> f ? (@noinline reverse(i)) : i, indices, dimrev)
+        return CartesianIndices(indices′)
+    end
 
-    Base.reverse(iter::CartesianIndices) = CartesianIndices(reverse.(iter.indices))
+    # fix ambiguity with array.jl:
+    Base._reverse(iter::CartesianIndices{1}, dims::Tuple{Integer}) =
+        Base._reverse(iter, first(dims))
 
     @inline function iterate(r::Reverse{<:CartesianIndices})
         iterfirst = last(r.itr)
diff --git a/base/multinverses.jl b/base/multinverses.jl
index 4342a9a5f5cf7..21d8e53d2ff83 100644
--- a/base/multinverses.jl
+++ b/base/multinverses.jl
@@ -14,7 +14,7 @@ unsigned(::Type{Int64}) = UInt64
 unsigned(::Type{Int128}) = UInt128
 unsigned(::Type{T}) where {T<:Unsigned} = T
 
-abstract type  MultiplicativeInverse{T} end
+abstract type  MultiplicativeInverse{T} <: Number end
 
 # Computes integer division by a constant using multiply, add, and bitshift.
 
@@ -97,7 +97,6 @@ struct UnsignedMultiplicativeInverse{T<:Unsigned} <: MultiplicativeInverse{T}
 
     function UnsignedMultiplicativeInverse{T}(d::T) where T<:Unsigned
         d == 0 && throw(ArgumentError("cannot compute magic for d == $d"))
-        u2 = convert(T, 2)
         add = false
         signedmin = one(d) << (sizeof(d)*8-1)
         signedmax = signedmin - one(T)
@@ -135,13 +134,33 @@ struct UnsignedMultiplicativeInverse{T<:Unsigned} <: MultiplicativeInverse{T}
 end
 UnsignedMultiplicativeInverse(x::Unsigned) = UnsignedMultiplicativeInverse{typeof(x)}(x)
 
+# Returns the higher half of the product a*b
+function _mul_high(a::T, b::T) where {T<:Union{Signed, Unsigned}}
+    ((widen(a)*b) >>> (sizeof(a)*8)) % T
+end
+
+function _mul_high(a::UInt128, b::UInt128)
+    shift = sizeof(a)*4
+    mask = typemax(UInt128) >> shift
+    a1, a2 = a >>> shift, a & mask
+    b1, b2 = b >>> shift, b & mask
+    a1b1, a1b2, a2b1, a2b2 = a1*b1, a1*b2, a2*b1, a2*b2
+    carry = ((a1b2 & mask) + (a2b1 & mask) + (a2b2 >>> shift)) >>> shift
+    a1b1 + (a1b2 >>> shift) + (a2b1 >>> shift) + carry
+end
+function _mul_high(a::Int128, b::Int128)
+    shift = sizeof(a)*8 - 1
+    t1, t2 = (a >> shift) & b % UInt128, (b >> shift) & a % UInt128
+    (_mul_high(a % UInt128, b % UInt128) - t1 - t2) % Int128
+end
+
 function div(a::T, b::SignedMultiplicativeInverse{T}) where T
-    x = ((widen(a)*b.multiplier) >>> (sizeof(a)*8)) % T
+    x = _mul_high(a, b.multiplier)
     x += (a*b.addmul) % T
     ifelse(abs(b.divisor) == 1, a*b.divisor, (signbit(x) + (x >> b.shift)) % T)
 end
 function div(a::T, b::UnsignedMultiplicativeInverse{T}) where T
-    x = ((widen(a)*b.multiplier) >>> (sizeof(a)*8)) % T
+    x = _mul_high(a, b.multiplier)
     x = ifelse(b.add, convert(T, convert(T, (convert(T, a - x) >>> 1)) + x), x)
     ifelse(b.divisor == 1, a, x >>> b.shift)
 end
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index fe6f3f0e81ce3..320d068205a3d 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -69,6 +69,7 @@ The name-value pairs can also be provided by splatting a named tuple or any
 iterator that yields two-value collections holding each a symbol as first
 value:
 
+```jldoctest
 julia> keys = (:a, :b, :c); values = (1, 2, 3);
 
 julia> NamedTuple{keys}(values)
@@ -123,7 +124,10 @@ end
 function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple}
     if @generated
         Expr(:new, :(NamedTuple{names, T}),
-             Any[ :(convert(fieldtype(T, $n), getfield(nt, $(QuoteNode(names[n]))))) for n in 1:length(names) ]...)
+             Any[ :(let Tn = fieldtype(T, $n),
+                      ntn = getfield(nt, $(QuoteNode(names[n])))
+                      ntn isa Tn ? ntn : convert(Tn, ntn)
+                  end) for n in 1:length(names) ]...)
     else
         NamedTuple{names, T}(map(Fix1(getfield, nt), names))
     end
@@ -176,16 +180,25 @@ empty(::NamedTuple) = NamedTuple()
 prevind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)-1
 nextind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)+1
 
-convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names,T}) where {names,T<:Tuple} = nt
-convert(::Type{NamedTuple{names}}, nt::NamedTuple{names}) where {names} = nt
+convert(::Type{NT}, nt::NT) where {names, NT<:NamedTuple{names}} = nt
+convert(::Type{NT}, nt::NT) where {names, T<:Tuple, NT<:NamedTuple{names,T}} = nt
 
-function convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names}) where {names,T<:Tuple}
-    NamedTuple{names,T}(T(nt))::NamedTuple{names,T}
+function convert(::Type{NT}, nt::NamedTuple{names}) where {names, T<:Tuple, NT<:NamedTuple{names,T}}
+    if !@isdefined T
+        # converting abstract NT to an abstract Tuple type, to a concrete NT1, is not straightforward, so this could just be an error, but we define it anyways
+        # _tuple_error(NT, nt)
+        T1 = Tuple{ntuple(i -> fieldtype(NT, i), Val(length(names)))...}
+        NT1 = NamedTuple{names, T1}
+    else
+        T1 = T
+        NT1 = NT
+    end
+    return NT1(T1(nt))::NT1::NT
 end
 
 if nameof(@__MODULE__) === :Base
     Tuple(nt::NamedTuple) = (nt...,)
-    (::Type{T})(nt::NamedTuple) where {T <: Tuple} = convert(T, Tuple(nt))
+    (::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)
 end
 
 function show(io::IO, t::NamedTuple)
@@ -452,20 +465,20 @@ This macro gives a more convenient syntax for declaring `NamedTuple` types. It r
 type with the given keys and types, equivalent to `NamedTuple{(:key1, :key2, ...), Tuple{Type1,Type2,...}}`.
 If the `::Type` declaration is omitted, it is taken to be `Any`.   The `begin ... end` form allows the
 declarations to be split across multiple lines (similar to a `struct` declaration), but is otherwise
-equivalent.
+equivalent. The `NamedTuple` macro is used when printing `NamedTuple` types to e.g. the REPL.
 
-For example, the tuple `(a=3.1, b="hello")` has a type `NamedTuple{(:a, :b),Tuple{Float64,String}}`, which
+For example, the tuple `(a=3.1, b="hello")` has a type `NamedTuple{(:a, :b), Tuple{Float64, String}}`, which
 can also be declared via `@NamedTuple` as:
 
 ```jldoctest
 julia> @NamedTuple{a::Float64, b::String}
-NamedTuple{(:a, :b), Tuple{Float64, String}}
+@NamedTuple{a::Float64, b::String}
 
 julia> @NamedTuple begin
            a::Float64
            b::String
        end
-NamedTuple{(:a, :b), Tuple{Float64, String}}
+@NamedTuple{a::Float64, b::String}
 ```
 
 !!! compat "Julia 1.5"
diff --git a/base/number.jl b/base/number.jl
index 31aa616b0eb55..923fc907d4038 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -307,6 +307,7 @@ julia> zero(rand(2,2))
 """
 zero(x::Number) = oftype(x,0)
 zero(::Type{T}) where {T<:Number} = convert(T,0)
+zero(::Type{Union{}}, slurp...) = Union{}(0)
 
 """
     one(x)
@@ -345,6 +346,7 @@ julia> import Dates; one(Dates.Day(1))
 """
 one(::Type{T}) where {T<:Number} = convert(T,1)
 one(x::T) where {T<:Number} = one(T)
+one(::Type{Union{}}, slurp...) = Union{}(1)
 # note that convert(T, 1) should throw an error if T is dimensionful,
 # so this fallback definition should be okay.
 
@@ -368,6 +370,7 @@ julia> import Dates; oneunit(Dates.Day)
 """
 oneunit(x::T) where {T} = T(one(x))
 oneunit(::Type{T}) where {T} = T(one(T))
+oneunit(::Type{Union{}}, slurp...) = Union{}(1)
 
 """
     big(T::Type)
@@ -388,3 +391,4 @@ Complex{BigInt}
 ```
 """
 big(::Type{T}) where {T<:Number} = typeof(big(zero(T)))
+big(::Type{Union{}}, slurp...) = Union{}(0)
diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl
index 2bccd613d0009..bb0ae8935b06c 100644
--- a/base/opaque_closure.jl
+++ b/base/opaque_closure.jl
@@ -40,10 +40,11 @@ function compute_ir_rettype(ir::IRCode)
     return Core.Compiler.widenconst(rt)
 end
 
-function compute_oc_argtypes(ir, nargs, isva)
-    argtypes = ir.argtypes[2:end]
-    @assert nargs == length(argtypes)
-    argtypes = Core.Compiler.anymap(Core.Compiler.widenconst, argtypes)
+function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
+    argtypes = Vector{Any}(undef, nargs)
+    for i = 1:nargs
+        argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1])
+    end
     if isva
         lastarg = pop!(argtypes)
         if lastarg <: Tuple
@@ -52,34 +53,42 @@ function compute_oc_argtypes(ir, nargs, isva)
             push!(argtypes, Vararg{Any})
         end
     end
-    Tuple{argtypes...}
+    return Tuple{argtypes...}
 end
 
-function Core.OpaqueClosure(ir::IRCode, env...;
-        nargs::Int = length(ir.argtypes)-1,
-        isva::Bool = false,
-        rt = compute_ir_rettype(ir),
-        do_compile::Bool = true)
-    if (isva && nargs > length(ir.argtypes)) || (!isva && nargs != length(ir.argtypes)-1)
-        throw(ArgumentError("invalid argument count"))
-    end
+function Core.OpaqueClosure(ir::IRCode, @nospecialize env...;
+                            isva::Bool = false,
+                            do_compile::Bool = true)
+    # NOTE: we need ir.argtypes[1] == typeof(env)
+    ir = Core.Compiler.copy(ir)
+    nargs = length(ir.argtypes)-1
+    sig = compute_oc_signature(ir, nargs, isva)
+    rt = compute_ir_rettype(ir)
     src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
-    src.slotflags = UInt8[]
     src.slotnames = fill(:none, nargs+1)
+    src.slotflags = fill(zero(UInt8), length(ir.argtypes))
     src.slottypes = copy(ir.argtypes)
-    Core.Compiler.replace_code_newstyle!(src, ir, nargs+1)
-    Core.Compiler.widen_all_consts!(src)
-    src.inferred = true
-    # NOTE: we need ir.argtypes[1] == typeof(env)
-
-    ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint),
-        compute_oc_argtypes(ir, nargs, isva), Union{}, rt, @__MODULE__, src, 0, nothing, nargs, isva, env, do_compile)
+    src.rettype = rt
+    src = Core.Compiler.ir_to_codeinf!(src, ir)
+    return generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; do_compile)
 end
 
-function Core.OpaqueClosure(src::CodeInfo, env...)
-    M = src.parent.def
-    sig = Base.tuple_type_tail(src.parent.specTypes)
+function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...)
+    src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo"))
+    mi = src.parent::Core.MethodInstance
+    sig = Base.tuple_type_tail(mi.specTypes)
+    method = mi.def::Method
+    nargs = method.nargs-1
+    isva = method.isva
+    return generate_opaque_closure(sig, Union{}, src.rettype, src, nargs, isva, env...)
+end
 
-    ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint),
-          sig, Union{}, src.rettype, @__MODULE__, src, 0, nothing, M.nargs - 1, M.isva, env, true)
+function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nospecialize(rt_ub),
+                                 src::CodeInfo, nargs::Int, isva::Bool, @nospecialize env...;
+                                 mod::Module=@__MODULE__,
+                                 lineno::Int=0,
+                                 file::Union{Nothing,Symbol}=nothing,
+                                 do_compile::Bool=true)
+    return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint),
+        sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile)
 end
diff --git a/base/operators.jl b/base/operators.jl
index 7ac5637951b16..5893c5944a3a0 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -178,6 +178,13 @@ isless(x::AbstractFloat, y::AbstractFloat) = (!isnan(x) & (isnan(y) | signless(x
 isless(x::Real,          y::AbstractFloat) = (!isnan(x) & (isnan(y) | signless(x, y))) | (x < y)
 isless(x::AbstractFloat, y::Real         ) = (!isnan(x) & (isnan(y) | signless(x, y))) | (x < y)
 
+# Performance optimization to reduce branching
+# This is useful for sorting tuples of integers
+# TODO: remove this when the compiler can optimize the generic version better
+# See #48724 and #48753
+isless(a::Tuple{BitInteger, BitInteger}, b::Tuple{BitInteger, BitInteger}) =
+    isless(a[1], b[1]) | (isequal(a[1], b[1]) & isless(a[2], b[2]))
+
 """
     isgreater(x, y)
 
@@ -881,6 +888,7 @@ julia> widen(1.5f0)
 """
 widen(x::T) where {T} = convert(widen(T), x)
 widen(x::Type{T}) where {T} = throw(MethodError(widen, (T,)))
+widen(x::Type{Union{}}, slurp...) = throw(MethodError(widen, (Union{},)))
 
 # function pipelining
 
diff --git a/base/options.jl b/base/options.jl
index dda0e8b377076..23a3dbc802b5f 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -11,6 +11,7 @@ struct JLOptions
     cpu_target::Ptr{UInt8}
     nthreadpools::Int16
     nthreads::Int16
+    ngcthreads::Int16
     nthreads_per_pool::Ptr{Int16}
     nprocs::Int32
     machine_file::Ptr{UInt8}
diff --git a/base/pair.jl b/base/pair.jl
index 28a9f981080ec..f34ebb89c80da 100644
--- a/base/pair.jl
+++ b/base/pair.jl
@@ -60,7 +60,11 @@ last(p::Pair) = p.second
 
 convert(::Type{Pair{A,B}}, x::Pair{A,B}) where {A,B} = x
 function convert(::Type{Pair{A,B}}, x::Pair) where {A,B}
-    Pair{A,B}(convert(A, x[1]), convert(B, x[2]))::Pair{A,B}
+    a = getfield(x, :first)
+    a isa A || (a = convert(A, a))
+    b = getfield(x, :second)
+    b isa B || (b = convert(B, b))
+    return Pair{A,B}(a, b)::Pair{A,B}
 end
 
 promote_rule(::Type{Pair{A1,B1}}, ::Type{Pair{A2,B2}}) where {A1,B1,A2,B2} =
diff --git a/base/parse.jl b/base/parse.jl
index 6e616004a47af..d800e54258b0d 100644
--- a/base/parse.jl
+++ b/base/parse.jl
@@ -36,6 +36,7 @@ julia> parse(Complex{Float64}, "3.2e-1 + 4.5im")
 ```
 """
 parse(T::Type, str; base = Int)
+parse(::Type{Union{}}, slurp...; kwargs...) = error("cannot parse a value as Union{}")
 
 function parse(::Type{T}, c::AbstractChar; base::Integer = 10) where T<:Integer
     a::Int = (base <= 36 ? 10 : 36)
@@ -251,6 +252,7 @@ function parse(::Type{T}, s::AbstractString; base::Union{Nothing,Integer} = noth
     convert(T, tryparse_internal(T, s, firstindex(s), lastindex(s),
                                  base===nothing ? 0 : check_valid_base(base), true))
 end
+tryparse(::Type{Union{}}, slurp...; kwargs...) = error("cannot parse a value as Union{}")
 
 ## string to float functions ##
 
diff --git a/base/partr.jl b/base/partr.jl
index c5bb6603d53af..a02272ceab202 100644
--- a/base/partr.jl
+++ b/base/partr.jl
@@ -179,13 +179,12 @@ function multiq_deletemin()
     return task
 end
 
-
 function multiq_check_empty()
-    for j = UInt32(1):length(heaps)
-        for i = UInt32(1):length(heaps[j])
-            if heaps[j][i].ntasks != 0
-                return false
-            end
+    tid = Threads.threadid()
+    tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1
+    for i = UInt32(1):length(heaps[tp])
+        if heaps[tp][i].ntasks != 0
+            return false
         end
     end
     return true
diff --git a/base/path.jl b/base/path.jl
index 73d91e60f8c03..c439a2800acce 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -20,22 +20,22 @@ export
 
 if Sys.isunix()
     const path_separator    = "/"
-    const path_separator_re = r"/+"
-    const path_directory_re = r"(?:^|/)\.{0,2}$"
-    const path_dir_splitter = r"^(.*?)(/+)([^/]*)$"
-    const path_ext_splitter = r"^((?:.*/)?(?:\.|[^/\.])[^/]*?)(\.[^/\.]*|)$"
+    const path_separator_re = r"/+"sa
+    const path_directory_re = r"(?:^|/)\.{0,2}$"sa
+    const path_dir_splitter = r"^(.*?)(/+)([^/]*)$"sa
+    const path_ext_splitter = r"^((?:.*/)?(?:\.|[^/\.])[^/]*?)(\.[^/\.]*|)$"sa
 
     splitdrive(path::String) = ("",path)
 elseif Sys.iswindows()
     const path_separator    = "\\"
-    const path_separator_re = r"[/\\]+"
-    const path_absolute_re  = r"^(?:[A-Za-z]+:)?[/\\]"
-    const path_directory_re = r"(?:^|[/\\])\.{0,2}$"
-    const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"
-    const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"
+    const path_separator_re = r"[/\\]+"sa
+    const path_absolute_re  = r"^(?:[A-Za-z]+:)?[/\\]"sa
+    const path_directory_re = r"(?:^|[/\\])\.{0,2}$"sa
+    const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"sa
+    const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"sa
 
     function splitdrive(path::String)
-        m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"s, path)::AbstractMatch
+        m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"sa, path)::AbstractMatch
         String(something(m.captures[1])), String(something(m.captures[2]))
     end
 else
@@ -145,7 +145,7 @@ function _splitdir_nodrive(a::String, b::String)
 end
 
 """
-    dirname(path::AbstractString) -> AbstractString
+    dirname(path::AbstractString) -> String
 
 Get the directory part of a path. Trailing characters ('/' or '\\') in the path are
 counted as part of the path.
@@ -161,10 +161,10 @@ julia> dirname("/home/myuser/")
 
 See also [`basename`](@ref).
 """
- dirname(path::AbstractString) = splitdir(path)[1]
+dirname(path::AbstractString) = splitdir(path)[1]
 
 """
-    basename(path::AbstractString) -> AbstractString
+    basename(path::AbstractString) -> String
 
 Get the file name part of a path.
 
@@ -186,7 +186,7 @@ See also [`dirname`](@ref).
 basename(path::AbstractString) = splitdir(path)[2]
 
 """
-    splitext(path::AbstractString) -> (AbstractString, AbstractString)
+    splitext(path::AbstractString) -> (String, String)
 
 If the last component of a path contains one or more dots, split the path into everything before the
 last dot and everything including and after the dot. Otherwise, return a tuple of the argument
@@ -542,7 +542,7 @@ contractuser(path::AbstractString)
 
 
 """
-    relpath(path::AbstractString, startpath::AbstractString = ".") -> AbstractString
+    relpath(path::AbstractString, startpath::AbstractString = ".") -> String
 
 Return a relative filepath to `path` either from the current directory or from an optional
 start directory. This is a path computation: the filesystem is not accessed to confirm the
diff --git a/base/process.jl b/base/process.jl
index 55df523c1f7d2..ed51a30ae3ced 100644
--- a/base/process.jl
+++ b/base/process.jl
@@ -413,7 +413,7 @@ process failed, or if the process attempts to print anything to stdout.
 """
 function open(f::Function, cmds::AbstractCmd, args...; kwargs...)
     P = open(cmds, args...; kwargs...)
-    function waitkill(P::Process)
+    function waitkill(P::Union{Process,ProcessChain})
         close(P)
         # 0.1 seconds after we hope it dies (from closing stdio),
         # we kill the process with SIGTERM (15)
diff --git a/base/promotion.jl b/base/promotion.jl
index fb5c5b83864ae..6e32bd7a42efa 100644
--- a/base/promotion.jl
+++ b/base/promotion.jl
@@ -19,9 +19,9 @@ Number
 """
 typejoin() = Bottom
 typejoin(@nospecialize(t)) = t
-typejoin(@nospecialize(t), ts...) = (@_total_meta; typejoin(t, typejoin(ts...)))
+typejoin(@nospecialize(t), ts...) = (@_foldable_meta; typejoin(t, typejoin(ts...)))
 function typejoin(@nospecialize(a), @nospecialize(b))
-    @_total_meta
+    @_foldable_meta
     if isa(a, TypeVar)
         return typejoin(a.ub, b)
     elseif isa(b, TypeVar)
@@ -172,7 +172,12 @@ function promote_typejoin(@nospecialize(a), @nospecialize(b))
     c = typejoin(_promote_typesubtract(a), _promote_typesubtract(b))
     return Union{a, b, c}::Type
 end
-_promote_typesubtract(@nospecialize(a)) = typesplit(a, Union{Nothing, Missing})
+_promote_typesubtract(@nospecialize(a)) =
+    a === Any ? a :
+    a >: Union{Nothing, Missing} ? typesplit(a, Union{Nothing, Missing}) :
+    a >: Nothing ? typesplit(a, Nothing) :
+    a >: Missing ? typesplit(a, Missing) :
+    a
 
 function promote_typejoin_union(::Type{T}) where T
     if T === Union{}
@@ -318,6 +323,12 @@ it for new types as appropriate.
 function promote_rule end
 
 promote_rule(::Type, ::Type) = Bottom
+# Define some methods to avoid needing to enumerate unrelated possibilities when presented
+# with Type{<:T}, and return a value in general accordance with the result given by promote_type
+promote_rule(::Type{Bottom}, slurp...) = Bottom
+promote_rule(::Type{Bottom}, ::Type{Bottom}, slurp...) = Bottom # not strictly necessary, since the next method would match unambiguously anyways
+promote_rule(::Type{Bottom}, ::Type{T}, slurp...) where {T} = T
+promote_rule(::Type{T}, ::Type{Bottom}, slurp...) where {T} = T
 
 promote_result(::Type,::Type,::Type{T},::Type{S}) where {T,S} = (@inline; promote_type(T,S))
 # If no promote_rule is defined, both directions give Bottom. In that
@@ -467,6 +478,11 @@ else
     _return_type(@nospecialize(f), @nospecialize(t)) = Any
 end
 
+function TupleOrBottom(tt...)
+    any(p -> p === Union{}, tt) && return Union{}
+    return Tuple{tt...}
+end
+
 """
     promote_op(f, argtypes...)
 
@@ -478,7 +494,12 @@ Guess what an appropriate container eltype would be for storing results of
     the container eltype on the type of the actual elements. Only in the absence of any
     elements (for an empty result container), it may be unavoidable to call `promote_op`.
 """
-promote_op(f, S::Type...) = _return_type(f, Tuple{S...})
+function promote_op(f, S::Type...)
+    argT = TupleOrBottom(S...)
+    argT === Union{} && return Union{}
+    return _return_type(f, argT)
+end
+
 
 ## catch-alls to prevent infinite recursion when definitions are missing ##
 
diff --git a/base/range.jl b/base/range.jl
index 9d12ae1001784..f7dc35703a196 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -24,17 +24,22 @@
 _colon(::Ordered, ::Any, start::T, step, stop::T) where {T} = StepRange(start, step, stop)
 # for T<:Union{Float16,Float32,Float64} see twiceprecision.jl
 _colon(::Ordered, ::ArithmeticRounds, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
+    StepRangeLen(start, step, convert(Integer, fld(stop - start, step)) + 1)
 _colon(::Any, ::Any, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
+    StepRangeLen(start, step, convert(Integer, fld(stop - start, step)) + 1)
 
 """
     (:)(start, [step], stop)
 
 Range operator. `a:b` constructs a range from `a` to `b` with a step size
-of 1 (often a [`UnitRange`](@ref)), and `a:s:b` is similar but uses a step
-size of `s` (a [`StepRange`](@ref) or [`StepRangeLen`](@ref)).
-See also [`range`](@ref) for more control.
+equal to 1, which produces:
+
+* a [`UnitRange`](@ref) when `a` and `b` are integers, or
+* a [`StepRange`](@ref) when `a` and `b` are characters, or
+* a [`StepRangeLen`](@ref) when `a` and/or `b` are floating-point.
+
+`a:s:b` is similar but uses a step size of `s` (a [`StepRange`](@ref) or
+[`StepRangeLen`](@ref)). See also [`range`](@ref) for more control.
 
 The operator `:` is also used in indexing to select whole dimensions, e.g. in `A[:, 1]`.
 
@@ -345,7 +350,8 @@ function steprange_last(start, step, stop)::typeof(stop)
             # Compute remainder as a nonnegative number:
             if absdiff isa Signed && absdiff < zero(absdiff)
                 # unlikely, but handle the signed overflow case with unsigned rem
-                remain = convert(typeof(absdiff), unsigned(absdiff) % absstep)
+                overflow_case(absdiff, absstep) = (@noinline; convert(typeof(absdiff), unsigned(absdiff) % absstep))
+                remain = overflow_case(absdiff, absstep)
             else
                 remain = convert(typeof(absdiff), absdiff % absstep)
             end
@@ -469,9 +475,9 @@ A range `r` where `r[i]` produces values of type `T` (in the first
 form, `T` is deduced automatically), parameterized by a `ref`erence
 value, a `step`, and the `len`gth. By default `ref` is the starting
 value `r[1]`, but alternatively you can supply it as the value of
-`r[offset]` for some other index `1 <= offset <= len`. In conjunction
-with `TwicePrecision` this can be used to implement ranges that are
-free of roundoff error.
+`r[offset]` for some other index `1 <= offset <= len`. The syntax `a:b`
+or `a:b:c`, where any of `a`, `b`, or `c` are floating-point numbers, creates a
+`StepRangeLen`.
 
 !!! compat "Julia 1.7"
     The 4th type parameter `L` requires at least Julia 1.7.
@@ -1093,7 +1099,7 @@ show(io::IO, r::AbstractRange) = print(io, repr(first(r)), ':', repr(step(r)), '
 show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r)))
 show(io::IO, r::OneTo) = print(io, "Base.OneTo(", r.stop, ")")
 function show(io::IO, r::StepRangeLen)
-    if step(r) != 0
+    if !iszero(step(r))
         print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r)))
     else
         # ugly temporary printing, to avoid 0:0:0 etc.
@@ -1236,19 +1242,17 @@ end
 
 # _findin (the index of intersection)
 function _findin(r::AbstractRange{<:Integer}, span::AbstractUnitRange{<:Integer})
-    local ifirst
-    local ilast
     fspan = first(span)
     lspan = last(span)
     fr = first(r)
     lr = last(r)
     sr = step(r)
     if sr > 0
-        ifirst = fr >= fspan ? 1 : ceil(Integer,(fspan-fr)/sr)+1
-        ilast = lr <= lspan ? length(r) : length(r) - ceil(Integer,(lr-lspan)/sr)
+        ifirst = fr >= fspan ? 1 : cld(fspan-fr, sr)+1
+        ilast = lr <= lspan ? length(r) : length(r) - cld(lr-lspan, sr)
     elseif sr < 0
-        ifirst = fr <= lspan ? 1 : ceil(Integer,(lspan-fr)/sr)+1
-        ilast = lr >= fspan ? length(r) : length(r) - ceil(Integer,(lr-fspan)/sr)
+        ifirst = fr <= lspan ? 1 : cld(lspan-fr, sr)+1
+        ilast = lr >= fspan ? length(r) : length(r) - cld(lr-fspan, sr)
     else
         ifirst = fr >= fspan ? 1 : length(r)+1
         ilast = fr <= lspan ? length(r) : 0
diff --git a/base/rational.jl b/base/rational.jl
index 26746ad0b4bc2..6ab022736388e 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -83,6 +83,11 @@ end
 
 function show(io::IO, x::Rational)
     show(io, numerator(x))
+
+    if isone(denominator(x)) && get(io, :typeinfo, Any) <: Rational
+        return
+    end
+
     print(io, "//")
     show(io, denominator(x))
 end
@@ -272,7 +277,7 @@ signbit(x::Rational) = signbit(x.num)
 copysign(x::Rational, y::Real) = unsafe_rational(copysign(x.num, y), x.den)
 copysign(x::Rational, y::Rational) = unsafe_rational(copysign(x.num, y.num), x.den)
 
-abs(x::Rational) = Rational(abs(x.num), x.den)
+abs(x::Rational) = unsafe_rational(checked_abs(x.num), x.den)
 
 typemin(::Type{Rational{T}}) where {T<:Signed} = unsafe_rational(T, -one(T), zero(T))
 typemin(::Type{Rational{T}}) where {T<:Integer} = unsafe_rational(T, zero(T), one(T))
@@ -540,7 +545,7 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
         pow = trailing_zeros(den)
         den >>= pow
         pow = -pow
-        if den == 1 && abs(num) < 9007199254740992
+        if den == 1 && uabs(num) < UInt64(maxintfloat(Float64))
             return hash(ldexp(Float64(num),pow),h)
         end
     end
diff --git a/base/reduce.jl b/base/reduce.jl
index ae2671a2e746a..61a0f466b2902 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -1354,15 +1354,7 @@ count(itr; init=0) = count(identity, itr; init)
 
 count(f, itr; init=0) = _simple_count(f, itr, init)
 
-_simple_count(pred, itr, init) = _simple_count_helper(Generator(pred, itr), init)
-
-function _simple_count_helper(g, init::T) where {T}
-    n::T = init
-    for x in g
-        n += x::Bool
-    end
-    return n
-end
+_simple_count(pred, itr, init) = sum(_bool(pred), itr; init)
 
 function _simple_count(::typeof(identity), x::Array{Bool}, init::T=0) where {T}
     n::T = init
diff --git a/base/reducedim.jl b/base/reducedim.jl
index dc34b4feb1f6a..c1c58ccdfefed 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -211,8 +211,8 @@ reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reduce
 let
     BitIntFloat = Union{BitInteger, IEEEFloat}
     T = Union{
-        [AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
-        [AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}
+        Any[AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
+        Any[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}
 
     global function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region)
         z = zero(f(zero(eltype(A))))
@@ -525,6 +525,8 @@ sum(f, A::AbstractArray; dims)
     sum!(r, A)
 
 Sum elements of `A` over the singleton dimensions of `r`, and write results to `r`.
+Note that since the sum! function is intended to operate without making any allocations,
+the target should not alias with the source.
 
 # Examples
 ```jldoctest
diff --git a/base/reflection.jl b/base/reflection.jl
index 102c1ca9605e3..97f1ed14c6729 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -2,19 +2,6 @@
 
 # name and module reflection
 
-"""
-    nameof(m::Module) -> Symbol
-
-Get the name of a `Module` as a [`Symbol`](@ref).
-
-# Examples
-```jldoctest
-julia> nameof(Base.Broadcast)
-:Broadcast
-```
-"""
-nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
-
 """
     parentmodule(m::Module) -> Module
 
@@ -347,17 +334,6 @@ macro locals()
     return Expr(:locals)
 end
 
-"""
-    objectid(x) -> UInt
-
-Get a hash value for `x` based on object identity.
-
-If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`.
-
-See also [`hash`](@ref), [`IdDict`](@ref).
-"""
-objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)
-
 # concrete datatype predicates
 
 datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x)
@@ -398,15 +374,18 @@ LLT_ALIGN(x, sz) = (x + sz - 1) & -sz
 # amount of total space taken by T when stored in a container
 function aligned_sizeof(@nospecialize T::Type)
     @_foldable_meta
-    if isbitsunion(T)
-        _, sz, al = uniontype_layout(T)
-        return LLT_ALIGN(sz, al)
+    if isa(T, Union)
+        if allocatedinline(T)
+            # NOTE this check is equivalent to `isbitsunion(T)`, we can improve type
+            # inference in the second branch with the outer `isa(T, Union)` check
+            _, sz, al = uniontype_layout(T)
+            return LLT_ALIGN(sz, al)
+        end
     elseif allocatedinline(T)
         al = datatype_alignment(T)
         return LLT_ALIGN(Core.sizeof(T), al)
-    else
-        return Core.sizeof(Ptr{Cvoid})
     end
+    return Core.sizeof(Ptr{Cvoid})
 end
 
 gc_alignment(sz::Integer) = Int(ccall(:jl_alignment, Cint, (Csize_t,), sz))
@@ -613,6 +592,28 @@ Return `true` if `x` is an instance of an [`isbitstype`](@ref) type.
 """
 isbits(@nospecialize x) = isbitstype(typeof(x))
 
+"""
+    objectid(x) -> UInt
+
+Get a hash value for `x` based on object identity.
+
+If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`.
+
+See also [`hash`](@ref), [`IdDict`](@ref).
+"""
+function objectid(x)
+    # objectid is foldable iff it isn't a pointer.
+    if isidentityfree(typeof(x))
+        return _foldable_objectid(x)
+    end
+    return _objectid(x)
+end
+function _foldable_objectid(@nospecialize(x))
+    @_foldable_meta
+    _objectid(x)
+end
+_objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)
+
 """
     isdispatchtuple(T)
 
@@ -974,10 +975,11 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
     if debuginfo !== :source && debuginfo !== :none
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
-    return map(method_instances(f, t)) do m
+    world = get_world_counter()
+    return map(method_instances(f, t, world)) do m
         if generated && hasgenerator(m)
             if may_invoke_generator(m)
-                return ccall(:jl_code_for_staged, Any, (Any,), m)::CodeInfo
+                return ccall(:jl_code_for_staged, Any, (Any, UInt), m, world)::CodeInfo
             else
                 error("Could not expand generator for `@generated` method ", m, ". ",
                       "This can happen if the provided argument types (", t, ") are ",
@@ -1066,6 +1068,8 @@ methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,))
 function methods_including_ambiguous(@nospecialize(f), @nospecialize(t))
     tt = signature_type(f, t)
     world = get_world_counter()
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
+        error("code reflection cannot be used from generated functions")
     min = RefValue{UInt}(typemin(UInt))
     max = RefValue{UInt}(typemax(UInt))
     ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector
@@ -1083,29 +1087,31 @@ function visit(f, mt::Core.MethodTable)
     nothing
 end
 function visit(f, mc::Core.TypeMapLevel)
-    if mc.targ !== nothing
-        e = mc.targ::Vector{Any}
+    function avisit(f, e::Array{Any,1})
         for i in 2:2:length(e)
-            isassigned(e, i) && visit(f, e[i])
+            isassigned(e, i) || continue
+            ei = e[i]
+            if ei isa Vector{Any}
+                for j in 2:2:length(ei)
+                    isassigned(ei, j) || continue
+                    visit(f, ei[j])
+                end
+            else
+                visit(f, ei)
+            end
         end
     end
+    if mc.targ !== nothing
+        avisit(f, mc.targ::Vector{Any})
+    end
     if mc.arg1 !== nothing
-        e = mc.arg1::Vector{Any}
-        for i in 2:2:length(e)
-            isassigned(e, i) && visit(f, e[i])
-        end
+        avisit(f, mc.arg1::Vector{Any})
     end
     if mc.tname !== nothing
-        e = mc.tname::Vector{Any}
-        for i in 2:2:length(e)
-            isassigned(e, i) && visit(f, e[i])
-        end
+        avisit(f, mc.tname::Vector{Any})
     end
     if mc.name1 !== nothing
-        e = mc.name1::Vector{Any}
-        for i in 2:2:length(e)
-            isassigned(e, i) && visit(f, e[i])
-        end
+        avisit(f, mc.name1::Vector{Any})
     end
     mc.list !== nothing && visit(f, mc.list)
     mc.any !== nothing && visit(f, mc.any)
@@ -1118,6 +1124,34 @@ function visit(f, d::Core.TypeMapEntry)
     end
     nothing
 end
+struct MethodSpecializations
+    specializations::Union{Nothing, Core.MethodInstance, Core.SimpleVector}
+end
+"""
+    specializations(m::Method) → itr
+
+Return an iterator `itr` of all compiler-generated specializations of `m`.
+"""
+specializations(m::Method) = MethodSpecializations(isdefined(m, :specializations) ? m.specializations : nothing)
+function iterate(specs::MethodSpecializations)
+    s = specs.specializations
+    s === nothing && return nothing
+    isa(s, Core.MethodInstance) && return (s, nothing)
+    return iterate(specs, 0)
+end
+iterate(specs::MethodSpecializations, ::Nothing) = nothing
+function iterate(specs::MethodSpecializations, i::Int)
+    s = specs.specializations::Core.SimpleVector
+    n = length(s)
+    i >= n && return nothing
+    item = nothing
+    while i < n && item === nothing
+        item = s[i+=1]
+    end
+    item === nothing && return nothing
+    return (item, i)
+end
+length(specs::MethodSpecializations) = count(Returns(true), specs)
 
 function length(mt::Core.MethodTable)
     n = 0
@@ -1132,15 +1166,17 @@ uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m, m.sourc
                              isdefined(m, :generator) ? error("Method is @generated; try `code_lowered` instead.") :
                              error("Code for this Method is not available.")
 _uncompressed_ir(m::Method, s::CodeInfo) = copy(s)
-_uncompressed_ir(m::Method, s::Array{UInt8,1}) = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)::CodeInfo
-_uncompressed_ir(ci::Core.CodeInstance, s::Array{UInt8,1}) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo
+_uncompressed_ir(m::Method, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)::CodeInfo
+_uncompressed_ir(ci::Core.CodeInstance, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo
 # for backwards compat
 const uncompressed_ast = uncompressed_ir
 const _uncompressed_ast = _uncompressed_ir
 
-function method_instances(@nospecialize(f), @nospecialize(t), world::UInt=get_world_counter())
+function method_instances(@nospecialize(f), @nospecialize(t), world::UInt)
     tt = signature_type(f, t)
     results = Core.MethodInstance[]
+    # this make a better error message than the typeassert that follows
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
     for match in _methods_by_ftype(tt, -1, world)::Vector
         instance = Core.Compiler.specialize_method(match)
         push!(results, instance)
@@ -1167,7 +1203,7 @@ struct CodegenParams
                    prefer_specsig::Bool=false,
                    gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(),
                    safepoint_on_entry::Bool=true,
-                   lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred),
+                   lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})),
                    generic_context = nothing)
         return new(
             Cint(track_allocations), Cint(code_coverage),
@@ -1211,20 +1247,22 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim
     # generator only has one method
     generator = method.generator
     isa(generator, Core.GeneratedFunctionStub) || return false
-    gen_mthds = methods(generator.gen)::MethodList
-    length(gen_mthds) == 1 || return false
+    gen_mthds = _methods_by_ftype(Tuple{typeof(generator.gen), Vararg{Any}}, 1, method.primary_world)
+    (gen_mthds isa Vector && length(gen_mthds) == 1) || return false
 
-    generator_method = first(gen_mthds)
+    generator_method = first(gen_mthds).method
     nsparams = length(sparams)
     isdefined(generator_method, :source) || return false
     code = generator_method.source
     nslots = ccall(:jl_ir_nslots, Int, (Any,), code)
-    at = unwrap_unionall(atype)::DataType
+    at = unwrap_unionall(atype)
+    at isa DataType || return false
     (nslots >= 1 + length(sparams) + length(at.parameters)) || return false
 
+    firstarg = 1
     for i = 1:nsparams
         if isa(sparams[i], TypeVar)
-            if (ast_slotflag(code, 1 + i) & SLOT_USED) != 0
+            if (ast_slotflag(code, firstarg + i) & SLOT_USED) != 0
                 return false
             end
         end
@@ -1233,7 +1271,7 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim
     non_va_args = method.isva ? nargs - 1 : nargs
     for i = 1:non_va_args
         if !isdispatchelem(at.parameters[i])
-            if (ast_slotflag(code, 1 + i + nsparams) & SLOT_USED) != 0
+            if (ast_slotflag(code, firstarg + i + nsparams) & SLOT_USED) != 0
                 return false
             end
         end
@@ -1241,7 +1279,7 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim
     if method.isva
         # If the va argument is used, we need to ensure that all arguments that
         # contribute to the va tuple are dispatchelemes
-        if (ast_slotflag(code, 1 + nargs + nsparams) & SLOT_USED) != 0
+        if (ast_slotflag(code, firstarg + nargs + nsparams) & SLOT_USED) != 0
             for i = (non_va_args+1):length(at.parameters)
                 if !isdispatchelem(at.parameters[i])
                     return false
@@ -1331,7 +1369,8 @@ function code_typed_by_type(@nospecialize(tt::Type);
                             debuginfo::Symbol=:default,
                             world = get_world_counter(),
                             interp = Core.Compiler.NativeInterpreter(world))
-    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
+        error("code reflection cannot be used from generated functions")
     if @isdefined(IRShow)
         debuginfo = IRShow.debuginfo(debuginfo)
     elseif debuginfo === :default
@@ -1440,7 +1479,7 @@ function code_ircode_by_type(
     interp = Core.Compiler.NativeInterpreter(world),
     optimize_until::Union{Integer,AbstractString,Nothing} = nothing,
 )
-    ccall(:jl_is_in_pure_context, Bool, ()) &&
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
         error("code reflection cannot be used from generated functions")
     tt = to_tuple_type(tt)
     matches = _methods_by_ftype(tt, -1, world)::Vector
@@ -1464,10 +1503,44 @@ function code_ircode_by_type(
     return asts
 end
 
+
+"""
+    Base.return_types(f::Function, types::DataType=default_tt(f);
+                      world::UInt=get_world_counter(), interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world))
+
+Return a list of possible return types for a given function `f` and argument types `types`.
+The list corresponds to the results of type inference on all the possible method match
+candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods).
+
+# Example
+
+```julia
+julia> Base.return_types(sum, Tuple{Vector{Int}})
+1-element Vector{Any}:
+ Int64
+
+julia> methods(sum, (Union{Vector{Int},UnitRange{Int}},))
+# 2 methods for generic function "sum" from Base:
+ [1] sum(r::AbstractRange{<:Real})
+     @ range.jl:1396
+ [2] sum(a::AbstractArray; dims, kw...)
+     @ reducedim.jl:996
+
+julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},))
+2-element Vector{Any}:
+ Int64 # the result of inference on sum(r::AbstractRange{<:Real})
+ Int64 # the result of inference on sum(a::AbstractArray; dims, kw...)
+```
+
+!!! warning
+    The `return_types` function should not be used from generated functions;
+    doing so will result in an error.
+"""
 function return_types(@nospecialize(f), @nospecialize(types=default_tt(f));
                       world = get_world_counter(),
                       interp = Core.Compiler.NativeInterpreter(world))
-    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
+        error("code reflection cannot be used from generated functions")
     if isa(f, Core.OpaqueClosure)
         _, rt = only(code_typed_opaque_closure(f))
         return Any[rt]
@@ -1488,10 +1561,47 @@ function return_types(@nospecialize(f), @nospecialize(types=default_tt(f));
     return rts
 end
 
+"""
+    infer_effects(f, types=default_tt(f); world=get_world_counter(), interp=Core.Compiler.NativeInterpreter(world))
+
+Compute the `Effects` of a function `f` with argument types `types`. The `Effects` represents the computational effects of the function call, such as whether it is free of side effects, guaranteed not to throw an exception, guaranteed to terminate, etc. The `world` and `interp` arguments specify the world counter and the native interpreter to use for the analysis.
+
+# Arguments
+- `f`: The function to analyze.
+- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`.
+- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter.
+- `interp` (optional): The native interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`.
+
+# Returns
+- `effects::Effects`: The computed effects of the function call.
+
+# Example
+
+```julia
+julia> function foo(x)
+           y = x * 2
+           return y
+       end;
+
+julia> effects = Base.infer_effects(foo, (Int,))
+(+c,+e,+n,+t,+s,+m,+i)
+```
+
+This function will return an `Effects` object with information about the computational effects of the function `foo` when called with an `Int` argument. See the documentation for `Effects` for more information on the various effect properties.
+
+!!! warning
+    The `infer_effects` function should not be used from generated functions;
+    doing so will result in an error.
+
+# See Also
+- [`Core.Compiler.Effects`](@ref): A type representing the computational effects of a method call.
+- [`Base.@assume_effects`](@ref): A macro for making assumptions about the effects of a method.
+"""
 function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f));
                        world = get_world_counter(),
                        interp = Core.Compiler.NativeInterpreter(world))
-    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+    (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) &&
+        error("code reflection cannot be used from generated functions")
     if isa(f, Core.Builtin)
         types = to_tuple_type(types)
         argtypes = Any[Core.Compiler.Const(f), types.parameters...]
@@ -1548,7 +1658,8 @@ function print_statement_costs(io::IO, @nospecialize(tt::Type);
         else
             empty!(cst)
             resize!(cst, length(code.code))
-            maxcost = Core.Compiler.statement_costs!(cst, code.code, code, Any[match.sparams...], false, params)
+            sptypes = Core.Compiler.VarState[Core.Compiler.VarState(sp, false) for sp in match.sparams]
+            maxcost = Core.Compiler.statement_costs!(cst, code.code, code, sptypes, false, params)
             nd = ndigits(maxcost)
             irshow_config = IRShow.IRShowConfig() do io, linestart, idx
                 print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " ")
@@ -1566,6 +1677,7 @@ function _which(@nospecialize(tt::Type);
     method_table::Union{Nothing,Core.MethodTable,Core.Compiler.MethodTableView}=nothing,
     world::UInt=get_world_counter(),
     raise::Bool=true)
+    world == typemax(UInt) && error("code reflection cannot be used from generated functions")
     if method_table === nothing
         table = Core.Compiler.InternalMethodTable(world)
     elseif method_table isa Core.MethodTable
@@ -1708,20 +1820,30 @@ julia> hasmethod(g, Tuple{}, (:a, :b, :c, :d))  # g accepts arbitrary kwargs
 true
 ```
 """
-function hasmethod(@nospecialize(f), @nospecialize(t); world::UInt=get_world_counter())
-    t = signature_type(f, t)
-    return ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), t, nothing, world) !== nothing
+function hasmethod(@nospecialize(f), @nospecialize(t))
+    return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t))
 end
 
-function hasmethod(@nospecialize(f), @nospecialize(t), kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter())
-    # TODO: this appears to be doing the wrong queries
-    hasmethod(f, t, world=world) || return false
-    isempty(kwnames) && return true
-    m = which(f, t)
-    kws = kwarg_decl(m)
+function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t))
+    world = kwargs.world::UInt # make sure this is the only local, to avoid confusing kwarg_decl()
+    return ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), signature_type(f, t), nothing, world) !== nothing
+end
+
+function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter())
+    @nospecialize
+    isempty(kwnames) && return hasmethod(f, t; world)
+    t = to_tuple_type(t)
+    ft = Core.Typeof(f)
+    u = unwrap_unionall(t)::DataType
+    tt = rewrap_unionall(Tuple{typeof(Core.kwcall), NamedTuple, ft, u.parameters...}, t)
+    match = ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world)
+    match === nothing && return false
+    kws = ccall(:jl_uncompress_argnames, Array{Symbol,1}, (Any,), (match::Method).slot_syms)
+    isempty(kws) && return true # some kwfuncs simply forward everything directly
     for kw in kws
         endswith(String(kw), "...") && return true
     end
+    kwnames = Symbol[kwnames[i] for i in 1:length(kwnames)]
     return issubset(kwnames, kws)
 end
 
diff --git a/base/refvalue.jl b/base/refvalue.jl
index 7cbb651d41aee..000088ff0ce76 100644
--- a/base/refvalue.jl
+++ b/base/refvalue.jl
@@ -45,7 +45,10 @@ function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T})
         # If it is actually an immutable, then we can't take it's pointer directly
         # Instead, explicitly load the pointer from the `RefValue`,
         # which also ensures this returns same pointer as the one rooted in the `RefValue` object.
-        p = pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), 1, Core.sizeof(Ptr{Cvoid}))
+        p = atomic_pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), :monotonic)
+    end
+    if p == C_NULL
+        throw(UndefRefError())
     end
     return p
 end
diff --git a/base/regex.jl b/base/regex.jl
index 820fc3eca502a..400784e1b27d7 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -8,7 +8,7 @@ const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.MATCH_INVALID_UTF | PCRE.ALT_BSUX
 const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK
 
 """
-    Regex(pattern[, flags])
+    Regex(pattern[, flags]) <: AbstractPattern
 
 A type representing a regular expression. `Regex` objects can be used to match strings
 with [`match`](@ref).
@@ -46,19 +46,24 @@ mutable struct Regex <: AbstractPattern
 end
 
 function Regex(pattern::AbstractString, flags::AbstractString)
-    options = DEFAULT_COMPILER_OPTS
+    compile_options = DEFAULT_COMPILER_OPTS
+    match_options = DEFAULT_MATCH_OPTS
     for f in flags
         if f == 'a'
-            options &= ~PCRE.UCP
+            # instruct pcre2 to treat the strings as simple bytes (aka "ASCII"), not char encodings
+            compile_options &= ~PCRE.UCP  # user can re-enable with (*UCP)
+            compile_options &= ~PCRE.UTF # user can re-enable with (*UTF)
+            compile_options &= ~PCRE.MATCH_INVALID_UTF # this would force on UTF
+            match_options &= ~PCRE.NO_UTF_CHECK # if the user did force on UTF, we should check it for safety
         else
-            options |= f=='i' ? PCRE.CASELESS  :
-                       f=='m' ? PCRE.MULTILINE :
-                       f=='s' ? PCRE.DOTALL    :
-                       f=='x' ? PCRE.EXTENDED  :
-                       throw(ArgumentError("unknown regex flag: $f"))
+            compile_options |= f=='i' ? PCRE.CASELESS  :
+                               f=='m' ? PCRE.MULTILINE :
+                               f=='s' ? PCRE.DOTALL    :
+                               f=='x' ? PCRE.EXTENDED  :
+                               throw(ArgumentError("unknown regex flag: $f"))
         end
     end
-    Regex(pattern, options, DEFAULT_MATCH_OPTS)
+    Regex(pattern, compile_options, match_options)
 end
 Regex(pattern::AbstractString) = Regex(pattern, DEFAULT_COMPILER_OPTS, DEFAULT_MATCH_OPTS)
 
@@ -96,9 +101,15 @@ listed after the ending quote, to change its behaviour:
 - `s` allows the `.` modifier to match newlines.
 - `x` enables "comment mode": whitespace is enabled except when escaped with `\\`, and `#`
   is treated as starting a comment.
-- `a` disables `UCP` mode (enables ASCII mode). By default `\\B`, `\\b`, `\\D`, `\\d`, `\\S`,
-  `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With this option,
-  these sequences only match ASCII characters.
+- `a` enables ASCII mode (disables `UTF` and `UCP` modes). By default `\\B`, `\\b`, `\\D`,
+  `\\d`, `\\S`, `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With
+  this option, these sequences only match ASCII characters. This includes `\\u` also, which
+  will emit the specified character value directly as a single byte, and not attempt to
+  encode it into UTF-8. Importantly, this option allows matching against invalid UTF-8
+  strings, by treating both matcher and target as simple bytes (as if they were ISO/IEC
+  8859-1 / Latin-1 bytes) instead of as character encodings. In this case, this option is
+  often combined with `s`. This option can be further refined by starting the pattern with
+  (*UCP) or (*UTF).
 
 See [`Regex`](@ref) if interpolation is needed.
 
@@ -112,23 +123,38 @@ This regex has the first three flags enabled.
 macro r_str(pattern, flags...) Regex(pattern, flags...) end
 
 function show(io::IO, re::Regex)
-    imsxa = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED|PCRE.UCP
+    imsx = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED
+    ac = PCRE.UTF|PCRE.MATCH_INVALID_UTF|PCRE.UCP
+    am = PCRE.NO_UTF_CHECK
     opts = re.compile_options
-    if (opts & ~imsxa) == (DEFAULT_COMPILER_OPTS & ~imsxa)
+    mopts = re.match_options
+    default = ((opts & ~imsx) | ac) == DEFAULT_COMPILER_OPTS
+    if default
+       if (opts & ac) == ac
+           default = mopts == DEFAULT_MATCH_OPTS
+       elseif (opts & ac) == 0
+           default = mopts == (DEFAULT_MATCH_OPTS & ~am)
+       else
+           default = false
+       end
+   end
+    if default
         print(io, "r\"")
         escape_raw_string(io, re.pattern)
         print(io, "\"")
-        if (opts & PCRE.CASELESS ) != 0; print(io, 'i'); end
-        if (opts & PCRE.MULTILINE) != 0; print(io, 'm'); end
-        if (opts & PCRE.DOTALL   ) != 0; print(io, 's'); end
-        if (opts & PCRE.EXTENDED ) != 0; print(io, 'x'); end
-        if (opts & PCRE.UCP      ) == 0; print(io, 'a'); end
+        if (opts & PCRE.CASELESS ) != 0; print(io, "i"); end
+        if (opts & PCRE.MULTILINE) != 0; print(io, "m"); end
+        if (opts & PCRE.DOTALL   ) != 0; print(io, "s"); end
+        if (opts & PCRE.EXTENDED ) != 0; print(io, "x"); end
+        if (opts & ac            ) == 0; print(io, "a"); end
     else
         print(io, "Regex(")
         show(io, re.pattern)
-        print(io, ',')
+        print(io, ", ")
         show(io, opts)
-        print(io, ')')
+        print(io, ", ")
+        show(io, mopts)
+        print(io, ")")
     end
 end
 
@@ -139,7 +165,7 @@ in a string using an `AbstractPattern`.
 abstract type AbstractMatch end
 
 """
-    RegexMatch
+    RegexMatch <: AbstractMatch
 
 A type representing a single match to a `Regex` found in a string.
 Typically created from the [`match`](@ref) function.
@@ -243,19 +269,17 @@ end
 
 # Capture group extraction
 getindex(m::RegexMatch, idx::Integer) = m.captures[idx]
-function getindex(m::RegexMatch, name::Symbol)
+function getindex(m::RegexMatch, name::Union{AbstractString,Symbol})
     idx = PCRE.substring_number_from_name(m.regex.regex, name)
     idx <= 0 && error("no capture group named $name found in regex")
     m[idx]
 end
-getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)]
 
 haskey(m::RegexMatch, idx::Integer) = idx in eachindex(m.captures)
-function haskey(m::RegexMatch, name::Symbol)
+function haskey(m::RegexMatch, name::Union{AbstractString,Symbol})
     idx = PCRE.substring_number_from_name(m.regex.regex, name)
     return idx > 0
 end
-haskey(m::RegexMatch, name::AbstractString) = haskey(m, Symbol(name))
 
 iterate(m::RegexMatch, args...) = iterate(m.captures, args...)
 length(m::RegexMatch) = length(m.captures)
@@ -501,7 +525,7 @@ function count(t::Union{AbstractChar,AbstractString,AbstractPattern}, s::Abstrac
 end
 
 """
-    SubstitutionString(substr)
+    SubstitutionString(substr) <: AbstractString
 
 Stores the given string `substr` as a `SubstitutionString`, for use in regular expression
 substitutions. Most commonly constructed using the [`@s_str`](@ref) macro.
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index ebcb743729160..2fc246f86fa96 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -26,7 +26,7 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
 
     global reinterpret
 
-    """
+    @doc """
         reinterpret(T::DataType, A::AbstractArray)
 
     Construct a view of the array with the same binary data as the given
@@ -38,13 +38,13 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     ```jldoctest
     julia> reinterpret(Float32, UInt32[1 2 3 4 5])
     1×5 reinterpret(Float32, ::Matrix{UInt32}):
-    1.0f-45  3.0f-45  4.0f-45  6.0f-45  7.0f-45
+     1.0f-45  3.0f-45  4.0f-45  6.0f-45  7.0f-45
 
     julia> reinterpret(Complex{Int}, 1:6)
     3-element reinterpret(Complex{$Int}, ::UnitRange{$Int}):
-    1 + 2im
-    3 + 4im
-    5 + 6im
+     1 + 2im
+     3 + 4im
+     5 + 6im
     ```
     """
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
diff --git a/base/set.jl b/base/set.jl
index 6f8580e222e40..a91bf328bd911 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -13,7 +13,7 @@ See also: [`AbstractSet`](@ref), [`BitSet`](@ref), [`Dict`](@ref),
 [`push!`](@ref), [`empty!`](@ref), [`union!`](@ref), [`in`](@ref), [`isequal`](@ref)
 
 # Examples
-```jldoctest filter = r"^\\S.+"
+```jldoctest; filter = r"^  '.'"ma
 julia> s = Set("aaBca")
 Set{Char} with 3 elements:
   'a'
@@ -23,9 +23,9 @@ Set{Char} with 3 elements:
 julia> push!(s, 'b')
 Set{Char} with 4 elements:
   'a'
-  'c'
   'b'
   'B'
+  'c'
 
 julia> s = Set([NaN, 0.0, 1.0, 2.0]);
 
@@ -617,7 +617,7 @@ function replace_pairs!(res, A, count::Int, old_new::Tuple{Vararg{Pair}})
 end
 
 """
-    replace!(new::Function, A; [count::Integer])
+    replace!(new::Union{Function, Type}, A; [count::Integer])
 
 Replace each element `x` in collection `A` by `new(x)`.
 If `count` is specified, then replace at most `count` values in total
@@ -710,7 +710,7 @@ subtract_singletontype(::Type{T}, x::Pair{K}, y::Pair...) where {T, K} =
     subtract_singletontype(subtract_singletontype(T, y...), x)
 
 """
-    replace(new::Function, A; [count::Integer])
+    replace(new::Union{Function, Type}, A; [count::Integer])
 
 Return a copy of `A` where each value `x` in `A` is replaced by `new(x)`.
 If `count` is specified, then replace at most `count` values in total
diff --git a/base/shell.jl b/base/shell.jl
index f443a1f9c094a..5bfd11fb46d29 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -292,9 +292,9 @@ function shell_escape_csh(io::IO, args::AbstractString...)
         first = false
         i = 1
         while true
-            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z" => "",
-                         r"^[^']*\z" => "'", r"^[^\$\`\"]*\z" => "\"",
-                         r"^[^']+"  => "'", r"^[^\$\`\"]+"  => "\"")
+            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z"sa => "",
+                         r"^[^']*\z"sa => "'", r"^[^\$\`\"]*\z"sa => "\"",
+                         r"^[^']+"sa  => "'", r"^[^\$\`\"]+"sa  => "\"")
                 if ((m = match(r, SubString(arg, i))) !== nothing)
                     write(io, e)
                     write(io, replace(m.match, '\n' => "\\\n"))
@@ -361,12 +361,12 @@ cmdargs = Base.shell_escape_wincmd("Passing args with %cmdargs% works 100%!")
 run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs))
 ```
 
-!warning
+!!! warning
     The argument parsing done by CMD when calling batch files (either inside
     `.bat` files or as arguments to them) is not fully compatible with the
     output of this function. In particular, the processing of `%` is different.
 
-!important
+!!! important
     Due to a peculiar behavior of the CMD parser/interpreter, each command
     after a literal `|` character (indicating a command pipeline) must have
     `shell_escape_wincmd` applied twice since it will be parsed twice by CMD.
@@ -391,7 +391,7 @@ julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"")
 """
 function shell_escape_wincmd(io::IO, s::AbstractString)
     # https://stackoverflow.com/a/4095133/1990689
-    occursin(r"[\r\n\0]", s) &&
+    occursin(r"[\r\n\0]"sa, s) &&
         throw(ArgumentError("control character unsupported by CMD.EXE"))
     i = 1
     len = ncodeunits(s)
@@ -446,7 +446,7 @@ function escape_microsoft_c_args(io::IO, args::AbstractString...)
         else
             write(io, ' ')  # separator
         end
-        if isempty(arg) || occursin(r"[ \t\"]", arg)
+        if isempty(arg) || occursin(r"[ \t\"]"sa, arg)
             # Julia raw strings happen to use the same escaping convention
             # as the argv[] parser in Microsoft's C runtime library.
             write(io, '"')
diff --git a/base/show.jl b/base/show.jl
index 3dcdac77afb89..36f7df54d0008 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -1060,11 +1060,12 @@ end
 function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[])
     parameters = x.parameters::SimpleVector
     istuple = x.name === Tuple.name
+    isnamedtuple = x.name === typename(NamedTuple)
     n = length(parameters)
 
     # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg`
-    max_n = 3
     if istuple
+        max_n = 3
         taillen = 1
         for i in (n-1):-1:1
             if parameters[i] === parameters[n]
@@ -1090,10 +1091,31 @@ function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[])
             end
             print(io, "}")
         end
-    else
-        show_type_name(io, x.name)
-        show_typeparams(io, parameters, (unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres)
+        return
+    elseif isnamedtuple
+        syms, types = parameters
+        first = true
+        if syms isa Tuple && types isa DataType
+            print(io, "@NamedTuple{")
+            for i in 1:length(syms)
+                if !first
+                    print(io, ", ")
+                end
+                print(io, syms[i])
+                typ = types.parameters[i]
+                if typ !== Any
+                    print(io, "::")
+                    show(io, typ)
+                end
+                first = false
+            end
+            print(io, "}")
+            return
+        end
     end
+
+    show_type_name(io, x.name)
+    show_typeparams(io, parameters, (unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres)
 end
 
 function show_supertypes(io::IO, typ::DataType)
@@ -1372,9 +1394,11 @@ show(io::IO, s::Symbol) = show_unquoted_quote_expr(io, s, 0, 0, 0)
 #   eval(Meta.parse("Set{Int64}([2,3,1])")) # ==> An actual set
 # While this isn’t true of ALL show methods, it is of all ASTs.
 
-const ExprNode = Union{Expr, QuoteNode, Slot, LineNumberNode, SSAValue,
-                       GotoNode, GlobalRef, PhiNode, PhiCNode, UpsilonNode,
-                       Core.Compiler.GotoIfNot, Core.Compiler.ReturnNode}
+using Core.Compiler: TypedSlot, UnoptSlot
+
+const ExprNode = Union{Expr, QuoteNode, UnoptSlot, LineNumberNode, SSAValue,
+                       GotoNode, GotoIfNot, GlobalRef, PhiNode, PhiCNode, UpsilonNode,
+                       ReturnNode}
 # Operators have precedence levels from 1-N, and show_unquoted defaults to a
 # precedence level of 0 (the fourth argument). The top-level print and show
 # methods use a precedence of -1 to specially allow space-separated macro syntax.
@@ -1723,7 +1747,7 @@ function show_globalref(io::IO, ex::GlobalRef; allow_macroname=false)
     nothing
 end
 
-function show_unquoted(io::IO, ex::Slot, ::Int, ::Int)
+function show_unquoted(io::IO, ex::UnoptSlot, ::Int, ::Int)
     typ = isa(ex, TypedSlot) ? ex.typ : Any
     slotid = ex.id
     slotnames = get(io, :SOURCE_SLOTNAMES, false)
@@ -1818,10 +1842,16 @@ function show_import_path(io::IO, ex, quote_level)
         end
     elseif ex.head === :(.)
         for i = 1:length(ex.args)
-            if ex.args[i] === :(.)
+            sym = ex.args[i]::Symbol
+            if sym === :(.)
                 print(io, '.')
             else
-                show_sym(io, ex.args[i]::Symbol, allow_macroname=(i==length(ex.args)))
+                if sym === :(..)
+                    # special case for https://github.com/JuliaLang/julia/issues/49168
+                    print(io, "(..)")
+                else
+                    show_sym(io, sym, allow_macroname=(i==length(ex.args)))
+                end
                 i < length(ex.args) && print(io, '.')
             end
         end
@@ -2588,7 +2618,7 @@ module IRShow
     const Compiler = Core.Compiler
     using Core.IR
     import ..Base
-    import .Compiler: IRCode, ReturnNode, GotoIfNot, CFG, scan_ssa_use!, Argument,
+    import .Compiler: IRCode, TypedSlot, CFG, scan_ssa_use!,
         isexpr, compute_basic_blocks, block_for_inst, IncrementalCompact,
         Effects, ALWAYS_TRUE, ALWAYS_FALSE
     Base.getindex(r::Compiler.StmtRange, ind::Integer) = Compiler.getindex(r, ind)
@@ -2634,13 +2664,18 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source)
 end
 
 function show(io::IO, inferred::Core.Compiler.InferenceResult)
-    tt = inferred.linfo.specTypes.parameters[2:end]
+    mi = inferred.linfo
+    tt = mi.specTypes.parameters[2:end]
     tts = join(["::$(t)" for t in tt], ", ")
     rettype = inferred.result
     if isa(rettype, Core.Compiler.InferenceState)
         rettype = rettype.bestguess
     end
-    print(io, "$(inferred.linfo.def.name)($(tts)) => $(rettype)")
+    if isa(mi.def, Method)
+        print(io, mi.def.name, "(", tts, " => ", rettype, ")")
+    else
+        print(io, "Toplevel MethodInstance thunk from ", mi.def, " => ", rettype)
+    end
 end
 
 function show(io::IO, ::Core.Compiler.NativeInterpreter)
diff --git a/base/slicearray.jl b/base/slicearray.jl
index fae353dbe7690..e5a433cdb8d2a 100644
--- a/base/slicearray.jl
+++ b/base/slicearray.jl
@@ -40,7 +40,8 @@ unitaxis(::AbstractArray) = Base.OneTo(1)
 
 function Slices(A::P, slicemap::SM, ax::AX) where {P,SM,AX}
     N = length(ax)
-    S = Base._return_type(view, Tuple{P, map((a,l) -> l === (:) ? Colon : eltype(a), axes(A), slicemap)...})
+    argT = map((a,l) -> l === (:) ? Colon : eltype(a), axes(A), slicemap)
+    S = Base.promote_op(view, P, argT...)
     Slices{P,SM,AX,S,N}(A, slicemap, ax)
 end
 
diff --git a/base/some.jl b/base/some.jl
index 08cb3c1648ba1..0d538cbed6c23 100644
--- a/base/some.jl
+++ b/base/some.jl
@@ -29,6 +29,7 @@ end
 function nonnothingtype_checked(T::Type)
     R = nonnothingtype(T)
     R >: T && error("could not compute non-nothing type")
+    R <: Union{} && error("cannot convert a value to nothing for assignment")
     return R
 end
 
diff --git a/base/sort.jl b/base/sort.jl
index 2ecc7ff62b291..0e84657fc481e 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -429,19 +429,18 @@ macro getkw(syms...)
     Expr(:block, (:($(esc(:((kw, $sym) = $getter(v, o, kw))))) for (sym, getter) in zip(syms, getters))...)
 end
 
-for (sym, deps, exp, type) in [
-        (:lo, (), :(firstindex(v)), Integer),
-        (:hi, (), :(lastindex(v)),  Integer),
-        (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))),
-        (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))),
-        (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype
-        (:allow_legacy_dispatch, (), true, Bool)]
+for (sym, exp, type) in [
+        (:lo, :(firstindex(v)), Integer),
+        (:hi, :(lastindex(v)),  Integer),
+        (:mn, :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))),
+        (:mx, :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))),
+        (:scratch, nothing, :(Union{Nothing, Vector})), # could have different eltype
+        (:allow_legacy_dispatch, true, Bool)]
     usym = Symbol(:_, sym)
     @eval function $usym(v, o, kw)
         # using missing instead of nothing because scratch could === nothing.
         res = get(kw, $(Expr(:quote, sym)), missing)
         res !== missing && return kw, res::$type
-        @getkw $(deps...)
         $sym = $exp
         (;kw..., $sym), $sym::$type
     end
@@ -534,6 +533,7 @@ Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i)
     v
 end
 Base.size(v::WithoutMissingVector) = size(v.data)
+Base.axes(v::WithoutMissingVector) = axes(v.data)
 
 """
     send_to_end!(f::Function, v::AbstractVector; [lo, hi])
@@ -578,19 +578,20 @@ elements that are not
 
 function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw)
     @getkw lo hi
-    if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering
+    if o isa DirectOrdering && eltype(v) >: Missing && nonmissingtype(eltype(v)) != eltype(v)
         lo, hi = send_to_end!(ismissing, v, o; lo, hi)
         _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi))
-    elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering &&
-                nonmissingtype(eltype(o.data)) != eltype(o.data) &&
+    elseif o isa Perm && o.order isa DirectOrdering && eltype(v) <: Integer &&
+                eltype(o.data) >: Missing && nonmissingtype(eltype(o.data)) != eltype(o.data) &&
                 all(i === j for (i,j) in zip(v, eachindex(o.data)))
         # TODO make this branch known at compile time
         # This uses a custom function because we need to ensure stability of both sides and
         # we can assume v is equal to eachindex(o.data) which allows a copying partition
         # without allocations.
         lo_i, hi_i = lo, hi
-        for (i,x) in zip(eachindex(o.data), o.data)
-            if ismissing(x) == (o.order == Reverse) # should i go at the beginning?
+        for i in eachindex(o.data) # equal to copy(v)
+            x = o.data[i]
+            if ismissing(x) == (o.order == Reverse) # should x go at the beginning/end?
                 v[lo_i] = i
                 lo_i += 1
             else
@@ -786,7 +787,7 @@ function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw)
 
     # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%)
     if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o))
-        # If reversing is valid, do so. This does violates stability.
+        # If reversing is valid, do so. This violates stability.
         reverse!(v, lo, hi)
         return scratch
     end
@@ -814,7 +815,6 @@ function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw)
         lt(o, vi, mn) && (mn = vi)
         lt(o, mx, vi) && (mx = vi)
     end
-    mn, mx
 
     lt(o, mn, mx) || return scratch # all same
 
@@ -1139,7 +1139,7 @@ function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size)
             counts[i] += 1            # increment that bucket's count
         end
 
-        counts[1] = lo                # set target index for the first bucket
+        counts[1] = lo + offset       # set target index for the first bucket
         cumsum!(counts, counts)       # set target indices for subsequent buckets
         # counts[1:mask+1] now stores indices where the first member of each bucket
         # belongs, not the number of elements in each bucket. We will put the first element
@@ -1150,7 +1150,7 @@ function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size)
             x = v[k]                  # lookup the element
             i = (x >> shift)&mask + 1 # compute its bucket's index for this pass
             j = counts[i]             # lookup the target index
-            t[j + offset] = x         # put the element where it belongs
+            t[j] = x                  # put the element where it belongs
             counts[i] = j + 1         # increment the target index for the next
         end                           #  ↳ element in this bucket
     end
@@ -1170,15 +1170,6 @@ end
 
 maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt
 maybe_unsigned(x::BitSigned) = unsigned(x)
-function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
-    mn = mx = v[lo]
-    @inbounds for i in (lo+1):hi
-        vi = v[i]
-        lt(o, vi, mn) && (mn = vi)
-        lt(o, mx, vi) && (mx = vi)
-    end
-    mn, mx
-end
 function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
     @boundscheck checkbounds(v, lo:hi)
     @inbounds for i in (lo+1):hi
@@ -1343,7 +1334,8 @@ specific algorithm to use via the `alg` keyword (see [Sorting Algorithms](@ref)
 available algorithms). The `by` keyword lets you provide a function that will be applied to
 each element before comparison; the `lt` keyword allows providing a custom "less than"
 function (note that for every `x` and `y`, only one of `lt(x,y)` and `lt(y,x)` can return
-`true`); use `rev=true` to reverse the sorting order. These options are independent and can
+`true`); use `rev=true` to reverse the sorting order. `rev=true` preserves forward stability:
+Elements that compare equal are not reversed. These options are independent and can
 be used together in all possible combinations: if both `by` and `lt` are specified, the `lt`
 function is applied to the result of the `by` function; `rev=true` reverses whatever
 ordering specified via the `by` and `lt` keywords.
@@ -1926,6 +1918,7 @@ julia> map(x->issorted(x[k]), (s1, s2))
 
 julia> s1[k] == s2[k]
 true
+```
 """
 struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm
     k::T
diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl
index 74f750064c7c2..333951b6f6024 100644
--- a/base/special/hyperbolic.jl
+++ b/base/special/hyperbolic.jl
@@ -175,7 +175,7 @@ function asinh(x::T) where T <: Union{Float32, Float64}
     #        return sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1)))
     #    d) |x| >= 2^28
     #        return sign(x)*(log(x)+ln2))
-    if isnan(x) || isinf(x)
+    if !isfinite(x)
         return x
     end
     absx = abs(x)
diff --git a/base/special/log.jl b/base/special/log.jl
index 5e20cdbaa06a6..5d7f1c8118724 100644
--- a/base/special/log.jl
+++ b/base/special/log.jl
@@ -367,7 +367,7 @@ function log1p(x::Float64)
     elseif isnan(x)
         NaN
     else
-        throw_complex_domainerror(:log1p, x)
+        throw_complex_domainerror_neg1(:log1p, x)
     end
 end
 
@@ -399,7 +399,7 @@ function log1p(x::Float32)
     elseif isnan(x)
         NaN32
     else
-        throw_complex_domainerror(:log1p, x)
+        throw_complex_domainerror_neg1(:log1p, x)
     end
 end
 
diff --git a/base/special/trig.jl b/base/special/trig.jl
index 929e259913104..5b2a23688ca6b 100644
--- a/base/special/trig.jl
+++ b/base/special/trig.jl
@@ -34,7 +34,7 @@ function sin(x::T) where T<:Union{Float32, Float64}
         end
         return sin_kernel(x)
     elseif isnan(x)
-        return T(NaN)
+        return x
     elseif isinf(x)
         sin_domain_error(x)
     end
@@ -103,7 +103,7 @@ function cos(x::T) where T<:Union{Float32, Float64}
         end
         return cos_kernel(x)
     elseif isnan(x)
-        return T(NaN)
+        return x
     elseif isinf(x)
         cos_domain_error(x)
     else
@@ -179,7 +179,7 @@ function sincos(x::T) where T<:Union{Float32, Float64}
         end
         return sincos_kernel(x)
     elseif isnan(x)
-        return T(NaN), T(NaN)
+        return x, x
     elseif isinf(x)
         sincos_domain_error(x)
     end
@@ -221,7 +221,7 @@ function tan(x::T) where T<:Union{Float32, Float64}
         end
         return tan_kernel(x)
     elseif isnan(x)
-        return T(NaN)
+        return x
     elseif isinf(x)
         tan_domain_error(x)
     end
@@ -582,8 +582,8 @@ function atan(y::T, x::T) where T<:Union{Float32, Float64}
     #    S8) ATAN2(+-INF,+INF ) is +-pi/4 ;
     #    S9) ATAN2(+-INF,-INF ) is +-3pi/4;
     #    S10) ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
-    if isnan(x) || isnan(y) # S1 or S2
-        return T(NaN)
+    if isnan(x) | isnan(y) # S1 or S2
+        return isnan(x) ? x : y
     end
 
     if x == T(1.0) # then y/x = y and x > 0, see M2
@@ -725,29 +725,41 @@ end
 
 # Uses minimax polynomial of sin(π * x) for π * x in [0, .25]
 @inline function sinpi_kernel(x::Float64)
+    sinpi_kernel_wide(x)
+end
+@inline function sinpi_kernel_wide(x::Float64)
     x² = x*x
     x⁴ = x²*x²
     r  = evalpoly(x², (2.5501640398773415, -0.5992645293202981, 0.08214588658006512,
-                      -7.370429884921779e-3, 4.662827319453555e-4, -2.1717412523382308e-5))
+                       -7.370429884921779e-3, 4.662827319453555e-4, -2.1717412523382308e-5))
     return muladd(3.141592653589793, x, x*muladd(-5.16771278004997,
                   x², muladd(x⁴, r,  1.2245907532225998e-16)))
 end
 @inline function sinpi_kernel(x::Float32)
+    Float32(sinpi_kernel_wide(x))
+end
+@inline function sinpi_kernel_wide(x::Float32)
     x = Float64(x)
-    return Float32(x*evalpoly(x*x, (3.1415926535762266, -5.167712769188119,
-                                    2.5501626483206374, -0.5992021090314925, 0.08100185277841528)))
+    return x*evalpoly(x*x, (3.1415926535762266, -5.167712769188119,
+                            2.5501626483206374, -0.5992021090314925, 0.08100185277841528))
 end
 
 @inline function sinpi_kernel(x::Float16)
+    Float16(sinpi_kernel_wide(x))
+end
+@inline function sinpi_kernel_wide(x::Float16)
     x = Float32(x)
-    return Float16(x*evalpoly(x*x, (3.1415927f0, -5.1677127f0, 2.5501626f0, -0.5992021f0, 0.081001855f0)))
+    return x*evalpoly(x*x, (3.1415927f0, -5.1677127f0, 2.5501626f0, -0.5992021f0, 0.081001855f0))
 end
 
 # Uses minimax polynomial of cos(π * x) for π * x in [0, .25]
 @inline function cospi_kernel(x::Float64)
+    cospi_kernel_wide(x)
+end
+@inline function cospi_kernel_wide(x::Float64)
     x² = x*x
     r = x²*evalpoly(x², (4.058712126416765, -1.3352627688537357, 0.23533063027900392,
-                        -0.025806887811869204, 1.9294917136379183e-3, -1.0368935675474665e-4))
+                         -0.025806887811869204, 1.9294917136379183e-3, -1.0368935675474665e-4))
     a_x² = 4.934802200544679 * x²
     a_x²lo = muladd(3.109686485461973e-16, x², muladd(4.934802200544679, x², -a_x²))
 
@@ -755,13 +767,19 @@ end
     return w + muladd(x², r, ((1.0-w)-a_x²) - a_x²lo)
 end
 @inline function cospi_kernel(x::Float32)
+    Float32(cospi_kernel_wide(x))
+end
+@inline function cospi_kernel_wide(x::Float32)
     x = Float64(x)
-    return Float32(evalpoly(x*x, (1.0, -4.934802200541122, 4.058712123568637,
-                                 -1.3352624040152927, 0.23531426791507182, -0.02550710082498761)))
+    return evalpoly(x*x, (1.0, -4.934802200541122, 4.058712123568637,
+                          -1.3352624040152927, 0.23531426791507182, -0.02550710082498761))
 end
 @inline function cospi_kernel(x::Float16)
+    Float16(cospi_kernel_wide(x))
+end
+@inline function cospi_kernel_wide(x::Float16)
     x = Float32(x)
-    return Float16(evalpoly(x*x, (1.0f0, -4.934802f0, 4.058712f0, -1.3352624f0, 0.23531426f0, -0.0255071f0)))
+    return evalpoly(x*x, (1.0f0, -4.934802f0, 4.058712f0, -1.3352624f0, 0.23531426f0, -0.0255071f0))
 end
 
 """
@@ -867,12 +885,59 @@ function sincospi(_x::T) where T<:Union{IEEEFloat, Rational}
     return si, co
 end
 
+"""
+    tanpi(x)
+
+Compute ``\\tan(\\pi x)`` more accurately than `tan(pi*x)`, especially for large `x`.
+
+!!! compat "Julia 1.10"
+    This function requires at least Julia 1.10.
+
+See also [`tand`](@ref), [`sinpi`](@ref), [`cospi`](@ref), [`sincospi`](@ref).
+"""
+
+function tanpi(_x::T) where T<:Union{IEEEFloat, Rational}
+    # This is modified from sincospi.
+    # Would it be faster or more accurate to make a tanpi_kernel?
+    x = abs(_x)
+    if !isfinite(x)
+        isnan(x) && return x
+        throw(DomainError(x, "`x` cannot be infinite."))
+    end
+    # For large x, answers are all zero.
+    # All integer values for floats larger than maxintfloat are even.
+    if T <: AbstractFloat
+        x >= maxintfloat(T) && return copysign(zero(T), _x)
+    end
+
+    # reduce to interval [0, 0.5]
+    n = round(2*x)
+    rx = float(muladd(T(-.5), n, x))
+    n = Int64(n) & 3
+    si, co = sinpi_kernel_wide(rx), cospi_kernel_wide(rx)
+    if n==0
+        si, co = si, co
+    elseif n==1
+        si, co  = co, zero(T)-si
+    elseif n==2
+        si, co  = zero(T)-si, zero(T)-co
+    else
+        si, co  = zero(T)-co, si
+    end
+    si = ifelse(signbit(_x), -si, si)
+    return float(T)(si / co)
+end
+
 sinpi(x::Integer) = x >= 0 ? zero(float(x)) : -zero(float(x))
 cospi(x::Integer) = isodd(x) ? -one(float(x)) : one(float(x))
+tanpi(x::Integer) = x >= 0 ? (isodd(x) ? -zero(float(x)) : zero(float(x))) :
+                             (isodd(x) ? zero(float(x)) : -zero(float(x)))
 sincospi(x::Integer) = (sinpi(x), cospi(x))
 sinpi(x::Real) = sin(pi*x)
 cospi(x::Real) = cos(pi*x)
 sincospi(x::Real) = sincos(pi*x)
+tanpi(x::Real) = tan(pi*x)
+tanpi(x::Complex) = sinpi(x) / cospi(x) # Is there a better way to do this?
 
 function sinpi(z::Complex{T}) where T
     F = float(T)
@@ -1126,7 +1191,7 @@ function sind(x::Real)
     if isinf(x)
         return throw(DomainError(x, "`x` cannot be infinite."))
     elseif isnan(x)
-        return oftype(x,NaN)
+        return x
     end
 
     rx = copysign(float(rem(x,360)),x)
@@ -1157,7 +1222,7 @@ function cosd(x::Real)
     if isinf(x)
         return throw(DomainError(x, "`x` cannot be infinite."))
     elseif isnan(x)
-        return oftype(x,NaN)
+        return x
     end
 
     rx = abs(float(rem(x,360)))
diff --git a/base/stacktraces.jl b/base/stacktraces.jl
index d74d47e1eb292..ee6a2762d7818 100644
--- a/base/stacktraces.jl
+++ b/base/stacktraces.jl
@@ -52,8 +52,9 @@ struct StackFrame # this type should be kept platform-agnostic so that profiles
     file::Symbol
     "the line number in the file containing the execution context"
     line::Int
-    "the MethodInstance or CodeInfo containing the execution context (if it could be found)"
-    linfo::Union{MethodInstance, CodeInfo, Nothing}
+    "the MethodInstance or CodeInfo containing the execution context (if it could be found), \
+     or Module (for macro expansions)"
+    linfo::Union{MethodInstance, Method, Module, CodeInfo, Nothing}
     "true if the code is from C"
     from_c::Bool
     "true if the code is from an inlined frame"
@@ -95,6 +96,86 @@ function hash(frame::StackFrame, h::UInt)
     return h
 end
 
+get_inlinetable(::Any) = nothing
+function get_inlinetable(mi::MethodInstance)
+    isdefined(mi, :def) && mi.def isa Method && isdefined(mi, :cache) && isdefined(mi.cache, :inferred) &&
+        mi.cache.inferred !== nothing || return nothing
+    linetable = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), mi.def, mi.cache, mi.cache.inferred).linetable
+    return filter!(x -> x.inlined_at > 0, linetable)
+end
+
+get_method_instance_roots(::Any) = nothing
+function get_method_instance_roots(mi::Union{Method, MethodInstance})
+    m = mi isa MethodInstance ? mi.def : mi
+    m isa Method && isdefined(m, :roots) || return nothing
+    return filter(x -> x isa MethodInstance, m.roots)
+end
+
+function lookup_inline_frame_info(func::Symbol, file::Symbol, linenum::Int, inlinetable::Vector{Core.LineInfoNode})
+    #REPL frames and some base files lack this prefix while others have it; should fix?
+    filestripped = Symbol(lstrip(string(file), ('.', '\\', '/')))
+    linfo = nothing
+    #=
+    Some matching entries contain the MethodInstance directly.
+    Other matching entries contain only a Method or Symbol (function name); such entries
+    are located after the entry with the MethodInstance, so backtracking is required.
+    If backtracking fails, the Method or Module is stored for return, but we continue
+    the search in case a MethodInstance is found later.
+    TODO: If a backtrack has failed, do we need to backtrack again later if another Method
+    or Symbol match is found? Or can a limit on the subsequent backtracks be placed?
+    =#
+    for (i, line) in enumerate(inlinetable)
+        Base.IRShow.method_name(line) === func && line.file ∈ (file, filestripped) && line.line == linenum || continue
+        if line.method isa MethodInstance
+            linfo = line.method
+            break
+        elseif line.method isa Method || line.method isa Symbol
+            linfo = line.method isa Method ? line.method : line.module
+            # backtrack to find the matching MethodInstance, if possible
+            for j in (i - 1):-1:1
+                nextline = inlinetable[j]
+                nextline.inlined_at == line.inlined_at && Base.IRShow.method_name(line) === Base.IRShow.method_name(nextline) && line.file === nextline.file || break
+                if nextline.method isa MethodInstance
+                    linfo = nextline.method
+                    break
+                end
+            end
+        end
+    end
+    return linfo
+end
+
+function lookup_inline_frame_info(func::Symbol, file::Symbol, miroots::Vector{Any})
+    # REPL frames and some base files lack this prefix while others have it; should fix?
+    filestripped = Symbol(lstrip(string(file), ('.', '\\', '/')))
+    matches = filter(miroots) do x
+        x.def isa Method || return false
+        m = x.def::Method
+        return m.name == func && m.file ∈ (file, filestripped)
+    end
+    if length(matches) > 1
+        # ambiguous, check if method is same and return that instead
+        all_matched = true
+        for m in matches
+            all_matched = m.def.line == matches[1].def.line &&
+                m.def.module == matches[1].def.module
+            all_matched || break
+        end
+        if all_matched
+            return matches[1].def
+        end
+        # all else fails, return module if they match, or give up
+        all_matched = true
+        for m in matches
+            all_matched = m.def.module == matches[1].def.module
+            all_matched || break
+        end
+        return all_matched ? matches[1].def.module : nothing
+    elseif length(matches) == 1
+        return matches[1]
+    end
+    return nothing
+end
 
 """
     lookup(pointer::Ptr{Cvoid}) -> Vector{StackFrame}
@@ -107,11 +188,26 @@ Base.@constprop :none function lookup(pointer::Ptr{Cvoid})
     infos = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint), pointer, false)::Core.SimpleVector
     pointer = convert(UInt64, pointer)
     isempty(infos) && return [StackFrame(empty_sym, empty_sym, -1, nothing, true, false, pointer)] # this is equal to UNKNOWN
+    parent_linfo = infos[end][4]
+    inlinetable = get_inlinetable(parent_linfo)
+    miroots = inlinetable === nothing ? get_method_instance_roots(parent_linfo) : nothing # fallback if linetable missing
     res = Vector{StackFrame}(undef, length(infos))
-    for i in 1:length(infos)
+    for i in reverse(1:length(infos))
         info = infos[i]::Core.SimpleVector
         @assert(length(info) == 6)
-        res[i] = StackFrame(info[1]::Symbol, info[2]::Symbol, info[3]::Int, info[4], info[5]::Bool, info[6]::Bool, pointer)
+        func = info[1]::Symbol
+        file = info[2]::Symbol
+        linenum = info[3]::Int
+        linfo = info[4]
+        if i < length(infos)
+            if inlinetable !== nothing
+                linfo = lookup_inline_frame_info(func, file, linenum, inlinetable)
+            elseif miroots !== nothing
+                linfo = lookup_inline_frame_info(func, file, miroots)
+            end
+            linfo = linfo === nothing ? parentmodule(res[i + 1]) : linfo # e.g. `macro expansion`
+        end
+        res[i] = StackFrame(func, file, linenum, linfo, info[5]::Bool, info[6]::Bool, pointer)
     end
     return res
 end
@@ -219,11 +315,19 @@ function show_spec_linfo(io::IO, frame::StackFrame)
         else
             Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true)
         end
-    elseif linfo isa MethodInstance
-        def = linfo.def
-        if isa(def, Method)
-            sig = linfo.specTypes
+    elseif linfo isa CodeInfo
+        print(io, "top-level scope")
+    elseif linfo isa Module
+        Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true)
+    else
+        def, sig = if linfo isa MethodInstance
+             linfo.def, linfo.specTypes
+        else
+            linfo, linfo.sig
+        end
+        if def isa Method
             argnames = Base.method_argnames(def)
+            argnames = replace(argnames, :var"#unused#" => :var"")
             if def.nkw > 0
                 # rearrange call kw_impl(kw_args..., func, pos_args...) to func(pos_args...)
                 kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+def.nkw) ]
@@ -246,8 +350,6 @@ function show_spec_linfo(io::IO, frame::StackFrame)
         else
             Base.show_mi(io, linfo, true)
         end
-    elseif linfo isa CodeInfo
-        print(io, "top-level scope")
     end
 end
 
@@ -272,10 +374,18 @@ function Base.parentmodule(frame::StackFrame)
     linfo = frame.linfo
     if linfo isa MethodInstance
         def = linfo.def
-        return def isa Module ? def : parentmodule(def::Method)
+        if def isa Module
+            return def
+        else
+            return (def::Method).module
+        end
+    elseif linfo isa Method
+        return linfo.module
+    elseif linfo isa Module
+        return linfo
     else
-        # The module is not always available (common reasons include inlined
-        # frames and frames arising from the interpreter)
+        # The module is not always available (common reasons include
+        # frames arising from the interpreter)
         nothing
     end
 end
diff --git a/base/stream.jl b/base/stream.jl
index 8e247fc074422..0b6c9a93777f6 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -457,7 +457,7 @@ function closewrite(s::LibuvStream)
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we won't get spurious notifications later
@@ -1050,7 +1050,7 @@ function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 26d4eb6b91798..2609edeaaaa18 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -613,6 +613,38 @@ isascii(c::Char) = bswap(reinterpret(UInt32, c)) < 0x80
 isascii(s::AbstractString) = all(isascii, s)
 isascii(c::AbstractChar) = UInt32(c) < 0x80
 
+@inline function _isascii(code_units::AbstractVector{CU}, first, last) where {CU}
+    r = zero(CU)
+    for n = first:last
+        @inbounds r |= code_units[n]
+    end
+    return 0 ≤ r < 0x80
+end
+
+#The chunking algorithm makes the last two chunks overlap inorder to keep the size fixed
+@inline function  _isascii_chunks(chunk_size,cu::AbstractVector{CU}, first,last) where {CU}
+    n=first
+    while n <= last - chunk_size
+        _isascii(cu,n,n+chunk_size-1) || return false
+        n += chunk_size
+    end
+    return  _isascii(cu,last-chunk_size+1,last)
+end
+"""
+    isascii(cu::AbstractVector{CU}) where {CU <: Integer} -> Bool
+
+Test whether all values in the vector belong to the ASCII character set (0x00 to 0x7f).
+This function is intended to be used by other string implementations that need a fast ASCII check.
+"""
+function isascii(cu::AbstractVector{CU}) where {CU <: Integer}
+    chunk_size = 1024
+    chunk_threshold =  chunk_size + (chunk_size ÷ 2)
+    first = firstindex(cu);   last = lastindex(cu)
+    l = last - first + 1
+    l < chunk_threshold && return _isascii(cu,first,last)
+    return _isascii_chunks(chunk_size,cu,first,last)
+end
+
 ## string map, filter ##
 
 function map(f, s::AbstractString)
@@ -636,7 +668,7 @@ function filter(f, s::AbstractString)
     for c in s
         f(c) && write(out, c)
     end
-    String(take!(out))
+    String(_unsafe_take!(out))
 end
 
 ## string first and last ##
diff --git a/base/strings/io.jl b/base/strings/io.jl
index e800002076d54..5ae67fc8c841c 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -113,7 +113,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0)
     else
         f(s, args...)
     end
-    String(resize!(s.data, s.size))
+    String(_unsafe_take!(s))
 end
 
 function _str_sizehint(x)
@@ -125,6 +125,10 @@ function _str_sizehint(x)
         return sizeof(x)
     elseif x isa Char
         return ncodeunits(x)
+    elseif x isa UInt64 || x isa UInt32
+        return ndigits(x)
+    elseif x isa Int64 || x isa Int32
+        return ndigits(x) + (x < zero(x))
     else
         return 8
     end
@@ -143,7 +147,7 @@ function print_to_string(xs...)
     for x in xs
         print(s, x)
     end
-    String(resize!(s.data, s.size))
+    String(_unsafe_take!(s))
 end
 
 function string_with_env(env, xs...)
@@ -160,7 +164,7 @@ function string_with_env(env, xs...)
     for x in xs
         print(env_io, x)
     end
-    String(resize!(s.data, s.size))
+    String(_unsafe_take!(s))
 end
 
 """
diff --git a/base/strings/lazy.jl b/base/strings/lazy.jl
index 3510afc9b4f11..eaaa6397d37f2 100644
--- a/base/strings/lazy.jl
+++ b/base/strings/lazy.jl
@@ -67,7 +67,7 @@ macro lazy_str(text)
     parts = Any[]
     lastidx = idx = 1
     while (idx = findnext('$', text, idx)) !== nothing
-        lastidx < idx && push!(parts, text[lastidx:idx-1])
+        lastidx < idx && push!(parts, text[lastidx:prevind(text, idx)])
         idx += 1
         expr, idx = Meta.parseatom(text, idx; filename=string(__source__.file))
         push!(parts, esc(expr))
diff --git a/base/strings/string.jl b/base/strings/string.jl
index 3d8db74d7b795..9716d06deefdf 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -98,6 +98,7 @@ String(s::AbstractString) = print_to_string(s)
 @assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
 
 unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
+unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s))
 
 Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s)
 Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s))
@@ -113,7 +114,8 @@ pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1
 ncodeunits(s::String) = Core.sizeof(s)
 codeunit(s::String) = UInt8
 
-@inline function codeunit(s::String, i::Integer)
+codeunit(s::String, i::Integer) = codeunit(s, Int(i))
+@assume_effects :foldable @inline function codeunit(s::String, i::Int)
     @boundscheck checkbounds(s, i)
     b = GC.@preserve s unsafe_load(pointer(s, i))
     return b
@@ -121,20 +123,20 @@ end
 
 ## comparison ##
 
-_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len) =
+@assume_effects :total _memcmp(a::String, b::String) = @invoke _memcmp(a::Union{Ptr{UInt8},AbstractString},b::Union{Ptr{UInt8},AbstractString})
+
+_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b)))
+function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int)
     ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len % Csize_t) % Int
+end
 
 function cmp(a::String, b::String)
     al, bl = sizeof(a), sizeof(b)
-    c = _memcmp(a, b, min(al,bl))
+    c = _memcmp(a, b)
     return c < 0 ? -1 : c > 0 ? +1 : cmp(al,bl)
 end
 
-function ==(a::String, b::String)
-    pointer_from_objref(a) == pointer_from_objref(b) && return true
-    al = sizeof(a)
-    return al == sizeof(b) && 0 == _memcmp(a, b, al)
-end
+==(a::String, b::String) = a===b
 
 typemin(::Type{String}) = ""
 typemin(::String) = typemin(String)
@@ -190,15 +192,201 @@ end
 end
 
 ## checking UTF-8 & ACSII validity ##
+#=
+    The UTF-8 Validation is performed by a shift based DFA.
+    ┌───────────────────────────────────────────────────────────────────┐
+    │    UTF-8 DFA State Diagram    ┌──────────────2──────────────┐     │
+    │                               ├────────3────────┐           │     │
+    │                 ┌──────────┐  │     ┌─┐        ┌▼┐          │     │
+    │      ASCII      │  UTF-8   │  ├─5──►│9├───1────► │          │     │
+    │                 │          │  │     ├─┤        │ │         ┌▼┐    │
+    │                 │  ┌─0─┐   │  ├─6──►│8├─1,7,9──►4├──1,7,9──► │    │
+    │      ┌─0─┐      │  │   │   │  │     ├─┤        │ │         │ │    │
+    │      │   │      │ ┌▼───┴┐  │  ├─11─►│7├──7,9───► │ ┌───────►3├─┐  │
+    │     ┌▼───┴┐     │ │     │  ▼  │     └─┘        └─┘ │       │ │ │  │
+    │     │  0  ├─────┘ │  1  ├─► ──┤                    │  ┌────► │ │  │
+    │     └─────┘       │     │     │     ┌─┐            │  │    └─┘ │  │
+    │                   └──▲──┘     ├─10─►│5├─────7──────┘  │        │  │
+    │                      │        │     ├─┤               │        │  │
+    │                      │        └─4──►│6├─────1,9───────┘        │  │
+    │          INVALID     │              └─┘                        │  │
+    │           ┌─*─┐      └──────────────────1,7,9──────────────────┘  │
+    │          ┌▼───┴┐                                                  │
+    │          │  2  ◄─── All undefined transitions result in state 2   │
+    │          └─────┘                                                  │
+    └───────────────────────────────────────────────────────────────────┘
+
+        Validation States
+            0 -> _UTF8_DFA_ASCII is the start state and will only stay in this state if the string is only ASCII characters
+                        If the DFA ends in this state the string is ASCII only
+            1 -> _UTF8_DFA_ACCEPT is the valid complete character state of the DFA once it has encountered a UTF-8 Unicode character
+            2 -> _UTF8_DFA_INVALID is only reached by invalid bytes and once in this state it will not change
+                    as seen by all 1s in that column of table below
+            3 -> One valid continuation byte needed to return to state 0
+        4,5,6 -> Two valid continuation bytes needed to return to state 0
+        7,8,9 -> Three valids continuation bytes needed to return to state 0
+
+                        Current State
+                    0̲  1̲  2̲  3̲  4̲  5̲  6̲  7̲  8̲  9̲
+                0 | 0  1  2  2  2  2  2  2  2  2
+                1 | 2  2  2  1  3  2  3  2  4  4
+                2 | 3  3  2  2  2  2  2  2  2  2
+                3 | 4  4  2  2  2  2  2  2  2  2
+                4 | 6  6  2  2  2  2  2  2  2  2
+    Character   5 | 9  9  2  2  2  2  2  2  2  2     <- Next State
+    Class       6 | 8  8  2  2  2  2  2  2  2  2
+                7 | 2  2  2  1  3  3  2  4  4  2
+                8 | 2  2  2  2  2  2  2  2  2  2
+                9 | 2  2  2  1  3  2  3  4  4  2
+               10 | 5  5  2  2  2  2  2  2  2  2
+               11 | 7  7  2  2  2  2  2  2  2  2
+
+           Shifts | 0  4 10 14 18 24  8 20 12 26
+
+    The shifts that represent each state were derived using teh SMT solver Z3, to ensure when encoded into
+    the rows the correct shift was a result.
+
+    Each character class row is encoding 10 states with shifts as defined above. By shifting the bitsof a row by
+    the current state then masking the result with 0x11110 give the shift for the new state
+
+
+=#
+
+#State type used by UTF-8 DFA
+const _UTF8DFAState = UInt32
+# Fill the table with 256 UInt64 representing the DFA transitions for all bytes
+const _UTF8_DFA_TABLE = let # let block rather than function doesn't pollute base
+    num_classes=12
+    num_states=10
+    bit_per_state = 6
+
+    # These shifts were derived using a SMT solver
+    state_shifts = [0, 4, 10, 14, 18, 24, 8, 20, 12, 26]
+
+    character_classes = [   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+                            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+                            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+                            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                            10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
+                            11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ]
+
+    # These are the rows discussed in comments above
+    state_arrays = [ 0  1  2  2  2  2  2  2  2  2;
+                     2  2  2  1  3  2  3  2  4  4;
+                     3  3  2  2  2  2  2  2  2  2;
+                     4  4  2  2  2  2  2  2  2  2;
+                     6  6  2  2  2  2  2  2  2  2;
+                     9  9  2  2  2  2  2  2  2  2;
+                     8  8  2  2  2  2  2  2  2  2;
+                     2  2  2  1  3  3  2  4  4  2;
+                     2  2  2  2  2  2  2  2  2  2;
+                     2  2  2  1  3  2  3  4  4  2;
+                     5  5  2  2  2  2  2  2  2  2;
+                     7  7  2  2  2  2  2  2  2  2]
+
+    #This converts the state_arrays into the shift encoded _UTF8DFAState
+    class_row = zeros(_UTF8DFAState, num_classes)
+
+    for i = 1:num_classes
+        row = _UTF8DFAState(0)
+        for j in 1:num_states
+            #Calculate the shift required for the next state
+            to_shift = UInt8((state_shifts[state_arrays[i,j]+1]) )
+            #Shift the next state into the position of the current state
+            row = row | (_UTF8DFAState(to_shift) << state_shifts[j])
+        end
+        class_row[i]=row
+    end
+
+    map(c->class_row[c+1],character_classes)
+end
+
+
+const _UTF8_DFA_ASCII = _UTF8DFAState(0) #This state represents the start and end of any valid string
+const _UTF8_DFA_ACCEPT = _UTF8DFAState(4) #This state represents the start and end of any valid string
+const _UTF8_DFA_INVALID = _UTF8DFAState(10) # If the state machine is ever in this state just stop
+
+# The dfa step is broken out so that it may be used in other functions. The mask was calculated to work with state shifts above
+@inline _utf_dfa_step(state::_UTF8DFAState, byte::UInt8) = @inbounds (_UTF8_DFA_TABLE[byte+1] >> state) & _UTF8DFAState(0x0000001E)
+
+@inline function _isvalid_utf8_dfa(state::_UTF8DFAState, bytes::AbstractVector{UInt8}, first::Int = firstindex(bytes), last::Int = lastindex(bytes))
+    for i = first:last
+       @inbounds state = _utf_dfa_step(state, bytes[i])
+    end
+    return (state)
+end
+
+@inline function  _find_nonascii_chunk(chunk_size,cu::AbstractVector{CU}, first,last) where {CU}
+    n=first
+    while n <= last - chunk_size
+        _isascii(cu,n,n+chunk_size-1) || return n
+        n += chunk_size
+    end
+    n= last-chunk_size+1
+    _isascii(cu,n,last) || return n
+    return nothing
+end
+
+##
 
-byte_string_classify(s::Union{String,Vector{UInt8},FastContiguousSubArray{UInt8,1,Vector{UInt8}}}) =
-    ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s))
+# Classifcations of string
     # 0: neither valid ASCII nor UTF-8
     # 1: valid ASCII
     # 2: valid UTF-8
+ byte_string_classify(s::AbstractString) = byte_string_classify(codeunits(s))
+
+
+function byte_string_classify(bytes::AbstractVector{UInt8})
+    chunk_size = 1024
+    chunk_threshold =  chunk_size + (chunk_size ÷ 2)
+    n = length(bytes)
+    if n > chunk_threshold
+        start = _find_nonascii_chunk(chunk_size,bytes,1,n)
+        isnothing(start) && return 1
+    else
+        _isascii(bytes,1,n) && return 1
+        start = 1
+    end
+    return _byte_string_classify_nonascii(bytes,start,n)
+end
 
-isvalid(::Type{String}, s::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,Vector{UInt8}},String}) = byte_string_classify(s) ≠ 0
-isvalid(s::String) = isvalid(String, s)
+function _byte_string_classify_nonascii(bytes::AbstractVector{UInt8}, first::Int, last::Int)
+    chunk_size = 256
+
+    start = first
+    stop = min(last,first + chunk_size - 1)
+    state = _UTF8_DFA_ACCEPT
+    while start <= last
+        # try to process ascii chunks
+        while state == _UTF8_DFA_ACCEPT
+            _isascii(bytes,start,stop) || break
+            (start = start + chunk_size) <= last || break
+            stop = min(last,stop + chunk_size)
+        end
+        # Process non ascii chunk
+        state = _isvalid_utf8_dfa(state,bytes,start,stop)
+        state == _UTF8_DFA_INVALID && return 0
+
+        start = start + chunk_size
+        stop = min(last,stop + chunk_size)
+    end
+    return ifelse(state == _UTF8_DFA_ACCEPT,2,0)
+end
+
+isvalid(::Type{String}, bytes::AbstractVector{UInt8}) = (@inline byte_string_classify(bytes)) ≠ 0
+isvalid(::Type{String}, s::AbstractString) =  (@inline byte_string_classify(s)) ≠ 0
+
+@inline isvalid(s::AbstractString) = @inline isvalid(String, codeunits(s))
 
 is_valid_continuation(c) = c & 0xc0 == 0x80
 
@@ -284,9 +472,11 @@ getindex(s::String, r::AbstractUnitRange{<:Integer}) = s[Int(first(r)):Int(last(
     return ss
 end
 
-length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
+# nothrow because we know the start and end indices are valid
+@assume_effects :nothrow length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
 
-@inline function length(s::String, i::Int, j::Int)
+# effects needed because @inbounds
+@assume_effects :consistent :effect_free @inline function length(s::String, i::Int, j::Int)
     @boundscheck begin
         0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i))
         0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j))
@@ -294,13 +484,13 @@ length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
     j < i && return 0
     @inbounds i, k = thisind(s, i), i
     c = j - i + (i == k)
-    length_continued(s, i, j, c)
+    @inbounds length_continued(s, i, j, c)
 end
 
-@inline function length_continued(s::String, i::Int, n::Int, c::Int)
+@assume_effects :terminates_locally @inline @propagate_inbounds function length_continued(s::String, i::Int, n::Int, c::Int)
     i < n || return c
-    @inbounds b = codeunit(s, i)
-    @inbounds while true
+    b = codeunit(s, i)
+    while true
         while true
             (i += 1) ≤ n || return c
             0xc0 ≤ b ≤ 0xf7 && break
@@ -326,12 +516,10 @@ end
 
 isvalid(s::String, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i
 
-function isascii(s::String)
-    @inbounds for i = 1:ncodeunits(s)
-        codeunit(s, i) >= 0x80 && return false
-    end
-    return true
-end
+isascii(s::String) = isascii(codeunits(s))
+
+# don't assume effects for general integers since we cannot know their implementation
+@assume_effects :foldable repeat(c::Char, r::BitInteger) = @invoke repeat(c::Char, r::Integer)
 
 """
     repeat(c::AbstractChar, r::Integer) -> String
@@ -345,8 +533,8 @@ julia> repeat('A', 3)
 "AAA"
 ```
 """
-repeat(c::AbstractChar, r::Integer) = repeat(Char(c), r) # fallback
-function repeat(c::Char, r::Integer)
+function repeat(c::AbstractChar, r::Integer)
+    c = Char(c)::Char
     r == 0 && return ""
     r < 0 && throw(ArgumentError("can't repeat a character $r times"))
     u = bswap(reinterpret(UInt32, c))
diff --git a/base/strings/substring.jl b/base/strings/substring.jl
index baaea038b2cfe..5ba08ac2f7fff 100644
--- a/base/strings/substring.jl
+++ b/base/strings/substring.jl
@@ -92,31 +92,24 @@ function getindex(s::SubString, i::Integer)
     @inbounds return getindex(s.string, s.offset + i)
 end
 
+isascii(ss::SubString{String}) = isascii(codeunits(ss))
+
 function isvalid(s::SubString, i::Integer)
     ib = true
     @boundscheck ib = checkbounds(Bool, s, i)
     @inbounds return ib && isvalid(s.string, s.offset + i)::Bool
 end
 
-byte_string_classify(s::SubString{String}) =
-    ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s))
-
-isvalid(::Type{String}, s::SubString{String}) = byte_string_classify(s) ≠ 0
-isvalid(s::SubString{String}) = isvalid(String, s)
-
 thisind(s::SubString{String}, i::Int) = _thisind_str(s, i)
 nextind(s::SubString{String}, i::Int) = _nextind_str(s, i)
 
 function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}})
-    s = sizeof(a)
-    s == sizeof(b) && 0 == _memcmp(a, b, s)
+    sizeof(a) == sizeof(b) && _memcmp(a, b) == 0
 end
 
 function cmp(a::SubString{String}, b::SubString{String})
-    na = sizeof(a)
-    nb = sizeof(b)
-    c = _memcmp(a, b, min(na, nb))
-    return c < 0 ? -1 : c > 0 ? +1 : cmp(na, nb)
+    c = _memcmp(a, b)
+    return c < 0 ? -1 : c > 0 ? +1 : cmp(sizeof(a), sizeof(b))
 end
 
 # don't make unnecessary copies when passing substrings to C functions
@@ -207,19 +200,30 @@ end
     return n
 end
 
-@inline function __unsafe_string!(out, s::Union{String, SubString{String}}, offs::Integer)
+@assume_effects :nothrow @inline function __unsafe_string!(out, s::String, offs::Integer)
     n = sizeof(s)
     GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n)
     return n
 end
 
-@inline function __unsafe_string!(out, s::Symbol, offs::Integer)
+@inline function __unsafe_string!(out, s::SubString{String}, offs::Integer)
+    n = sizeof(s)
+    GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n)
+    return n
+end
+
+@assume_effects :nothrow @inline function __unsafe_string!(out, s::Symbol, offs::Integer)
     n = sizeof(s)
     GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n)
     return n
 end
 
-function string(a::Union{Char, String, SubString{String}, Symbol}...)
+# nothrow needed here because for v in a can't prove the indexing is inbounds.
+@assume_effects :foldable :nothrow string(a::Union{Char, String, Symbol}...) = _string(a...)
+
+string(a::Union{Char, String, SubString{String}, Symbol}...) = _string(a...)
+
+function _string(a::Union{Char, String, SubString{String}, Symbol}...)
     n = 0
     for v in a
         # 4 types is too many for automatic Union-splitting, so we split manually
@@ -248,6 +252,10 @@ function string(a::Union{Char, String, SubString{String}, Symbol}...)
     return out
 end
 
+# don't assume effects for general integers since we cannot know their implementation
+# not nothrow because r<0 throws
+@assume_effects :foldable repeat(s::String, r::BitInteger) = @invoke repeat(s::String, r::Integer)
+
 function repeat(s::Union{String, SubString{String}}, r::Integer)
     r < 0 && throw(ArgumentError("can't repeat a string $r times"))
     r == 0 && return ""
diff --git a/base/strings/util.jl b/base/strings/util.jl
index dabb84ae65639..7a42d7fecfc91 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -67,6 +67,25 @@ function startswith(a::Union{String, SubString{String}},
     end
 end
 
+"""
+    startswith(io::IO, prefix::Union{AbstractString,Base.Chars})
+
+Check if an `IO` object starts with a prefix.  See also [`peek`](@ref).
+"""
+function Base.startswith(io::IO, prefix::Base.Chars)
+    mark(io)
+    c = read(io, Char)
+    reset(io)
+    return c in prefix
+end
+function Base.startswith(io::IO, prefix::Union{String,SubString{String}})
+    mark(io)
+    s = read(io, ncodeunits(prefix))
+    reset(io)
+    return s == codeunits(prefix)
+end
+Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix))
+
 function endswith(a::Union{String, SubString{String}},
                   b::Union{String, SubString{String}})
     cub = ncodeunits(b)
diff --git a/base/sysimg.jl b/base/sysimg.jl
index ef7bad929b743..b0eeffa5757ba 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -5,6 +5,7 @@ Core.include(Main, "Base.jl")
 using .Base
 
 # Set up Main module
+using Base.MainInclude # ans, err, and sometimes Out
 import Base.MainInclude: eval, include
 
 # Ensure this file is also tracked
@@ -26,6 +27,7 @@ let
     task.rngState1 = 0x7431eaead385992c
     task.rngState2 = 0x503e1d32781c2608
     task.rngState3 = 0x3a77f7189200c20b
+    task.rngState4 = 0x5502376d099035ae
 
     # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl
     # Run with the `--exclude-jlls` option to filter out all JLL packages
@@ -54,19 +56,14 @@ let
 
         # 2-depth packages
         :Dates,
-        :Distributed,
         :Future,
         :InteractiveUtils,
         :LibGit2,
-        :Profile,
-        :SparseArrays,
         :UUIDs,
 
         # 3-depth packages
         :REPL,
-        :SharedArrays,
         :TOML,
-        :Test,
 
         # 4-depth packages
         :LibCURL,
@@ -76,9 +73,6 @@ let
 
         # 6-depth packages
         :Pkg,
-
-        # 7-depth packages
-        :LazyArtifacts,
     ]
     # PackageCompiler can filter out stdlibs so it can be empty
     maxlen = maximum(textwidth.(string.(stdlibs)); init=0)
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index b885d88a5f3cb..2c962088484e7 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -543,9 +543,21 @@ function which(program_name::String)
     for path_dir in path_dirs
         for pname in program_names
             program_path = joinpath(path_dir, pname)
-            # If we find something that matches our name and we can execute
-            if isfile(program_path) && isexecutable(program_path)
-                return program_path
+            try
+                # If we find something that matches our name and we can execute
+                if isfile(program_path) && isexecutable(program_path)
+                    return program_path
+                end
+            catch e
+                # If we encounter a permission error, we skip this directory
+                # and continue to the next directory in the PATH variable.
+                if isa(e, Base.IOError) && e.code == Base.UV_EACCES
+                    # Permission denied, continue searching
+                    continue
+                else
+                    # Rethrow the exception if it's not a permission error
+                    rethrow(e)
+                end
             end
         end
     end
diff --git a/base/task.jl b/base/task.jl
index ce34d2f179fc5..4fbb51fde3e8e 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -70,7 +70,7 @@ end
 """
     TaskFailedException
 
-This exception is thrown by a `wait(t)` call when task `t` fails.
+This exception is thrown by a [`wait(t)`](@ref) call when task `t` fails.
 `TaskFailedException` wraps the failed task `t`.
 """
 struct TaskFailedException <: Exception
@@ -131,7 +131,8 @@ true
 ```
 """
 macro task(ex)
-    :(Task(()->$(esc(ex))))
+    thunk = Base.replace_linenums!(:(()->$(esc(ex))), __source__)
+    :(Task($thunk))
 end
 
 """
@@ -253,7 +254,7 @@ istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed)
 Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1)
 function Threads.threadpool(t::Task)
     tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), t)
-    return tpid == 0 ? :default : :interactive
+    return Threads._tpid_to_sym(tpid)
 end
 
 task_result(t::Task) = t.result
@@ -317,22 +318,22 @@ end
 # have `waiter` wait for `t`
 function _wait2(t::Task, waiter::Task)
     if !istaskdone(t)
+        # since _wait2 is similar to schedule, we should observe the sticky
+        # bit, even if we don't call `schedule` with early-return below
+        if waiter.sticky && Threads.threadid(waiter) == 0 && !GC.in_finalizer()
+            # Issue #41324
+            # t.sticky && tid == 0 is a task that needs to be co-scheduled with
+            # the parent task. If the parent (current_task) is not sticky we must
+            # set it to be sticky.
+            # XXX: Ideally we would be able to unset this
+            current_task().sticky = true
+            tid = Threads.threadid()
+            ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1)
+        end
         lock(t.donenotify)
         if !istaskdone(t)
             push!(t.donenotify.waitq, waiter)
             unlock(t.donenotify)
-            # since _wait2 is similar to schedule, we should observe the sticky
-            # bit, even if we aren't calling `schedule` due to this early-return
-            if waiter.sticky && Threads.threadid(waiter) == 0
-                # Issue #41324
-                # t.sticky && tid == 0 is a task that needs to be co-scheduled with
-                # the parent task. If the parent (current_task) is not sticky we must
-                # set it to be sticky.
-                # XXX: Ideally we would be able to unset this
-                current_task().sticky = true
-                tid = Threads.threadid()
-                ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1)
-            end
             return nothing
         else
             unlock(t.donenotify)
@@ -361,8 +362,8 @@ fetch(@nospecialize x) = x
 """
     fetch(t::Task)
 
-Wait for a Task to finish, then return its result value.
-If the task fails with an exception, a `TaskFailedException` (which wraps the failed task)
+Wait for a [`Task`](@ref) to finish, then return its result value.
+If the task fails with an exception, a [`TaskFailedException`](@ref) (which wraps the failed task)
 is thrown.
 """
 function fetch(t::Task)
@@ -503,15 +504,15 @@ isolating the asynchronous code from changes to the variable's value in the curr
     Interpolating values via `\$` is available as of Julia 1.4.
 """
 macro async(expr)
-    do_async_macro(expr)
+    do_async_macro(expr, __source__)
 end
 
 # generate the code for @async, possibly wrapping the task in something before
 # pushing it to the wait queue.
-function do_async_macro(expr; wrap=identity)
+function do_async_macro(expr, linenums; wrap=identity)
     letargs = Base._lift_one_interp!(expr)
 
-    thunk = esc(:(()->($expr)))
+    thunk = Base.replace_linenums!(:(()->($(esc(expr)))), linenums)
     var = esc(sync_varname)
     quote
         let $(letargs...)
@@ -551,7 +552,7 @@ fetch(t::UnwrapTaskFailedException) = unwrap_task_failed(fetch, t)
 
 # macro for running async code that doesn't throw wrapped exceptions
 macro async_unwrap(expr)
-    do_async_macro(expr, wrap=task->:(Base.UnwrapTaskFailedException($task)))
+    do_async_macro(expr, __source__, wrap=task->:(Base.UnwrapTaskFailedException($task)))
 end
 
 """
@@ -767,22 +768,33 @@ end
 
 function enq_work(t::Task)
     (t._state === task_state_runnable && t.queue === nothing) || error("schedule: Task not runnable")
-    if t.sticky || Threads.threadpoolsize() == 1
+
+    # Sticky tasks go into their thread's work queue.
+    if t.sticky
         tid = Threads.threadid(t)
-        if tid == 0
-            # Issue #41324
-            # t.sticky && tid == 0 is a task that needs to be co-scheduled with
-            # the parent task. If the parent (current_task) is not sticky we must
-            # set it to be sticky.
-            # XXX: Ideally we would be able to unset this
-            current_task().sticky = true
+        if tid == 0 && !GC.in_finalizer()
+            # The task is not yet stuck to a thread. Stick it to the current
+            # thread and do the same to the parent task (the current task) so
+            # that the tasks are correctly co-scheduled (issue #41324).
+            # XXX: Ideally we would be able to unset this.
             tid = Threads.threadid()
             ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1)
+            current_task().sticky = true
         end
         push!(workqueue_for(tid), t)
     else
-        Partr.multiq_insert(t, t.priority)
-        tid = 0
+        tp = Threads.threadpool(t)
+        if Threads.threadpoolsize(tp) == 1
+            # There's only one thread in the task's assigned thread pool;
+            # use its work queue.
+            tid = (tp === :interactive) ? 1 : Threads.threadpoolsize(:interactive)+1
+            ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1)
+            push!(workqueue_for(tid), t)
+        else
+            # Otherwise, put the task in the multiqueue.
+            Partr.multiq_insert(t, t.priority)
+            tid = 0
+        end
     end
     ccall(:jl_wakeup_thread, Cvoid, (Int16,), (tid - 1) % Int16)
     return t
@@ -828,7 +840,7 @@ function schedule(t::Task, @nospecialize(arg); error=false)
     # schedule a task to be (re)started with the given value or exception
     t._state === task_state_runnable || Base.error("schedule: Task not runnable")
     if error
-        t.queue === nothing || Base.list_deletefirst!(t.queue, t)
+        t.queue === nothing || Base.list_deletefirst!(t.queue::IntrusiveLinkedList{Task}, t)
         setfield!(t, :result, arg)
         setfield!(t, :_isexception, true)
     else
@@ -852,7 +864,7 @@ function yield()
     try
         wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
         rethrow()
     end
 end
diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl
index 643cd95e57ebf..d150fd3ea1af4 100644
--- a/base/threadingconstructs.jl
+++ b/base/threadingconstructs.jl
@@ -32,22 +32,21 @@ See also `BLAS.get_num_threads` and `BLAS.set_num_threads` in the [`LinearAlgebr
 man-linalg) standard library, and `nprocs()` in the [`Distributed`](@ref man-distributed)
 standard library and [`Threads.maxthreadid()`](@ref).
 """
-function nthreads(pool::Symbol)
-    if pool === :default
-        tpid = Int8(0)
-    elseif pool === :interactive
-        tpid = Int8(1)
-    else
-        error("invalid threadpool specified")
-    end
-    return _nthreads_in_pool(tpid)
-end
+nthreads(pool::Symbol) = threadpoolsize(pool)
 
 function _nthreads_in_pool(tpid::Int8)
     p = unsafe_load(cglobal(:jl_n_threads_per_pool, Ptr{Cint}))
     return Int(unsafe_load(p, tpid + 1))
 end
 
+function _tpid_to_sym(tpid::Int8)
+    return tpid == 0 ? :interactive : :default
+end
+
+function _sym_to_tpid(tp::Symbol)
+    return tp === :interactive ? Int8(0) : Int8(1)
+end
+
 """
     Threads.threadpool(tid = threadid()) -> Symbol
 
@@ -55,7 +54,7 @@ Returns the specified thread's threadpool; either `:default` or `:interactive`.
 """
 function threadpool(tid = threadid())
     tpid = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1)
-    return tpid == 0 ? :default : :interactive
+    return _tpid_to_sym(tpid)
 end
 
 """
@@ -66,24 +65,56 @@ Returns the number of threadpools currently configured.
 nthreadpools() = Int(unsafe_load(cglobal(:jl_n_threadpools, Cint)))
 
 """
-    Threads.threadpoolsize()
+    Threads.threadpoolsize(pool::Symbol = :default) -> Int
 
-Get the number of threads available to the Julia default worker-thread pool.
+Get the number of threads available to the default thread pool (or to the
+specified thread pool).
 
 See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the
 [`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the
 [`Distributed`](@ref man-distributed) standard library.
 """
-threadpoolsize() = Threads._nthreads_in_pool(Int8(0))
+function threadpoolsize(pool::Symbol = :default)
+    if pool === :default || pool === :interactive
+        tpid = _sym_to_tpid(pool)
+    else
+        error("invalid threadpool specified")
+    end
+    return _nthreads_in_pool(tpid)
+end
+
+"""
+    threadpooltids(pool::Symbol)
+
+Returns a vector of IDs of threads in the given pool.
+"""
+function threadpooltids(pool::Symbol)
+    ni = _nthreads_in_pool(Int8(0))
+    if pool === :interactive
+        return collect(1:ni)
+    elseif pool === :default
+        return collect(ni+1:ni+_nthreads_in_pool(Int8(1)))
+    else
+        error("invalid threadpool specified")
+    end
+end
+
+"""
+    Threads.ngcthreads() -> Int
+
+Returns the number of GC threads currently configured.
+"""
+ngcthreads() = Int(unsafe_load(cglobal(:jl_n_gcthreads, Cint))) + 1
 
 function threading_run(fun, static)
     ccall(:jl_enter_threaded_region, Cvoid, ())
     n = threadpoolsize()
+    tid_offset = threadpoolsize(:interactive)
     tasks = Vector{Task}(undef, n)
     for i = 1:n
         t = Task(() -> fun(i)) # pass in tid
         t.sticky = static
-        static && ccall(:jl_set_task_tid, Cint, (Any, Cint), t, i-1)
+        static && ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid_offset + i-1)
         tasks[i] = t
         schedule(t)
     end
@@ -286,6 +317,15 @@ macro threads(args...)
     return _threadsfor(ex.args[1], ex.args[2], sched)
 end
 
+function _spawn_set_thrpool(t::Task, tp::Symbol)
+    tpid = _sym_to_tpid(tp)
+    if _nthreads_in_pool(tpid) == 0
+        tpid = _sym_to_tpid(:default)
+    end
+    ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid)
+    nothing
+end
+
 """
     Threads.@spawn [:default|:interactive] expr
 
@@ -314,7 +354,7 @@ the variable's value in the current task.
     A threadpool may be specified as of Julia 1.9.
 """
 macro spawn(args...)
-    tpid = Int8(0)
+    tp = :default
     na = length(args)
     if na == 2
         ttype, ex = args
@@ -324,9 +364,9 @@ macro spawn(args...)
             # TODO: allow unquoted symbols
             ttype = nothing
         end
-        if ttype === :interactive
-            tpid = Int8(1)
-        elseif ttype !== :default
+        if ttype === :interactive || ttype === :default
+            tp = ttype
+        else
             throw(ArgumentError("unsupported threadpool in @spawn: $ttype"))
         end
     elseif na == 1
@@ -337,13 +377,13 @@ macro spawn(args...)
 
     letargs = Base._lift_one_interp!(ex)
 
-    thunk = esc(:(()->($ex)))
+    thunk = Base.replace_linenums!(:(()->($(esc(ex)))), __source__)
     var = esc(Base.sync_varname)
     quote
         let $(letargs...)
             local task = Task($thunk)
             task.sticky = false
-            ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), task, $tpid)
+            _spawn_set_thrpool(task, $(QuoteNode(tp)))
             if $(Expr(:islocal, var))
                 put!($var, task)
             end
diff --git a/base/timing.jl b/base/timing.jl
index e082c09156b84..3e1f3a3451149 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -12,14 +12,14 @@ struct GC_Num
     freecall        ::Int64
     total_time      ::Int64
     total_allocd    ::Int64 # GC internal
-    since_sweep     ::Int64 # GC internal
     collect         ::Csize_t # GC internal
     pause           ::Cint
     full_sweep      ::Cint
     max_pause       ::Int64
     max_memory      ::Int64
-    time_to_safepoint             ::Int64
-    max_time_to_safepointp        ::Int64
+    time_to_safepoint           ::Int64
+    max_time_to_safepoint       ::Int64
+    total_time_to_safepoint     ::Int64
     sweep_time      ::Int64
     mark_time       ::Int64
     total_sweep_time  ::Int64
@@ -103,7 +103,7 @@ Return the total amount (in bytes) allocated by the just-in-time compiler
 for e.g. native code and data.
 """
 function jit_total_bytes()
-    return Int(ccall(:jl_jit_total_bytes, Csize_t, ()))
+    return ccall(:jl_jit_total_bytes, Csize_t, ())
 end
 
 # print elapsed time, return expression value
diff --git a/base/traits.jl b/base/traits.jl
index 53ae14b12c61e..47ab8ddc0c7ac 100644
--- a/base/traits.jl
+++ b/base/traits.jl
@@ -11,7 +11,7 @@ OrderStyle(::Type{<:Real}) = Ordered()
 OrderStyle(::Type{<:AbstractString}) = Ordered()
 OrderStyle(::Type{Symbol}) = Ordered()
 OrderStyle(::Type{<:Any}) = Unordered()
-OrderStyle(::Type{Union{}}) = Ordered()
+OrderStyle(::Type{Union{}}, slurp...) = Ordered()
 
 # trait for objects that support arithmetic
 abstract type ArithmeticStyle end
@@ -23,6 +23,7 @@ ArithmeticStyle(instance) = ArithmeticStyle(typeof(instance))
 ArithmeticStyle(::Type{<:AbstractFloat}) = ArithmeticRounds()
 ArithmeticStyle(::Type{<:Integer}) = ArithmeticWraps()
 ArithmeticStyle(::Type{<:Any}) = ArithmeticUnknown()
+ArithmeticStyle(::Type{Union{}}, slurp...) = ArithmeticUnknown()
 
 # trait for objects that support ranges with regular step
 """
@@ -58,5 +59,6 @@ ranges with an element type which is a subtype of `Integer`.
 abstract type RangeStepStyle end
 struct RangeStepRegular   <: RangeStepStyle end # range with regular step
 struct RangeStepIrregular <: RangeStepStyle end # range with rounding error
+RangeStepStyle(::Type{Union{}}, slurp...) = RangeStepIrregular()
 
 RangeStepStyle(instance) = RangeStepStyle(typeof(instance))
diff --git a/base/tuple.jl b/base/tuple.jl
index 134010268c7fe..59fe2c1e531e1 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -30,6 +30,8 @@ size(@nospecialize(t::Tuple), d::Integer) = (d == 1) ? length(t) : throw(Argumen
 axes(@nospecialize t::Tuple) = (OneTo(length(t)),)
 @eval getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, $(Expr(:boundscheck)))
 @eval getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), $(Expr(:boundscheck)))
+__inbounds_getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, false)
+__inbounds_getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), false)
 getindex(t::Tuple, r::AbstractArray{<:Any,1}) = (eltype(t)[t[ri] for ri in r]...,)
 getindex(t::Tuple, b::AbstractArray{Bool,1}) = length(b) == length(t) ? getindex(t, findall(b)) : throw(BoundsError(t, b))
 getindex(t::Tuple, c::Colon) = t
@@ -311,6 +313,8 @@ function map(f, t::Any32)
 end
 # 2 argument function
 map(f, t::Tuple{},        s::Tuple{})        = ()
+map(f, t::Tuple,          s::Tuple{})        = ()
+map(f, t::Tuple{},        s::Tuple)          = ()
 map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (@inline; (f(t[1],s[1]),))
 map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@inline; (f(t[1],s[1]), f(t[2],s[2])))
 function map(f, t::Tuple, s::Tuple)
@@ -318,7 +322,7 @@ function map(f, t::Tuple, s::Tuple)
     (f(t[1],s[1]), map(f, tail(t), tail(s))...)
 end
 function map(f, t::Any32, s::Any32)
-    n = length(t)
+    n = min(length(t), length(s))
     A = Vector{Any}(undef, n)
     for i = 1:n
         A[i] = f(t[i], s[i])
@@ -329,12 +333,16 @@ end
 heads(ts::Tuple...) = map(t -> t[1], ts)
 tails(ts::Tuple...) = map(tail, ts)
 map(f, ::Tuple{}...) = ()
+anyempty(x::Tuple{}, xs...) = true
+anyempty(x::Tuple, xs...) = anyempty(xs...)
+anyempty() = false
 function map(f, t1::Tuple, t2::Tuple, ts::Tuple...)
     @inline
+    anyempty(t1, t2, ts...) && return ()
     (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...)
 end
 function map(f, t1::Any32, t2::Any32, ts::Any32...)
-    n = length(t1)
+    n = min(length(t1), length(t2), minimum(length, ts))
     A = Vector{Any}(undef, n)
     for i = 1:n
         A[i] = f(t1[i], t2[i], map(t -> t[i], ts)...)
@@ -375,7 +383,7 @@ function tuple_type_tail(T::Type)
     end
 end
 
-(::Type{T})(x::Tuple) where {T<:Tuple} = convert(T, x)  # still use `convert` for tuples
+(::Type{T})(x::Tuple) where {T<:Tuple} = x isa T ? x : convert(T, x)  # still use `convert` for tuples
 
 Tuple(x::Ref) = tuple(getindex(x))  # faster than iterator for one element
 Tuple(x::Array{T,0}) where {T} = tuple(getindex(x))
@@ -393,7 +401,9 @@ function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N}
     @inline
     y = iterate(itr, s...)
     y === nothing && _totuple_err(T)
-    t1 = convert(fieldtype(T, 1), y[1])
+    T1 = fieldtype(T, 1)
+    y1 = y[1]
+    t1 = y1 isa T1 ? y1 : convert(T1, y1)::T1
     # inference may give up in recursive calls, so annotate here to force accurate return type to be propagated
     rT = tuple_type_tail(T)
     ts = _totuple(rT, itr, y[2])::rT
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index 6a1232cdcd810..d91a04371230c 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -112,8 +112,8 @@ julia> Float64(hi) + Float64(lo)
 ```
 """
 function mul12(x::T, y::T) where {T<:AbstractFloat}
-    h = x * y
-    ifelse(iszero(h) | !isfinite(h), (h, h), canonicalize2(h, fma(x, y, -h)))
+    (h, l) = Math.two_mul(x, y)
+    ifelse(!isfinite(h), (h, h), (h, l))
 end
 mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p)))
 mul12(x, y) = mul12(promote(x, y)...)
@@ -141,6 +141,7 @@ julia> hi, lo = Base.div12(x, y)
 
 julia> Float64(hi) + Float64(lo)
 1.0134170444063066
+```
 """
 function div12(x::T, y::T) where {T<:AbstractFloat}
     # We lose precision if any intermediate calculation results in a subnormal.
@@ -199,16 +200,14 @@ end
 
 TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T))
 
+TwicePrecision{T}(x::TwicePrecision{T}) where {T} = x
+
 function TwicePrecision{T}(x) where {T}
-    xT = convert(T, x)
+    xT = T(x)
     Δx = x - xT
     TwicePrecision{T}(xT, T(Δx))
 end
 
-function TwicePrecision{T}(x::TwicePrecision) where {T}
-    TwicePrecision{T}(x.hi, x.lo)
-end
-
 TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} =
     TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...)
 
@@ -263,8 +262,7 @@ promote_rule(::Type{TwicePrecision{R}}, ::Type{TwicePrecision{S}}) where {R,S} =
 promote_rule(::Type{TwicePrecision{R}}, ::Type{S}) where {R,S<:Number} =
     TwicePrecision{promote_type(R,S)}
 
-(::Type{T})(x::TwicePrecision) where {T<:Number} = T(x.hi + x.lo)::T
-TwicePrecision{T}(x::Number) where {T} = TwicePrecision{T}(T(x), zero(T))
+(::Type{T})(x::TwicePrecision) where {T<:Number} = (T(x.hi) + T(x.lo))::T
 
 convert(::Type{TwicePrecision{T}}, x::TwicePrecision{T}) where {T} = x
 convert(::Type{TwicePrecision{T}}, x::TwicePrecision) where {T} =
diff --git a/base/util.jl b/base/util.jl
index 8d1a4a9fa02ef..6f424f80d13b6 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -22,6 +22,7 @@ const text_colors = Dict{Union{Symbol,Int},String}(
     :normal        => "\033[0m",
     :default       => "\033[39m",
     :bold          => "\033[1m",
+    :italic        => "\033[3m",
     :underline     => "\033[4m",
     :blink         => "\033[5m",
     :reverse       => "\033[7m",
@@ -35,6 +36,7 @@ end
 
 const disable_text_style = Dict{Symbol,String}(
     :bold      => "\033[22m",
+    :italic    => "\033[23m",
     :underline => "\033[24m",
     :blink     => "\033[25m",
     :reverse   => "\033[27m",
@@ -47,7 +49,7 @@ const disable_text_style = Dict{Symbol,String}(
 # Create a docstring with an automatically generated list
 # of colors.
 let color_syms = collect(Iterators.filter(x -> !isa(x, Integer), keys(text_colors))),
-    formatting_syms = [:normal, :bold, :default]
+    formatting_syms = [:normal, :bold, :italic, :default]
     global const available_text_colors = cat(
         sort!(intersect(color_syms, formatting_syms), rev=true),
         sort!(setdiff(  color_syms, formatting_syms));
@@ -69,7 +71,7 @@ Printing with the color `:nothing` will print the string without modifications.
 text_colors
 
 function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}, io::IO, args...;
-        bold::Bool = false, underline::Bool = false, blink::Bool = false,
+        bold::Bool = false, italic::Bool = false, underline::Bool = false, blink::Bool = false,
         reverse::Bool = false, hidden::Bool = false)
     buf = IOBuffer()
     iscolor = get(io, :color, false)::Bool
@@ -80,12 +82,14 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
             print(io, str)
         else
             bold && color === :bold && (color = :nothing)
+            italic && color === :italic && (color = :nothing)
             underline && color === :underline && (color = :nothing)
             blink && color === :blink && (color = :nothing)
             reverse && color === :reverse && (color = :nothing)
             hidden && color === :hidden && (color = :nothing)
             enable_ansi  = get(text_colors, color, text_colors[:default]) *
                                (bold ? text_colors[:bold] : "") *
+                               (italic ? text_colors[:italic] : "") *
                                (underline ? text_colors[:underline] : "") *
                                (blink ? text_colors[:blink] : "") *
                                (reverse ? text_colors[:reverse] : "") *
@@ -96,6 +100,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
                            (blink ? disable_text_style[:blink] : "") *
                            (underline ? disable_text_style[:underline] : "") *
                            (bold ? disable_text_style[:bold] : "") *
+                           (italic ? disable_text_style[:italic] : "") *
                                get(disable_text_style, color, text_colors[:default])
             first = true
             for line in eachsplit(str, '\n')
@@ -110,39 +115,47 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
 end
 
 """
-    printstyled([io], xs...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Symbol,Int}=:normal)
+    printstyled([io], xs...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Symbol,Int}=:normal)
 
 Print `xs` in a color specified as a symbol or integer, optionally in bold.
 
 Keyword `color` may take any of the values $(Base.available_text_colors_docstring)
 or an integer between 0 and 255 inclusive. Note that not all terminals support 256 colors.
 
-Keywords `bold=true`, `underline=true`, `blink=true` are self-explanatory.
+Keywords `bold=true`, `italic=true`, `underline=true`, `blink=true` are self-explanatory.
 Keyword `reverse=true` prints with foreground and background colors exchanged,
 and `hidden=true` should be invisible in the terminal but can still be copied.
 These properties can be used in any combination.
 
 See also [`print`](@ref), [`println`](@ref), [`show`](@ref).
 
+!!! note
+    Not all terminals support italic output. Some terminals interpret italic as reverse or
+    blink.
+
 !!! compat "Julia 1.7"
     Keywords except `color` and `bold` were added in Julia 1.7.
+!!! compat "Julia 1.9"
+    Support for italic output was added in Julia 1.9.
 """
-@constprop :none printstyled(io::IO, msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
-    with_output_color(print, color, io, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden)
-@constprop :none printstyled(msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
-    printstyled(stdout, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color)
+@constprop :none printstyled(io::IO, msg...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
+    with_output_color(print, color, io, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden)
+@constprop :none printstyled(msg...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
+    printstyled(stdout, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color)
 
 """
     Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target)
 
 Return a julia command similar to the one of the running process.
 Propagates any of the `--cpu-target`, `--sysimage`, `--compile`, `--sysimage-native-code`,
-`--compiled-modules`, `--inline`, `--check-bounds`, `--optimize`, `-g`,
+`--compiled-modules`, `--pkgimages`, `--inline`, `--check-bounds`, `--optimize`, `--min-optlevel`, `-g`,
 `--code-coverage`, `--track-allocation`, `--color`, `--startup-file`, and `--depwarn`
 command line arguments that are not at their default values.
 
 Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notably not propagated currently.
 
+To get the julia command without propagated command line arguments, `julia_cmd()[1]` can be used.
+
 !!! compat "Julia 1.1"
     Only the `--cpu-target`, `--sysimage`, `--depwarn`, `--compile` and `--check-bounds` flags were propagated before Julia 1.1.
 
@@ -151,6 +164,8 @@ Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notab
 
 !!! compat "Julia 1.9"
     The keyword argument `cpu_target` was added.
+
+    The flag `--pkgimages` was added in Julia 1.9.
 """
 function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String} = nothing)
     opts = JLOptions()
diff --git a/base/version_git.sh b/base/version_git.sh
index 39ebb1b8ec5ee..76092e9800594 100644
--- a/base/version_git.sh
+++ b/base/version_git.sh
@@ -3,7 +3,7 @@
 
 # This file collects git info and create a julia file with the GIT_VERSION_INFO struct
 
-echo "# This file was autogenerated in base/version_git.sh"
+echo "# This file was autogenerated by base/version_git.sh"
 echo "struct GitVersionInfo"
 echo "    commit::String"
 echo "    commit_short::String"
@@ -60,17 +60,21 @@ else
     build_number=$(git rev-list --count HEAD "^$verchanged")
 fi
 
-date_string=$git_time
 case $(uname) in
   Darwin | FreeBSD)
-    date_string="$(/bin/date -jr $git_time -u '+%Y-%m-%d %H:%M %Z')"
+    if (date --version 2>/dev/null | grep -q 'GNU coreutils')
+    then # GNU date installed and earlier on PATH than BSD date
+        date_string="$(date --date="@$git_time" -u '+%Y-%m-%d %H:%M %Z')"
+    else # otherwise assume BSD date
+        date_string="$(date -jr $git_time -u '+%Y-%m-%d %H:%M %Z')"
+    fi
     ;;
   MINGW*)
     git_time=$(git log -1 --pretty=format:%ci)
-    date_string="$(/bin/date --date="$git_time" -u '+%Y-%m-%d %H:%M %Z')"
+    date_string="$(date --date="$git_time" -u '+%Y-%m-%d %H:%M %Z')"
     ;;
   *)
-    date_string="$(/bin/date --date="@$git_time" -u '+%Y-%m-%d %H:%M %Z')"
+    date_string="$(date --date="@$git_time" -u '+%Y-%m-%d %H:%M %Z')"
     ;;
 esac
 if [ $(git describe --tags --exact-match 2> /dev/null) ]; then
diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl
index 0a9987671ea9b..328f368c80b71 100644
--- a/base/weakkeydict.jl
+++ b/base/weakkeydict.jl
@@ -12,6 +12,8 @@ referenced in a hash table.
 See [`Dict`](@ref) for further help.  Note, unlike [`Dict`](@ref),
 `WeakKeyDict` does not convert keys on insertion, as this would imply the key
 object was unreferenced anywhere before insertion.
+
+See also [`WeakRef`](@ref).
 """
 mutable struct WeakKeyDict{K,V} <: AbstractDict{K,V}
     ht::Dict{WeakRef,V}
@@ -21,7 +23,7 @@ mutable struct WeakKeyDict{K,V} <: AbstractDict{K,V}
 
     # Constructors mirror Dict's
     function WeakKeyDict{K,V}() where V where K
-        t = new(Dict{Any,V}(), ReentrantLock(), identity, 0)
+        t = new(Dict{WeakRef,V}(), ReentrantLock(), identity, 0)
         t.finalizer = k -> t.dirty = true
         return t
     end
diff --git a/cli/Makefile b/cli/Makefile
index 5c2de8f2ae6d0..c2e2bcd568a07 100644
--- a/cli/Makefile
+++ b/cli/Makefile
@@ -55,9 +55,9 @@ all: release debug
 release debug :  % : julia-% libjulia-%
 
 $(BUILDDIR)/loader_lib.o : $(SRCDIR)/loader_lib.c $(HEADERS) $(JULIAHOME)/VERSION
-	@$(call PRINT_CC, $(CC) -DLIBRARY_EXPORTS $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) -DJL_LIBRARY_EXPORTS $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
 $(BUILDDIR)/loader_lib.dbg.obj : $(SRCDIR)/loader_lib.c $(HEADERS) $(JULIAHOME)/VERSION
-	@$(call PRINT_CC, $(CC) -DLIBRARY_EXPORTS $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) -DJL_LIBRARY_EXPORTS $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
 $(BUILDDIR)/loader_exe.o : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/VERSION
 	@$(call PRINT_CC, $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
 $(BUILDDIR)/loader_exe.dbg.obj : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/VERSION
@@ -110,7 +110,7 @@ STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_
 endif
 
 $(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir)
-	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@
 	@$(DSYMUTIL) $@
@@ -121,7 +121,7 @@ ifeq ($(OS), WINNT)
 endif
 
 $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir)
-	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@
 	@$(DSYMUTIL) $@
diff --git a/cli/jl_exports.h b/cli/jl_exports.h
index e9be7c6f2f819..d28958c097edb 100644
--- a/cli/jl_exports.h
+++ b/cli/jl_exports.h
@@ -16,6 +16,10 @@ JL_EXPORTED_DATA_POINTERS(XX)
 JL_EXPORTED_DATA_SYMBOLS(XX)
 #undef XX
 
+// define a copy of exported data
+#define jl_max_tags 64
+JL_DLLEXPORT void *small_typeof[(jl_max_tags << 4) / sizeof(void*)]; // 16-bit aligned, like the GC
+
 // Declare list of exported functions (sans type)
 #define XX(name)    JL_DLLEXPORT void name(void);
 typedef void (anonfunc)(void);
diff --git a/cli/loader.h b/cli/loader.h
index 66e990e623460..b778976cee495 100644
--- a/cli/loader.h
+++ b/cli/loader.h
@@ -45,6 +45,7 @@
 #include <libgen.h>
 #include <unistd.h>
 #include <dlfcn.h>
+#include <signal.h>
 
 #endif
 
@@ -52,20 +53,18 @@
 
 // Borrow definition from `support/dtypes.h`
 #ifdef _OS_WINDOWS_
-# ifdef LIBRARY_EXPORTS
+# ifdef JL_LIBRARY_EXPORTS
 #  define JL_DLLEXPORT __declspec(dllexport)
-# else
-#  define JL_DLLEXPORT __declspec(dllimport)
 # endif
+#  define JL_DLLIMPORT __declspec(dllimport)
 #define JL_HIDDEN
 #else
-# if defined(LIBRARY_EXPORTS) && defined(_OS_LINUX_)
-#  define JL_DLLEXPORT __attribute__ ((visibility("protected")))
-# else
-#  define JL_DLLEXPORT __attribute__ ((visibility("default")))
-# endif
+# define JL_DLLIMPORT __attribute__ ((visibility("default")))
 #define JL_HIDDEN    __attribute__ ((visibility("hidden")))
 #endif
+#ifndef JL_DLLEXPORT
+#  define JL_DLLEXPORT JL_DLLIMPORT
+#endif
 /*
  * DEP_LIBS is our list of dependent libraries that must be loaded before `libjulia`.
  * Note that order matters, as each entry will be opened in-order.  We define here a
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 1fd28674bc8eb..e2f615c684637 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -227,13 +227,13 @@ static void read_wrapper(int fd, char **ret, size_t *ret_len)
     size_t have_read = 0;
     while (1) {
         ssize_t n = read(fd, buf + have_read, len - have_read);
-        have_read += n;
         if (n == 0) break;
         if (n == -1 && errno != EINTR) {
             perror("(julia) libstdcxxprobe read");
             exit(1);
         }
         if (n == -1 && errno == EINTR) continue;
+        have_read += n;
         if (have_read == len) {
             buf = (char *)realloc(buf, 1 + (len *= 2));
             if (!buf) {
@@ -353,6 +353,17 @@ static char *libstdcxxprobe(void)
 void *libjulia_internal = NULL;
 void *libjulia_codegen = NULL;
 __attribute__((constructor)) void jl_load_libjulia_internal(void) {
+#if defined(_OS_LINUX_)
+    // Julia uses `sigwait()` to handle signals, and all threads are required
+    // to mask the corresponding handlers so that the signals can be waited on.
+    // Here, we setup that masking early, so that it is inherited by any threads
+    // spawned (e.g. by constructors) when loading deps of libjulia-internal.
+
+    sigset_t all_signals, prev_mask;
+    sigfillset(&all_signals);
+    pthread_sigmask(SIG_BLOCK, &all_signals, &prev_mask);
+#endif
+
     // Only initialize this once
     if (libjulia_internal != NULL) {
         return;
@@ -521,6 +532,13 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
     // jl_options must be initialized very early, in case an embedder sets some
     // values there before calling jl_init
     ((void (*)(void))jl_init_options_addr)();
+
+#if defined(_OS_LINUX_)
+    // Restore the original signal mask. `jl_init()` will later setup blocking
+    // for the specific set of signals we `sigwait()` on, and any threads spawned
+    // during loading above will still retain their inherited signal mask.
+    pthread_sigmask(SIG_SETMASK, &prev_mask, NULL);
+#endif
 }
 
 // Load libjulia and run the REPL with the given arguments (in UTF-8 format)
diff --git a/contrib/cache_stdlibs.jl b/contrib/cache_stdlibs.jl
deleted file mode 100644
index bdcc3d9535fa4..0000000000000
--- a/contrib/cache_stdlibs.jl
+++ /dev/null
@@ -1,49 +0,0 @@
-# Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl
-# Run with the `--exclude-sysimage` option to filter out all packages included in the system image
-stdlibs = [
-    # No dependencies
-
-    # 1-depth packages
-    :GMP_jll,
-    :LLVMLibUnwind_jll,
-    :LibUV_jll,
-    :LibUnwind_jll,
-    :MbedTLS_jll,
-    :OpenLibm_jll,
-    :PCRE2_jll,
-    :Zlib_jll,
-    :dSFMT_jll,
-    :libLLVM_jll,
-
-    # 2-depth packages
-    :LibSSH2_jll,
-    :MPFR_jll,
-
-    # 3-depth packages
-    :LibGit2_jll,
-
-    # 7-depth packages
-    :LLD_jll,
-    :SuiteSparse_jll,
-
-    # 9-depth packages
-    :Statistics,
-    :SuiteSparse,
-]
-
-depot = abspath(Sys.BINDIR, "..", "share", "julia")
-
-if haskey(ENV, "JULIA_CPU_TARGET")
-  target = ENV["JULIA_CPU_TARGET"]
-else
-  target = "native"
-end
-
-@info "Caching stdlibrary to" depot target
-empty!(Base.DEPOT_PATH)
-push!(Base.DEPOT_PATH, depot)
-
-for pkg in stdlibs
-    pkgid = Base.identify_package(string(pkg))
-    Base.compilecache(pkgid)
-end
diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl
index a000370026eae..d5473ab4c7c62 100755
--- a/contrib/check-whitespace.jl
+++ b/contrib/check-whitespace.jl
@@ -18,6 +18,16 @@ const patterns = split("""
     *Makefile
 """)
 
+allow_tabs(path) =
+    path == "Make.inc" ||
+    endswith(path, "Makefile") ||
+    endswith(path, ".make") ||
+    endswith(path, ".mk") ||
+    startswith(path, joinpath("src", "support")) ||
+    startswith(path, joinpath("src", "flisp")) ||
+    endswith(path, joinpath("test", "syntax.jl")) ||
+    endswith(path, joinpath("test", "triplequote.jl"))
+
 const errors = Set{Tuple{String,Int,String}}()
 
 for path in eachline(`git ls-files -- $patterns`)
@@ -32,6 +42,8 @@ for path in eachline(`git ls-files -- $patterns`)
         lineno += 1
         contains(line, '\r')   && file_err("non-UNIX line endings")
         contains(line, '\ua0') && line_err("non-breaking space")
+        allow_tabs(path) ||
+        contains(line, '\t')   && line_err("tab")
         endswith(line, '\n')   || line_err("no trailing newline")
         line = chomp(line)
         endswith(line, r"\s")  && line_err("trailing whitespace")
diff --git a/contrib/codesign.sh b/contrib/codesign.sh
deleted file mode 100755
index 03866c4bb1ac1..0000000000000
--- a/contrib/codesign.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Codesign binary files for macOS.
-
-usage() {
-    echo "Usage: ${0} MACOS_CODESIGN_IDENTITY FILE-OR-DIRECTORY"
-    exit 0
-}
-
-# Default codesign identity to `-` if not provided
-if [ -z "${1}" ]; then
-    MACOS_CODESIGN_IDENTITY="-"
-    ENTITLEMENTS=""
-else
-    MACOS_CODESIGN_IDENTITY="${1}"
-    ENTITLEMENTS="--entitlements $(dirname "${0}")/mac/app/Entitlements.plist"
-fi
-
-if [ "${#}" -eq 2 ]; then
-    if [ -f "${2}" ]; then
-        # Codesign only the given file
-        MACHO_FILES="${2}"
-    elif [ -d "${2}" ]; then
-        # Find all files in the given directory
-        MACHO_FILES=$(find "${2}" -type f -perm -0111 | cut -d: -f1)
-    else
-        usage
-    fi
-else
-    usage
-fi
-
-echo "Codesigning with identity ${MACOS_CODESIGN_IDENTITY}"
-for f in ${MACHO_FILES}; do
-    echo "Codesigning ${f}..."
-    codesign -s "${MACOS_CODESIGN_IDENTITY}" --option=runtime ${ENTITLEMENTS} -vvv --timestamp --deep --force "${f}"
-done
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index 76956fac24f18..e8901a7b462ea 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 if Threads.maxthreadid() != 1
-    @warn "Running this file with multiple Julia threads may lead to a build error" Base.maxthreadid()
+    @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid()
 end
 
 if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0"
@@ -27,6 +27,7 @@ const fancyprint = (stdout isa Base.TTY) && Base.get_bool_env("CI", false) !== t
 ##
 
 CTRL_C = '\x03'
+CTRL_R = '\x12'
 UP_ARROW = "\e[A"
 DOWN_ARROW = "\e[B"
 
@@ -44,6 +45,9 @@ precompile(Tuple{typeof(push!), Vector{Function}, Function})
 # miscellaneous
 precompile(Tuple{typeof(Base.require), Base.PkgId})
 precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}})
+precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}, Base.Dict{String, Any}, Vararg{Base.Dict{String, Any}}})
+precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, Nothing}, Int64})
+precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, String}, Int64})
 precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int})
 precompile(Tuple{typeof(getindex), Core.SimpleVector, Int})
 precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type})
@@ -53,6 +57,7 @@ precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel,
 precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, Symbol, Module))
 precompile(Base.CoreLogging.env_override_minlevel, (Symbol, Module))
 precompile(Base.StackTraces.lookup, (Ptr{Nothing},))
+precompile(Tuple{typeof(Base.run_module_init), Module, Int})
 """
 
 for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base.TTY, IOContext{Base.TTY})
@@ -66,9 +71,11 @@ print("")
 printstyled("a", "b")
 display([1])
 display([1 2; 3 4])
-@time 1+1
+foo(x) = 1
+@time @eval foo(1)
 ; pwd
 $CTRL_C
+$CTRL_R$CTRL_C
 ? reinterpret
 using Ra\t$CTRL_C
 \\alpha\t$CTRL_C
@@ -127,28 +134,6 @@ if have_repl
     """
 end
 
-Distributed = get(Base.loaded_modules,
-          Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"),
-          nothing)
-if Distributed !== nothing
-    hardcoded_precompile_statements *= """
-    precompile(Tuple{typeof(Distributed.remotecall),Function,Int,Module,Vararg{Any, 100}})
-    precompile(Tuple{typeof(Distributed.procs)})
-    precompile(Tuple{typeof(Distributed.finalize_ref), Distributed.Future})
-    """
-# This is disabled because it doesn't give much benefit
-# and the code in Distributed is poorly typed causing many invalidations
-#=
-    precompile_script *= """
-    using Distributed
-    addprocs(2)
-    pmap(x->iseven(x) ? 1 : 0, 1:4)
-    @distributed (+) for i = 1:100 Int(rand(Bool)) end
-    """
-=#
-end
-
-
 Artifacts = get(Base.loaded_modules,
           Base.PkgId(Base.UUID("56f22d72-fd6d-98f1-02f0-08ddc0907c33"), "Artifacts"),
           nothing)
@@ -197,51 +182,12 @@ if Libdl !== nothing
     """
 end
 
-Test = get(Base.loaded_modules,
-          Base.PkgId(Base.UUID("8dfed614-e22c-5e08-85e1-65c5234f0b40"), "Test"),
+InteractiveUtils = get(Base.loaded_modules,
+          Base.PkgId(Base.UUID("b77e0a4c-d291-57a0-90e8-8db25a27a240"), "InteractiveUtils"),
           nothing)
-if Test !== nothing
-    hardcoded_precompile_statements *= """
-    precompile(Tuple{typeof(Test.do_test), Test.ExecutionResult, Any})
-    precompile(Tuple{typeof(Test.testset_beginend_call), Tuple{String, Expr}, Expr, LineNumberNode})
-    precompile(Tuple{Type{Test.DefaultTestSet}, String})
-    precompile(Tuple{Type{Test.DefaultTestSet}, AbstractString})
-    precompile(Tuple{Core.kwftype(Type{Test.DefaultTestSet}), Any, Type{Test.DefaultTestSet}, AbstractString})
-    precompile(Tuple{typeof(Test.finish), Test.DefaultTestSet})
-    precompile(Tuple{typeof(Test.eval_test), Expr, Expr, LineNumberNode, Bool})
-    precompile(Tuple{typeof(Test._inferred), Expr, Module})
-    precompile(Tuple{typeof(Test.push_testset), Test.DefaultTestSet})
-    precompile(Tuple{typeof(Test.get_alignment), Test.DefaultTestSet, Int})
-    precompile(Tuple{typeof(Test.get_test_result), Any, Any})
-    precompile(Tuple{typeof(Test.do_test_throws), Test.ExecutionResult, Any, Any})
-    precompile(Tuple{typeof(Test.print_counts), Test.DefaultTestSet, Int, Int, Int, Int, Int, Int, Int})
-    precompile(Tuple{typeof(Test._check_testset), Type, Expr})
-    precompile(Tuple{typeof(Test.test_expr!), Any, Any})
-    precompile(Tuple{typeof(Test.test_expr!), Any, Any, Vararg{Any, 100}})
-    precompile(Tuple{typeof(Test.pop_testset)})
-    precompile(Tuple{typeof(Test.match_logs), Function, Tuple{Symbol, Regex}})
-    precompile(Tuple{typeof(Test.match_logs), Function, Tuple{String, Regex}})
-    precompile(Tuple{typeof(Base.CoreLogging.shouldlog), Test.TestLogger, Base.CoreLogging.LogLevel, Module, Symbol, Symbol})
-    precompile(Tuple{typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int})
-    precompile(Tuple{typeof(Test.detect_ambiguities), Any})
-    precompile(Tuple{typeof(Test.collect_test_logs), Function})
-    precompile(Tuple{typeof(Test.do_broken_test), Test.ExecutionResult, Any})
-    precompile(Tuple{typeof(Test.record), Test.DefaultTestSet, Union{Test.Error, Test.Fail}})
-    precompile(Tuple{typeof(Test.filter_errors), Test.DefaultTestSet})
-    """
-end
-
-Profile = get(Base.loaded_modules,
-          Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"),
-          nothing)
-if Profile !== nothing
-    repl_script = Profile.precompile_script * repl_script # do larger workloads first for better parallelization
-    hardcoded_precompile_statements *= """
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
+if InteractiveUtils !== nothing
+    repl_script *= """
+    @time_imports using Random
     """
 end
 
@@ -477,16 +423,6 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
         occursin("Main.", statement) && continue
         Base.in!(statement, statements) && continue
         # println(statement)
-        # XXX: skip some that are broken. these are caused by issue #39902
-        occursin("Tuple{Artifacts.var\"#@artifact_str\", LineNumberNode, Module, Any, Any}", statement) && continue
-        occursin("Tuple{Base.Cartesian.var\"#@ncall\", LineNumberNode, Module, Int64, Any, Vararg{Any}}", statement) && continue
-        occursin("Tuple{Base.Cartesian.var\"#@ncall\", LineNumberNode, Module, Int32, Any, Vararg{Any}}", statement) && continue
-        occursin("Tuple{Base.Cartesian.var\"#@nloops\", LineNumberNode, Module, Any, Any, Any, Vararg{Any}}", statement) && continue
-        occursin("Tuple{Core.var\"#@doc\", LineNumberNode, Module, Vararg{Any}}", statement) && continue
-        # XXX: this is strange, as this isn't the correct representation of this
-        occursin("typeof(Core.IntrinsicFunction)", statement) && continue
-        # XXX: this is strange, as this method should not be getting compiled
-        occursin(", Core.Compiler.AbstractInterpreter, ", statement) && continue
         try
             ps = Meta.parse(statement)
             if !isexpr(ps, :call)
@@ -497,19 +433,13 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
             end
             popfirst!(ps.args) # precompile(...)
             ps.head = :tuple
-            l = ps.args[end]
-            if (isexpr(l, :tuple) || isexpr(l, :curly)) && length(l.args) > 0 # Tuple{...} or (...)
-                # XXX: precompile doesn't currently handle overloaded Vararg arguments very well.
-                # Replacing N with a large number works around it.
-                l = l.args[end]
-                if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] === :Vararg # Vararg{T}
-                    push!(l.args, 100) # form Vararg{T, 100} instead
-                end
-            end
             # println(ps)
             ps = Core.eval(PrecompileStagingArea, ps)
-            precompile(ps...)
-            n_succeeded += 1
+            if precompile(ps...)
+                n_succeeded += 1
+            else
+                @warn "Failed to precompile expression" form=statement _module=nothing _file=nothing _line=0
+            end
             failed = length(statements) - n_succeeded
             yield() # Make clock spinning
             print_state("step3" => string("R$n_succeeded", failed > 0 ? " ($failed failed)" : ""))
diff --git a/contrib/prepare_release.sh b/contrib/prepare_release.sh
index 7d4e55e3a402e..2772e44a858f1 100755
--- a/contrib/prepare_release.sh
+++ b/contrib/prepare_release.sh
@@ -56,12 +56,6 @@ curl -L -o julia-$version-win32.exe \
   $julianightlies/winnt/x86/$majmin/julia-$majminpatch-$shashort-win32.exe
 cp julia-$version-win32.exe julia-$majmin-latest-win32.exe
 
-if [ -e codesign.sh ]; then
-  # code signing needs to run on windows, script is not checked in since it
-  # hard-codes a few things. TODO: see if signtool.exe can run in wine
-  ./codesign.sh
-fi
-
 shasum -a 256 julia-$version* | grep -v -e sha256 -e md5 -e asc > julia-$version.sha256
 md5sum julia-$version* | grep -v -e sha256 -e md5 -e asc > julia-$version.md5
 
diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk
index 664a1e4b038e0..bf99c0fad9da2 100644
--- a/contrib/refresh_checksums.mk
+++ b/contrib/refresh_checksums.mk
@@ -24,11 +24,11 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS))
 NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS))
 
 # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded:
-BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libmmtk_julia
+BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient libmmtk_julia
 BB_GCC_EXPANDED_PROJECTS=openblas csl
 BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld
 # These are non-BB source-only deps
-NON_BB_PROJECTS=patchelf mozillacert lapack libwhich utf8proc
+NON_BB_PROJECTS=patchelf mozillacert lapack libwhich utf8proc ittapi
 
 ifneq ($(VERBOSE),1)
 QUIET_MAKE := -s
diff --git a/contrib/windows/build-installer.iss b/contrib/windows/build-installer.iss
index 4f5f0259d2f2c..a63cf853d4373 100644
--- a/contrib/windows/build-installer.iss
+++ b/contrib/windows/build-installer.iss
@@ -150,6 +150,9 @@ begin
   case CurPageID of
     wpWelcome: WizardForm.Color := WizardForm.WelcomePage.Color;
     wpFinished: WizardForm.Color := WizardForm.FinishedPage.Color;
+
+    //change button text from "next" to "install" when ReadyPage is disabled.
+    wpSelectTasks: WizardForm.NextButton.Caption := SetupMessage(msgButtonInstall);
   else
     WizardForm.Color := WizardForm.InnerPage.Color;
   end;
diff --git a/deps/Makefile b/deps/Makefile
index 244d9a2b588a0..62bb85e72c492 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -24,9 +24,9 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST)
 #
 # autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl
 # custom Makefile rules: openlibm dsfmt libsuitesparse lapack blastrampoline openblas utf8proc objconv libwhich
-# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls
+# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls libtracyclient
 #
-# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2
+# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2, libtracyclient
 #
 # to debug 'define' rules, replace eval at the usage site with info or error
 
@@ -50,6 +50,10 @@ ifeq ($(USE_SYSTEM_LIBUV), 0)
 DEP_LIBS += libuv
 endif
 
+ifeq ($(WITH_TRACY), 1)
+DEP_LIBS += libtracyclient
+endif
+
 ifeq ($(DISABLE_LIBUNWIND), 0)
 ifeq ($(USE_SYSTEM_LIBUNWIND), 0)
 ifeq ($(OS), Linux)
@@ -151,6 +155,16 @@ ifeq ($(USE_SYSTEM_P7ZIP), 0)
 DEP_LIBS += p7zip
 endif
 
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+ifeq ($(USE_BINARYBUILDER_LLVM), 0)
+DEP_LIBS += ittapi
+endif
+endif
+
+ifeq ($(WITH_ITTAPI),1)
+DEP_LIBS += ittapi
+endif
+
 
 # Only compile standalone LAPACK if we are not using OpenBLAS.
 # OpenBLAS otherwise compiles LAPACK as part of its build.
@@ -174,7 +188,7 @@ DEP_LIBS_STAGED := $(DEP_LIBS)
 DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
 	openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
 	objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \
-	libsuitesparse lld
+	libsuitesparse lld libtracyclient ittapi
 DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
 
 ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
@@ -209,6 +223,7 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL))
 getall: $(addprefix get-, $(DEP_LIBS_ALL))
 
 include $(SRCDIR)/csl.mk
+include $(SRCDIR)/ittapi.mk
 include $(SRCDIR)/llvm.mk
 include $(SRCDIR)/libuv.mk
 include $(SRCDIR)/pcre.mk
@@ -231,5 +246,6 @@ include $(SRCDIR)/curl.mk
 include $(SRCDIR)/libgit2.mk
 include $(SRCDIR)/libwhich.mk
 include $(SRCDIR)/p7zip.mk
+include $(SRCDIR)/libtracyclient.mk
 
 include $(SRCDIR)/tools/uninstallers.mk
diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version
index 2ab10915a73a1..616300377e3e6 100644
--- a/deps/blastrampoline.version
+++ b/deps/blastrampoline.version
@@ -2,6 +2,6 @@
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 ## source build
-BLASTRAMPOLINE_VER := 5.4.0
-BLASTRAMPOLINE_BRANCH=v5.4.0
-BLASTRAMPOLINE_SHA1=d00e6ca235bb747faae4c9f3a297016cae6959ed
+BLASTRAMPOLINE_VER := 5.8.0
+BLASTRAMPOLINE_BRANCH=v5.8.0
+BLASTRAMPOLINE_SHA1=81316155d4838392e8462a92bcac3eebe9acd0c7
diff --git a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5 b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5
new file mode 100644
index 0000000000000..9c6e4e44927fe
--- /dev/null
+++ b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5
@@ -0,0 +1 @@
+ee5afca99801e37fd3a42a9455ae986b
diff --git a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512 b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512
new file mode 100644
index 0000000000000..69a50a7282781
--- /dev/null
+++ b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512
@@ -0,0 +1 @@
+2adec92de521df1668eb13f2903ffdb01efd6afa5f04ce6fbd1737caa4948f7b629cdda7f75a895853a0cd49dccf8b388860d5c19c29e4d4aad6c7f8fa6b7209
diff --git a/deps/checksums/Downloads-030cfb3fefd29e87405cb689fb8178613131f55c.tar.gz/md5 b/deps/checksums/Downloads-030cfb3fefd29e87405cb689fb8178613131f55c.tar.gz/md5
deleted file mode 100644
index 95b4a593a88b1..0000000000000
--- a/deps/checksums/Downloads-030cfb3fefd29e87405cb689fb8178613131f55c.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-dc5f63b5cdab35d1699bed558229ec83
diff --git a/deps/checksums/Downloads-030cfb3fefd29e87405cb689fb8178613131f55c.tar.gz/sha512 b/deps/checksums/Downloads-030cfb3fefd29e87405cb689fb8178613131f55c.tar.gz/sha512
deleted file mode 100644
index 9c6e0c43c24ef..0000000000000
--- a/deps/checksums/Downloads-030cfb3fefd29e87405cb689fb8178613131f55c.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-99d5fd3a41d8e17f6955b2ff379bf7d2b9fed388b9fe22358a3abf70f743da95301d6790f6d1a3a185b61f5d7e392ef663a3cf52552da8ae69f9d943aafb2df3
diff --git a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5 b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5
new file mode 100644
index 0000000000000..4e70641a4a08b
--- /dev/null
+++ b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/md5
@@ -0,0 +1 @@
+fa2c90db0e7aa73186c491aa2f03bb2b
diff --git a/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512 b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512
new file mode 100644
index 0000000000000..3f54f39d35ac6
--- /dev/null
+++ b/deps/checksums/Downloads-f97c72fbd726e208a04c53791b35cc34c747569f.tar.gz/sha512
@@ -0,0 +1 @@
+d36737b946af5e720402ce4f25e4c69c740bdbdc174385d6448c3660b26fffe34c14af7c4dd4d26ad864ad12771cabdf922c8b3cf4423167a46cdf3001ede125
diff --git a/deps/checksums/Pkg-957b55a896d5cb496da134ea7bf3ee70de07ef2a.tar.gz/md5 b/deps/checksums/Pkg-957b55a896d5cb496da134ea7bf3ee70de07ef2a.tar.gz/md5
deleted file mode 100644
index 29c5e1852d255..0000000000000
--- a/deps/checksums/Pkg-957b55a896d5cb496da134ea7bf3ee70de07ef2a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f4839f375eb20b675d01aa7c98137803
diff --git a/deps/checksums/Pkg-957b55a896d5cb496da134ea7bf3ee70de07ef2a.tar.gz/sha512 b/deps/checksums/Pkg-957b55a896d5cb496da134ea7bf3ee70de07ef2a.tar.gz/sha512
deleted file mode 100644
index 2ca3a097f4c7a..0000000000000
--- a/deps/checksums/Pkg-957b55a896d5cb496da134ea7bf3ee70de07ef2a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8149e5d0f34a0d64d06a3302841e44bb1663ed60a2321a136109d20a5fe5beca5fc2988ded4e5bb5e69eb8030577e655b1eff456b0dbdb3857615622b6d0b465
diff --git a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5
new file mode 100644
index 0000000000000..08f5ccda57979
--- /dev/null
+++ b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5
@@ -0,0 +1 @@
+c135dc6ed97656fe956d9ee5cf3cbc55
diff --git a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512
new file mode 100644
index 0000000000000..957075f0f281a
--- /dev/null
+++ b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512
@@ -0,0 +1 @@
+2ae67fd4c5e1bf83df5df836fcd69afc0fb8454723043d32de9c7bc29feedf390adb76efda52e79937ea801ff21b5f4ea875469136424e2889904130b247b52a
diff --git a/deps/checksums/SparseArrays-4eaa4582569a76c3199849d8194582d948b7a70f.tar.gz/md5 b/deps/checksums/SparseArrays-4eaa4582569a76c3199849d8194582d948b7a70f.tar.gz/md5
deleted file mode 100644
index 9d5d598bbac18..0000000000000
--- a/deps/checksums/SparseArrays-4eaa4582569a76c3199849d8194582d948b7a70f.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c203f4a174f2ec017f3d11dab55d7b6c
diff --git a/deps/checksums/SparseArrays-4eaa4582569a76c3199849d8194582d948b7a70f.tar.gz/sha512 b/deps/checksums/SparseArrays-4eaa4582569a76c3199849d8194582d948b7a70f.tar.gz/sha512
deleted file mode 100644
index 14ed393bd5aea..0000000000000
--- a/deps/checksums/SparseArrays-4eaa4582569a76c3199849d8194582d948b7a70f.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fb3372f4ab06ad376509f5992b54571249ff21cf4bcfaba9f9c629e89d09eed4da8ffb6f0053f650904a39c591f4e9f602d365d93a466dc3d34a2c41db071049
diff --git a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5 b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5
new file mode 100644
index 0000000000000..96861ba265b5f
--- /dev/null
+++ b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/md5
@@ -0,0 +1 @@
+e6dc511b49e07a167848adc4e12690d8
diff --git a/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512 b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512
new file mode 100644
index 0000000000000..f503304f810e4
--- /dev/null
+++ b/deps/checksums/SparseArrays-8affe9e499379616e33fc60a24bb31500e8423d7.tar.gz/sha512
@@ -0,0 +1 @@
+f40fd137ccd6651fc8b697f57cfcbd8e3feccb99f6a6b32fbaa69cc0160b78cefc662b914ff8f4e48478ca48f9583318a6030d922d43ed66f8db59fd5985f768
diff --git a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5 b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5
new file mode 100644
index 0000000000000..7e7a889eecd29
--- /dev/null
+++ b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/md5
@@ -0,0 +1 @@
+6564297a5f5971231809bf9940f68b98
diff --git a/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512 b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512
new file mode 100644
index 0000000000000..bbe9b8bed6371
--- /dev/null
+++ b/deps/checksums/Statistics-a3feba2bb63f06b7f40024185e9fa5f6385e2510.tar.gz/sha512
@@ -0,0 +1 @@
+22d14c82a30f3ec7af09028423cc823808abf86918d5707fd1fcf6ca20dea7871589da9b22e462d194e86fcee380f549aeb65f585048f00bf23281786b17e040
diff --git a/deps/checksums/Statistics-e9ac70b760dcf87b77affe6c068548a3325d6e2b.tar.gz/md5 b/deps/checksums/Statistics-e9ac70b760dcf87b77affe6c068548a3325d6e2b.tar.gz/md5
deleted file mode 100644
index 0e2d0534cd8c7..0000000000000
--- a/deps/checksums/Statistics-e9ac70b760dcf87b77affe6c068548a3325d6e2b.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-62d47cffac86df3c59b3de8dd218aa79
diff --git a/deps/checksums/Statistics-e9ac70b760dcf87b77affe6c068548a3325d6e2b.tar.gz/sha512 b/deps/checksums/Statistics-e9ac70b760dcf87b77affe6c068548a3325d6e2b.tar.gz/sha512
deleted file mode 100644
index 95e88c63f1a14..0000000000000
--- a/deps/checksums/Statistics-e9ac70b760dcf87b77affe6c068548a3325d6e2b.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6354b1e84d7df1fe8d7e1444181497cac87d22d10a2a21b9f7fab748c209bd9aba64f2df6489e9441624fcf27140ccffa3f7eabaf2517f4900b2661be0c74ba5
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index 786085c82769f..011b0f6e4704d 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,34 +1,34 @@
-blastrampoline-d00e6ca235bb747faae4c9f3a297016cae6959ed.tar.gz/md5/b49ebb89b7f9a1eaf85217c4a9dac744
-blastrampoline-d00e6ca235bb747faae4c9f3a297016cae6959ed.tar.gz/sha512/ac3a767fdb03cc0a9e12ae6df31229e6c5050f2b7ccaee47ef14d6bef34b37a20c2d79956b73bf74d72af1f01a3d1316931db264e1b00cb6cadd57fb842e6f2f
-libblastrampoline.v5.4.0+0.aarch64-apple-darwin.tar.gz/md5/9c084085ecf2f263164ab3557db634b7
-libblastrampoline.v5.4.0+0.aarch64-apple-darwin.tar.gz/sha512/c8233325dc71582efe43a741c7e8348e853e02d77cc1296261abf12027008e1b79ec369575638c775944ae4ce9cc9d5d999e0994b2b2c7ceccd956f1c49d8f75
-libblastrampoline.v5.4.0+0.aarch64-linux-gnu.tar.gz/md5/6bdce10e27dfcd219d6bd212ade361dd
-libblastrampoline.v5.4.0+0.aarch64-linux-gnu.tar.gz/sha512/003a5afbc5f92ec5da518fc33f819b6c063946f75aac347775582266138a0cbf22839e0f4f5b13909185e8a2643d51db434d0d325d2898980386d8c24acfd8e7
-libblastrampoline.v5.4.0+0.aarch64-linux-musl.tar.gz/md5/048ff56f538d56f5cc2ba72c751a1bfc
-libblastrampoline.v5.4.0+0.aarch64-linux-musl.tar.gz/sha512/0fdef61ee05c77722e661c522341531eeb3882e76ae2ce1add53fea813a19b70f1cd50a75643c3324aade594dfd7f5b269f43be58e4ef3f560340f9fe95cdd11
-libblastrampoline.v5.4.0+0.armv6l-linux-gnueabihf.tar.gz/md5/332f6857be4f7840bbb03a78fe5b50d4
-libblastrampoline.v5.4.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/228a9b5fe1ef57c0ac4d3130de8bce184baac702c9df02fa4706558c23973ec8396db39d0d0125638bd330065527c6fe1c205e3a095b401c27900c21e941d1c3
-libblastrampoline.v5.4.0+0.armv6l-linux-musleabihf.tar.gz/md5/5f7008ccf0155c164bf8eec5a184be1d
-libblastrampoline.v5.4.0+0.armv6l-linux-musleabihf.tar.gz/sha512/0395ea3aec6ba4f4e3ce56e152a7d3db78b937a8bee603ed84143c3f35b76453ec3650c733ffd79a3b59424f5196218b33a45939ea176e8666cf4d44593e35be
-libblastrampoline.v5.4.0+0.armv7l-linux-gnueabihf.tar.gz/md5/f184171d5ce4fa9238e11478f54ad6c9
-libblastrampoline.v5.4.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/3e4406f2bb09dfa17b926a83246c45107bfd72776f3d22320985c3f2c58cdab78c674d759e74bd2725e04c7e78263acfc47879598db7181660145a88af5e11af
-libblastrampoline.v5.4.0+0.armv7l-linux-musleabihf.tar.gz/md5/c6996b382b042c87f714866bb1d2ce37
-libblastrampoline.v5.4.0+0.armv7l-linux-musleabihf.tar.gz/sha512/e5c69979743f228ed61931b5e1f8130c832f925a155f04de751ae817c1865d759999bfcfd0d2646ee192de3dba0a8d25626f70be7abd83d4a07c11988c6fd57c
-libblastrampoline.v5.4.0+0.i686-linux-gnu.tar.gz/md5/155937c2f2e9650654d93210e82e5b9e
-libblastrampoline.v5.4.0+0.i686-linux-gnu.tar.gz/sha512/e7e33da75b5076ac7fbdf1f882cc77244b861f5265bcb4f7aec28e578ed5af00d08f40513fa17dd62d15e7e911a948047b45f32e31f062eb4ef07bee4ce02010
-libblastrampoline.v5.4.0+0.i686-linux-musl.tar.gz/md5/206d874fbc0a9590390c5476edfc877d
-libblastrampoline.v5.4.0+0.i686-linux-musl.tar.gz/sha512/6f6dd3468f788d717b0ee58b189172950892a84e7379379863ea9d5b316901084fcaa325b8fe7c472d16f08552aa5ab89ccafefa30c05a362ffb44330f1ec383
-libblastrampoline.v5.4.0+0.i686-w64-mingw32.tar.gz/md5/9adc6d8cd38f9151feb13b21a28aeb7b
-libblastrampoline.v5.4.0+0.i686-w64-mingw32.tar.gz/sha512/13f7a6f14b0dc7db29591d6d9bbd3e41e72b4a079105987540d3393203ed487ebce32d21569c3724df29332006fc32d915e54055f99ecc74829717ca11bcafdf
-libblastrampoline.v5.4.0+0.powerpc64le-linux-gnu.tar.gz/md5/e9dfb0f5a0e564231a75b3fc8a44bc91
-libblastrampoline.v5.4.0+0.powerpc64le-linux-gnu.tar.gz/sha512/fb4c1f953728acf6db4a6a2e93bc5ed8242285cd3112ba1921432bef045b03a375813c34c0d071d19508c226669774afe640acd7d85b10de5176d864eee5f73c
-libblastrampoline.v5.4.0+0.x86_64-apple-darwin.tar.gz/md5/c092da8bc56af60cbd4afe5565c471c5
-libblastrampoline.v5.4.0+0.x86_64-apple-darwin.tar.gz/sha512/3fe0aafcdc51c5f2414f889a4f0970b0e302f4d1f37b36bedd094202ae9b7ea760607ca4f80aa815ca2346f526202ef932cd7d3f43522fc4a823c3db6b41604d
-libblastrampoline.v5.4.0+0.x86_64-linux-gnu.tar.gz/md5/e05d2295208649a55620681241f9a6fc
-libblastrampoline.v5.4.0+0.x86_64-linux-gnu.tar.gz/sha512/2bde6e6b80eb80dd78967dcf6d946b2397b3129b7c6de6fbab2168c23293770ad3d2bbc269c403ee26ea6d752b91eee87e1c651bd7f451f62a8a2acd68196db7
-libblastrampoline.v5.4.0+0.x86_64-linux-musl.tar.gz/md5/4b374750eb2d42a55a39d28cdee70d6b
-libblastrampoline.v5.4.0+0.x86_64-linux-musl.tar.gz/sha512/314d877497462d521fafc92299f1e387a03193c20050da529f3e3d02da9f55063f45883377288750d7b8cc64d8701c94db79798a7ef298a73051cd51f21104be
-libblastrampoline.v5.4.0+0.x86_64-unknown-freebsd.tar.gz/md5/b5549fb2b1ed82ab95b0636a1eb7682e
-libblastrampoline.v5.4.0+0.x86_64-unknown-freebsd.tar.gz/sha512/b94975cef6c1ea26e7635bc70e51a4c53ad1c4610322d0c15841ccfb7e996c8e55b5f060a5ab318d6dda4cfdb615d9c77848cb13bd71c03df8c90c6ac717ff0e
-libblastrampoline.v5.4.0+0.x86_64-w64-mingw32.tar.gz/md5/00bd607714c91a2cbc5e2a2f87e6d5e1
-libblastrampoline.v5.4.0+0.x86_64-w64-mingw32.tar.gz/sha512/e75a3780f65963e6a6baf68af57d7260b57052770d6ac3608971134b449d33d02a0be6f0edd0cddae1645ccb0faf6c744ecc3ff40cf7bcfed8acbf05f756013c
+blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/md5/0478361eac783b99002b1ad985182f05
+blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/sha512/2489ce5770a9861889a2d07e61440ba4f233a92efd4a3544747f83320e0e7a229a8fe01553d99f5f1d98713316f2506daf0adb7d024a46e32b3de1bb2966d637
+libblastrampoline.v5.8.0+0.aarch64-apple-darwin.tar.gz/md5/a28837b9838fef2b3831de3278ec7949
+libblastrampoline.v5.8.0+0.aarch64-apple-darwin.tar.gz/sha512/111ac2fe5f8f8102f2f7c9e9e6aa1d1a12d2db941238c949ff8e64b30335e8b2f6ecce0d5f577879c231eb839c06e259302b709f3d34e94a97047bfa984222f6
+libblastrampoline.v5.8.0+0.aarch64-linux-gnu.tar.gz/md5/9e781a026e03118df81347fb90f10d45
+libblastrampoline.v5.8.0+0.aarch64-linux-gnu.tar.gz/sha512/89469f32a666efd46437351a8fb16758c35e5aecc563d202b480c10ddf9fa5350a5a321076b79b0a1a07ec2cea0b73aa5c28979cc382a198fa96cca0b5899d25
+libblastrampoline.v5.8.0+0.aarch64-linux-musl.tar.gz/md5/b7acda2fdd157bbb183d0dd33643beef
+libblastrampoline.v5.8.0+0.aarch64-linux-musl.tar.gz/sha512/cf4125a47334fe2ec0d5a4b11624b12e1366ec031500218f680ad5a53152b9d752c0c02a0b92d0e07f3eb21f2f8f58d0c587438a4869a72197bbd5e91531369d
+libblastrampoline.v5.8.0+0.armv6l-linux-gnueabihf.tar.gz/md5/eafabd99fb1287d495acb8efb8091fde
+libblastrampoline.v5.8.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/63ff4e6bc400fa8ee713a1c5ae4af0a8e152d49860c6f5e94a17e426ad9f780d41cc0f84d33c75ea5347af1a53f07fc012798d603b6a94ea39f37cfd651a0719
+libblastrampoline.v5.8.0+0.armv6l-linux-musleabihf.tar.gz/md5/9788f74b375ef6b84c16c080f2be5bdd
+libblastrampoline.v5.8.0+0.armv6l-linux-musleabihf.tar.gz/sha512/f00ebf794927404e2294a2fbb759b1e3e57836c7f683525fac0b2ac570da2c75904e43f154cf76fce310a624f9b35fbd40e6c7757882bb6f30db790f4221a543
+libblastrampoline.v5.8.0+0.armv7l-linux-gnueabihf.tar.gz/md5/4492bace63d8274d68ecdaa735e47e99
+libblastrampoline.v5.8.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/8868283e6c5224b80145fdfd17f13f713053ba94e49c170f38f0cbf9f794185d7dec9c107ce65dc76121d3ac5b21d2f3857f619d8279bede86a906230ff59a71
+libblastrampoline.v5.8.0+0.armv7l-linux-musleabihf.tar.gz/md5/d66b6ed1d4e5f6a130f36791063e651d
+libblastrampoline.v5.8.0+0.armv7l-linux-musleabihf.tar.gz/sha512/414ad07574a6e9aa670bbfea13eaea11da13129c9ccb4193cad708014c31493ff10ff427558b90cb16040fa64c8a325c2e375e3310c39fb37bb3e7fdb6a72a5f
+libblastrampoline.v5.8.0+0.i686-linux-gnu.tar.gz/md5/595199a3a01174cfa4d9ce3407bf30dc
+libblastrampoline.v5.8.0+0.i686-linux-gnu.tar.gz/sha512/02c3b0c3c0a411d5090a081f3bbbe38aaae40eaa5fe63d0690e0582e233cd9ce76483922557d4f65dc457e29a4e84d86ee5af20a60b082aec7bec4ca8607c1ca
+libblastrampoline.v5.8.0+0.i686-linux-musl.tar.gz/md5/5832d0044842cb84f4e1e1b0a04b8205
+libblastrampoline.v5.8.0+0.i686-linux-musl.tar.gz/sha512/d28954d0feef6a33fa0bfeb59acb68821222d36a4e353eaf41936ee2c9aace719c2d0f0b0f080eafe2baecc67a29de4cacc0446aac776bbb615c4426d35c9c8f
+libblastrampoline.v5.8.0+0.i686-w64-mingw32.tar.gz/md5/89c07640b6c7ed719199b0cd0a570961
+libblastrampoline.v5.8.0+0.i686-w64-mingw32.tar.gz/sha512/71241e83501ed473af0bf60a3223075e22a48788fdcf0ad5b2932861c89ec0741c61bf6a04c8a26e68b2f39d360b6009a79ea2502b5cccf28249738e7796be89
+libblastrampoline.v5.8.0+0.powerpc64le-linux-gnu.tar.gz/md5/5f76f5c6a88c0caaa6419ba212f8cb94
+libblastrampoline.v5.8.0+0.powerpc64le-linux-gnu.tar.gz/sha512/785071e682075b2cebd992394e66169f4ee2db3a8e23affb88dc05d9abf55f49d597b2a7400a13c83ad106ad825b5ee666b01f8625e51aec267132573273991e
+libblastrampoline.v5.8.0+0.x86_64-apple-darwin.tar.gz/md5/21beb51d448bd22e4608a16b3f4fde05
+libblastrampoline.v5.8.0+0.x86_64-apple-darwin.tar.gz/sha512/620ba64d93ef416e483f813617aa313957282d8361f920b5444702fa911ff0051d1f8a8814b5fa0b082fd4dc77d96cb8b763937c786959bbc97cbb6131617152
+libblastrampoline.v5.8.0+0.x86_64-linux-gnu.tar.gz/md5/14c1045ba4d400f490ddea5343a46f04
+libblastrampoline.v5.8.0+0.x86_64-linux-gnu.tar.gz/sha512/0fdae83f4df93b28951521cf426736367f568c1e76fb68eea42b045cc9a288b6836abb3206a6d61e4f88adcf198553e911c45231aecb0f552e06de28eb3bec54
+libblastrampoline.v5.8.0+0.x86_64-linux-musl.tar.gz/md5/59b110676fcb2fcfdcf670a5d435d555
+libblastrampoline.v5.8.0+0.x86_64-linux-musl.tar.gz/sha512/57a5022e9fabc0637a29f3c32f6180cb4f6a90282191232e299df6cea5265b535e4a0af4fde15c8fe80e5a59edea0fae96dd3a510f5720ecd78e85a2a9ffbfe0
+libblastrampoline.v5.8.0+0.x86_64-unknown-freebsd.tar.gz/md5/cb1c14b4f8754561c5eaf8502582f09a
+libblastrampoline.v5.8.0+0.x86_64-unknown-freebsd.tar.gz/sha512/d3b19a2a9b3dc674119590d920a2e99705de823e7d01a210485b31f8b1ce59253c4a70f2d8fb967f7fa05efb6ac376d94e79ffc6848607a366b2f0caa58b4208
+libblastrampoline.v5.8.0+0.x86_64-w64-mingw32.tar.gz/md5/34fdc53745245887f968f420b2f02ed9
+libblastrampoline.v5.8.0+0.x86_64-w64-mingw32.tar.gz/sha512/bbf478736b7bd57b340ccd5b6744d526a7a95fc524d30fdf9af6e9d79285641be26fae5f9e5302d71a5be76b05c379e969a829e259d8100ba9c6ce202b632b3d
diff --git a/deps/checksums/cacert-2022-10-11.pem/md5 b/deps/checksums/cacert-2022-10-11.pem/md5
deleted file mode 100644
index 877aa5a716378..0000000000000
--- a/deps/checksums/cacert-2022-10-11.pem/md5
+++ /dev/null
@@ -1 +0,0 @@
-1363ae92d22e83c42a7f82ab6c5b0711
diff --git a/deps/checksums/cacert-2022-10-11.pem/sha512 b/deps/checksums/cacert-2022-10-11.pem/sha512
deleted file mode 100644
index 5c7b990cb9e4b..0000000000000
--- a/deps/checksums/cacert-2022-10-11.pem/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fbbd8d33932a5d65dd548d91927fc5bac5218d5a44b8d992591bef2eab22b09cc2154b6effb2df1c61e1aa233816e3c3e7acfb27b3e3f90672a7752bb05b710f
diff --git a/deps/checksums/cacert-2023-01-10.pem/md5 b/deps/checksums/cacert-2023-01-10.pem/md5
new file mode 100644
index 0000000000000..92063050b50f3
--- /dev/null
+++ b/deps/checksums/cacert-2023-01-10.pem/md5
@@ -0,0 +1 @@
+e7cf471ba7c88f4e313f492a76e624b3
diff --git a/deps/checksums/cacert-2023-01-10.pem/sha512 b/deps/checksums/cacert-2023-01-10.pem/sha512
new file mode 100644
index 0000000000000..d3322e5890f81
--- /dev/null
+++ b/deps/checksums/cacert-2023-01-10.pem/sha512
@@ -0,0 +1 @@
+08cd35277bf2260cb3232d7a7ca3cce6b2bd58af9221922d2c6e9838a19c2f96d1ca6d77f3cc2a3ab611692f9fec939e9b21f67442282e867a487b0203ee0279
diff --git a/deps/checksums/clang b/deps/checksums/clang
index 3cb8e59b6791a..c16dd849e6fc5 100644
--- a/deps/checksums/clang
+++ b/deps/checksums/clang
@@ -1,116 +1,108 @@
-Clang.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/298e05bc189e33877b76a7a6c9ed9478
-Clang.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/c460899d649982b6cc6c79ccfdebdc98257f7077e2f2f04597f86f3be98f2643400258035614ff7d434639c5861671ca1410945662d00ba1be8f3a887e2e0f59
-Clang.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/7189c71fa493fa40253a7b0644869c55
-Clang.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/5bca9174562f8f1388321e501c9ae36389a7b07a112bddac3c25184b535dc5324b8c7c56f40c5b6a31772dcc87c411054d6817d9348e2d38375887c339426bdd
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/a170c242afd649b37bfe17196baa1455
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/e77f0a8a268297d9bc7c164f7c89e351c1c839fc7ab52265552171d7d73b0c974b8a1c2ee200d7773a331293127b869d635b7cd6273e2db58bc4d60bc163296a
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/8234f867f72c39bd04cd47a4659a22a1
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/accae8bef10e8499b86073d0d50b5dbc2accca7a5a0acccc214d55049da882d705ffa936627a339713fe1aab29e9078888fd474ee41c820316efedca1f35463e
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/42423636e6a7a726477cf86399227c88
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/8cee8754ac597861ffd54b7a0f261efbe44ed3d3ed56711355b3c9f14a21fa0883b5665e4f55f82eabb2eea20a03ab738eaf32589322dce06f3788fbd943ee39
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/0151cd8dcc8784154a51a333aa1dc4bd
-Clang.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/b2cc6fbeb2ff4079596c09ced2b856ec7be2db64420b09f3b52b80cac1ba900967af611709834940ae3118adf82bdbcb2d5a90d8b9d5b5a1c1aded8e1b604dca
-Clang.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/8ed62616ef3e1a583e9095130ebf2ce8
-Clang.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/0ffdae3ec39068e56749da47636dffc21a601df9c2bfc7421c97c6924c6107fe10d2eb641b660fde50ba5fc0a4ceb922041a0adf32cc8aa553d0ab9aa374f11f
-Clang.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/a3518f44e475e1ac8b9fcd6fdf470bf3
-Clang.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/2fd3a8524f627a4d7967b6646a9ad9d973301d060883e2b488a67c2b4bb3242c44e46f78a63257cabbca000690e3659e7420173a40af6439106dc1806ce9cfa4
-Clang.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/598475afb9320f81dffd8c2af89564b8
-Clang.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/6a360ea886eca3c7a60bda7a41b305afdcef00593f7084c50a44456b1ccd079c2d6990e90c081c716eafb2c5226d880a9f1bb855e61941fa4acd0590b63dd2fd
-Clang.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/22bdaa9b14a7ab40cc0de4c5fb174f20
-Clang.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/96f45265f947357cd3f79d42bc05c891570f62cfa4a84fef6c99a8db14a845a444d857d4e84985a2b741377798861e714252b61f7676269b98caa5e304e63ff6
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/70f47a6695b81d157c87668ce3b1e453
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/e1ef1e288aa7fad7dccc2bc4bdd0cbef0d134e97b41f85e5e85fc56e6a276beb358aecfe0d0791d745d2e3834ffba269b7bb289716d39ad3260568cc10e9b3da
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/3141a42822b55f86d0f075ded553be8a
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/cfae9ff35702119c0cc5de6cdde4640629d020512b086280c26c974b71c024f0555b910a29c95f00a9ffb602f12512f21dbaae10278dc15e6ff6546f66ad1a97
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/8b1f2da8ec4768c9426b15cfeed00dbe
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/ca4efad2aea4339fda792e8ad3cff7ad891ff2ed4a2647d466a6aeab67d61054da79fb39c1f3bd0456226dac5eb8ef1306ff70f795e21725d3611846bdd124d3
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/468e0f231d30f64f40866a719d281e5e
-Clang.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/91e205aa53337a150d6c3c84edfb06e408aba0c39843db5c3defa18c6684055139c4c40c7714355cb6d7530d40c720a13d59e9a7f99ffbf2ee389ef090687845
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/a4006665b5c955df38a251281429dd94
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/f67892698dfee1861c0837b6439ad897502e3441f534b4027eb5fda6a73eda616343a8d8d8273f08b7cda0ecebf88eadeea1c2b9df96bc807767dbb455144e17
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/6662732339b55dd2aac965b12db07969
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/b66ca56ce67b653f903bf85a274b11077f4967946719b71a306ae249867cf22d2f22e8fe7adf67be29b4cff87ca54a8dc910aebcc8325f9188854da5907b3b2b
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/392e4b35796cd085840345d1078b6c43
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/78aa79f0ede37280acd504ff32cad3ea862ee20118e00b65c53d6eb2c0a99d307be7961abc3e53b01a4e44a4a26541c62bc3ba6c1213f17335beee71e905f6bb
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/679b0cffef21e3e8a3ac41f9c0d1758b
-Clang.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/3df8edf277753d5a27826d0614a7e359d44a48f5b086641998d9b0b1f4bf575c03cff03ff59b7dc3ca773af3b91a487755561a5964c7884896a885b40d5c40f3
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/5a7526421f59c8626b84fbe3c7adb686
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/ea8230025f212b081696dcdd094c7f44f86413c5b6547a31e166b05e119a82fc4afa811fb6907037c07de309f21e1b36c266a65d6f4fed49d815036ff578bcf1
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/a385cf886ebf1036e465c54575ee45a8
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/2c53f5c1bb19f33764c0c65d7e337fa0f96213fd98527db1680ab2f036ccdef0a51c008667892300413f7ede68b7220da9f36420c1188fb58164497aad41e22e
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/7c19b127843bfad76f981534395e9b2b
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/d5ebc8f781f1147a5e459856a2888dc0525f1f63f6f3e53599faaba20c5b6ef75ca01261c8cf8c6917178658e2f38a70720299c5bbbb316b4ef631a8129ed7d0
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/5877d43223bb2629934394bcc803c580
-Clang.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/1ebeeb1f491463acaf12d2f1ba10220ed215df80ad79e392f99989466f34d395fdce87fa3502bfdaaca1e4feae7998d861bacd4fcfc12b5e23467d1608cc27cb
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/b277b57e63f1025bef77786400c30909
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/fbf0eb469dd91f9d480417e86ce341215758c48adba98b4eb3b53d9321e2ed81cb551549614da722bdf62eefb8145b55d160a2563cd4523c43ff71276fd45f45
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/d973a878a00a38fd892c9e697e4aefac
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/6557bc7ea57a09ae6dca45cf90f925983f30afabe4af597aa2a397a9a3182b61d0408bf16c4cee5ccab3907a644d6ad5542d30fa28cf3fb5b790f66f43031b91
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/79284554223c4866f204bb7704e99bfe
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/b78b1c7234096d88f061253866300a8db94928ddb8867d9d5a03f263c32fb3ade36f77c351b04ef3ebfd07131e9dfef7afa0d81cf5cb88e87848cbea354f15ce
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/e9235c729a76570ad2a919f2c4cb2415
-Clang.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/651b512a8cae627cb30575f0907ad1488e3838fa106fe583962e8399883b5b02138e29bcfdb30456ec3e30360efce7283018005ac6352fae4e2564db3b50aac1
-Clang.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/7a7e56421b13e36ddda5119def5cf104
-Clang.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/fd45e46f44bfe2e6346e4795cd1e08bb775c243ba015394e5b9acda2fa0db704cf96191a79cd48c5bbecfc87118c6165ddc3b74f91ef1fa651e71df6f610042e
-Clang.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/f59a37a860a38dbdd6f422d9eaf24642
-Clang.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/4e1eca8784e42ff1ac1fcb810579746f49bd54559ca9cb20776fb84e7e42f5fc924a975d4941c1e061b31901f3f9522fca3e8bbeac45fd8717556e5e70fb4b05
-Clang.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/e3c0338c9b592c67562afecaee5eee8e
-Clang.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/0dda3627cfec247825103ce25d51457893eb699a6176151dbc0874ef1e087dcbad98790ba6400e2a8c5187b742d2e7a2671b15f3c63b9c17b9eaa8777795eb01
-Clang.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/2359790c7f6de7fbfe8208c3f3cddf34
-Clang.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/c4032bb8322f9daeb7d838ca66868bd5487242ac2e3854d47a789f17211a9255efe79e3893511099ea77f61e85550b56b8c2c3b206fb632c95527ad391584e51
-Clang.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/e8321015b108eace4abceedc289bd6fe
-Clang.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/a760c2e1a6ff33225a42ee4beb566d38c78ccc333d58129635e96009ef92d8c96740e164e0305215542bdc3ae0339e698a899c7cc53c260411f1ff22b60d3dde
-Clang.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/985f7225c38a5e7f68d759b2929d3fa1
-Clang.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/f74921af797ef0d3d1ec394ce1b672b72d4b25225207f2c7f7f227f0f033647afce139f710e6d4d23d474528d9f1e223f286d0c2b1b1bdf82c38b273bacc838e
-Clang.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/9ab44ae551b230e83232de13e2a63203
-Clang.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/f017dfec0f088c7e0fb714164ca4e4f73cc290e8bfc4fa1838bfb5bc8f13d2cbddc1294863febefbf83dfdabf72b6b6493cf8b816b6a7c25d6a29b658d757e80
-Clang.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/d772e714293d4f4a49652413783ad4e4
-Clang.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/b45610cdb1907d9b1aabe6dabf2a6e7ee1e2e796caf5c62f504f17f098a61d2913b02f30570bd7ca62005276c2a2861f6eb347bc93c78e4878e433f13eb187b8
-Clang.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/a91febe6ea0dc6e45a1972084cfd9b55
-Clang.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/cbd826d9843db662e5ab74172380a7d1000822c9c5a821fcc54746909dca2fcdccc7190f723e6aca60d73fb204422c95edd01bbcbe0b355d998f84f40d899ccb
-Clang.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/15dfb79ac059279303528fb9bd60417f
-Clang.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/15236f71d74448a81f08e9cd9ac503e17e6e8ef679b11219f6d42b8b4a74a8fcb0093f3d3bdc36b8041ec67f1ab30754dc73bb54d498ee3ad52c519cd260cf09
-Clang.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/035734a134fd46a5fe558f264f838299
-Clang.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/2cb35b0907129d1d8d6468d8f9a981839afd829cd16fe5fb539fe50f79560e852e5f0873b577ef0827211a51b07e26bd6090c98cde24fadda58ed28735095fbc
-Clang.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/262c482c5af85f15cacd7d63f645589a
-Clang.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/ba70b123c105edd4ea1906f988582c8daaf0e625d645ad881976b68b98cd57717143f4b4bf35c3ca90f582ebfdc07c1ca208aa7c7aec330347f1baec74a79262
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/935168f2b01f3fc9ab11396ed2d4a0bb
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/e602720c37e841f67ce2810908943a1bb68d59a2f17ca0ecf772de4a94880459a467fff263c15e310189c12bc573d1d3d2a0264274965d4c5f2957fd36daefee
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/76275a214835cf423b53bdb2d5d483ba
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/e231f87d73f32b9f08c1dfc5a7f6407b6a214b28c77d4604c1358ac0ffccb7391e005e4f4e88c03dc5fbe7decac6df77e5d9ec60cdfa18f47bf51c70b0ce3d32
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/dd6eb853ba155972322f4f92cd019146
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/c48026d655f5a8f896866299e8cbd4841bf3a1a2d00957309cbcdcf137bfd6e41bbbd8bfaae51265127e7922c3384512f6c086060e03e9bb1bcd22586969c9db
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/a8d6871f004cdca531abcd14a783f418
-Clang.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/e9e33eaa5af1485715fabf281cdf4c45f985904012db4f31a4d6ef70611a2ddecc13cc0dc4deb1ed75a6dd4da4b29b1cfae761c108f661e9df46f04ad9e011ed
-Clang.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/12d3c0d8d84a41630198eb69a06651f5
-Clang.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/1ed2de110db980e6daaad2e821fba6653cf1e72ed3b69d41a423cd597eac5ac18f88cd83c2afcc000c41effd268bf8b545c292096449630ab2c091474be42261
-Clang.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/a5b6ba0d493b4542e3c5374e982b60ab
-Clang.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/d65a6a6adc6a47be6dbc53b1b74e8ee0065cdc5e593a99f9fe40fdb8d23741146720c89de4dad9388dab801f4797e1524a39d778400e622bb9c03f23622d0708
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/c7f5a6766b5f9aeeeff2a10748b35627
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/1717d54c6328fd27a87aab0f120d85c066e74cc19f6e77f57f138d75d5da02ca9fc9956e620173385b89badfad12dbb6d5b90d676d000323024060b14a4a2212
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/3e415c55a918b9fb20e7a9159f2a302f
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/be58bacc8fcd1f0cff745d8c7e555feba3e059274925b9361005603f93348d8c2d88c7f9249bc7d422e2bce52cdb280a2f1c5dab93044235cf8959ccfb193f95
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/65b32b4dc28112dc57a1d62f5cd8740e
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/497eb5cfc826424468b3e53af7be3c0ee9c1d7a9ee85f30660dffbc728319301373617f9f7d9d09a300132fc93df8038fe2f099846d6b55ad07263afa2334b96
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/066451584d69234c5096edf29421a713
-Clang.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/1e3c7dd8387cc5e5e0b1bc248c6d4dd7ca67bba2c681708593f395158546a305f9f7ea9a12be35376f020c768db834a3458625abe7c7ff3edfecb3b1425506a1
-Clang.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/51d8ed30230760dc228e3f4350cf8527
-Clang.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/37b0a003eaa618782f3ecf660829a1da8ec5d06bff9bdefdc6371a99156f0ab9778cc841c03b6ed1cb6e97e66123ce9f6d91b8c260a27f55e1d5d3371869d45c
-Clang.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/4cac304533ee927f818f6f2e8804c6b4
-Clang.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/7f642a1da1074683ac8023d1f2bffeb7ae06d09bbdf31d6cfaa089ba44c459f71326585fce3587f0b1c98df122f635de46b3a2dcc9cd245449e453d47dd3f0f5
-Clang.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/33daf6fbfc468f3e0b013cc43b1482ba
-Clang.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/1aae2a79622e821e6a8743786a0191ccafa7fe11c71741cb8cc41029890def518d41973f74568c6d8d4a6c8e3ddb37cbb29413c79517e4cc0458c2b636f92171
-Clang.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/debb98a67dfbac8e7f57ef6ab242816e
-Clang.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/ed85d0a7d880193421f132082f46facfb9750da60c7860c611c37947212b7c7bd5393899906b0e21f58d98056f8d0611dbf25e06c6d6021acb4f79a7b6956100
-Clang.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/1456cf6c22c78537bd3feb556319a05a
-Clang.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/0cd46d6e0e135db0f2961d614faa59b8442e56f7507f07a27675dd400078d6556060ac13ad40b55f41393ab5be6d1db027038e97d3fd32c833ca1ec64ea3dd4d
-Clang.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/8b3ae4d75b49ce372f64011af803c32d
-Clang.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/2de4e4521887d6dc951ab29bd25cbaf5d8dbd55630b63682acfb0929ea8a378e051f61f3d1b4cad127b8f67f65848dfd5aaa2ad38dcdee39a0c3f0d0a2962dbe
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/89f575a07f9b42b659af895d66d36dc0
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/cd3a201eedc0c685d6f11537b050bbc8aa29583391790a4d54ba8662a4ddb27574bc588bba52cac899a45733807a879d57f1caac380c0cb6401a7012602aa345
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/5f3acbfc31fc032a18799b9738643f44
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/ee631163849ac5b8755477c0b485b3bc9a24ca07270e68b374beb5c2ae10aab1a44586ac4f40fcab80a08a3fdccee66584688e98859bf9a07d23c1e14e4a4ca6
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/450e2f2c49f97fbc0e18ab3e0daa183d
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/4ef4adbef50c5bb77699c8aec3f29a8faffbf5114c3b45e6530b4180e97443133d19f02358de99feed58cee37c88830c76600d2bc81fdd0318c3f41540f3190c
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/ef9b53f0fbf0a71c45277a49104a3939
-Clang.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/326db9bdc978940e36580e3478bd64473bcf157840c9d6eff67ebc1f2452e00d41acc1fa6489c7ac536b000c3c6fa2e86198077d3f95bab32d71cfde6fc1a368
+Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/5dce383804bd3d404b8a1936c12ba457
+Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/5661a1cb2044ded03566c9316978595d692667fbc4e951feca658f9986a8557196557b05ccddf1b00b818aac0893696c3bbbf63a35dc9ed7df146b4488529f6a
+Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/549cbc6fa28ebee446e99701aded16e8
+Clang.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/55eea0b514aa2e43ad2f373ad25ea4fad5219ff1cd8d5b639914c218a0a454ae9b27b8d022ae73771d8ec89fa329f5bfde538817653cc59e569b600148d56842
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/ac3cd40e47702f306bc42d6be5826029
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/73b217caa53702bc6fbbb3286241b7a20c111358cb9436283e9f7f9fec90436d5b54cb4c332afb7e447867a40ba46c9e3b93464acefbca7c0bb6191001525cbf
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/b1a656501493c15b98442bde584a34d7
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f424254cc887301d4d5b04fa71e2c7da6e4d561725d5b06278925e05be1c62a74769f19c37b431c2e2d73e7e5129acff07ac54a0b7fd381821aece27f260c116
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/969170b1a791e89a0094154f34023e86
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/d6ae356c9b1b80cbc5cea4eb8632b77ab3ce0d060b103cec4a5f1c73feaaf60688c2253034b2a6e132273fe04c803de93f415cbe2ef40cf1d6f6a30dcfa03af3
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/22d599b774af41dcaa54481cc6325b1c
+Clang.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b0f257d45f1a920f46b18049b762b5a3cefdf8683c4dce46f48ce2993e6a622dbdfaaa6cc9a9cda8a7f047094a6f804091d1ba6c83e26cefc38fbd1ca5c0a536
+Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/f2f5064217c14700f0f933b704fff233
+Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/2284978d8cfe22aa49b1f3b161c75cb0c9d43f84674ba58a1335edf818b91c6ea1684a9c3580f2e1918fdc050a624c698a4e87dc163e9076b9d6c0023c989d7a
+Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/eafd72ec24ec81d42cb044e4e4d638dc
+Clang.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/bbfc6c9179fc43a1db0ad82fc8c1fcc8ec8ce94d5c32b38cd1f88490dedc67953283995c0dd4db7262a9206431135cf2671c6ecc6580da65ba8ff4ec0323ab64
+Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/0432eb21283647995e35bd0d486148ab
+Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/561beaf45770c06b35bc1626e93a0cd89874026a8afa22017b40eb1e6ba306b05305619d42a4a2145c576b1dcc77ade80cd0bf0e0237761f3517f4db402f9b74
+Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/653b9b87f2573818d66992f969f7811e
+Clang.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/517df570b40b51a4f4cbcecbdaacdf0b592fce66ec328139d95eaf8b63c89a1adb41a9cfe4982f5bc032fb29a6b967dc1b16b0eced98cd78756ced36ff2257d8
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/4b1a5cf46925575bbc6765f3336e1cc8
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/7afb23aa5ce823b1f2371e038faf311e8e21c3843cc50a0b1473038cd746fcdc77dede67130631bfaee778c3d42ac1eaa23ec664a82f43e2ad406962f3019479
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5a6200aef0e6660bb156ecf3e53cc3c8
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/0dc564fe753fbccfa03ac94e19828ea5ba2b8b74e7adbe7f501ac8b11d1ed8fd85a65572dcdf957018bfa1be3a6babadb1ec3937966347fe49fb38596a4b1728
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/ad693e5cf8f2583c3311a39c095b0bf8
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/b4e1120c960bd69f2643f185607bb2139095fa7a2f943fffec65ccad9422f2bd801131185cbeea1b75298c64cbf109fe28bae54c1b9917fe1ce8b2248d623668
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c04cd594e25324c42d97739d72e772e1
+Clang.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/5aeeedbc3f0f8327f7760abe3eb6fda368353a7b429e31ff47a7bf42d612d070cc86f0e97031ca0c2fa9f9f448757d59b2652d89bb05b27fd380f2116a5beb6b
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d706ad9062539a37df1e5cedc084086a
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/4862bbe0c71fe0e8cfddade0f881637ae5f58263208e1154f2284884ddf4ad43d76d98bde57904829f2218db21e4fb6ac038e231b682455fa22deeabe65f1336
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/6cc35754a4378902f9f126139bf299a5
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/4256e9c3f58dfc896d56eeccd7495601ec585e208857de14f91e2d95295a4d03009149f49254be40b27affd5a2250323c6d0744e1ddfbd5fb924fdedc8a993d6
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/128bb901686224fb6d32c9689c03cc21
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/b7048ff3d8a3b3e3cddc49b2cd9fbda8ad308fe10e932e8d90000e76c12059547342a1132149254628077d0efc36b34479688b3e9f32e7364301e85a18304cf8
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/d860412ac46bdeef203a578f0bfc5b05
+Clang.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/497fa51af138b3c645d5017165aea6d33410262d2ce69e322b259b34fbdcf52a131541dbac66fae8b9a9027b70771199f9a76869721bf18760065ca7cb3b5364
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/6fb13f1cc2aec210298c3045f8a7fd94
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/085c94f43fb46ecc8cadfed5c5d91978c9ddb9d647eea6e82ff0a548eec53dbddc77721faaa8c43ab5b0674f83fef7aa3b34ba0dc273feabdbb8cb95bf5534ee
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/63d765b268e792df2aa92f3689de23de
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/32b2397bb4b627f0ad9b00838e30c965feca902e417117d0884244a2be6a50e0d4d40e55a27a87616e33819967455f90ae0a4319c2eefefd49b82e9041835444
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c00e93211a1e470f1b00a53e776a9e3c
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6621b3ab12302657ef2441482e8bc6335535964fda472ab8378221e4a9cc0813968589f457e1af66141821cdedbf8eff3080c20105eec810742e5539fc329fcf
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/254fdeddad203954ec0531875cecec8c
+Clang.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/84a19469231a9204a553abc58073e423173ff828445634501a61837c0e249ed003f9051fcf1da4eb16201f80d755e7bb4b7513536c749eb1e7ea78c7ded59945
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/0475a3e401b59e1a34dcbd9d9b980823
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/e0c9e1e18cc4f7106effaeb04e0e6f41fe8ad872d67d3d0da928ce36d1bce6be3d5231d149b2d404b3a4b99900b50d280ac6f7dd8965d30c4dcd3913590144a6
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/08c9e802640582af0b79bc04702c9771
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/d4f413bbb5d5c3ae01cea2b87ef4e46816023fcf4373f00fca13f2edc6278eac651718feea3f8c7d04d3ef82360417dd93b6c7163d54ecd79a3811a0ed588054
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/e7c253db924ea5cb5098be57029e009f
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/373884c492e5862aaff27f5782ba44e202e581e4faeb2cffe14bd696a590c0bc72459fccf3342aadbf189282af0c43efe3db113caa47c27c3ea556f0b3313e7e
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/9c1867e316ac258d9199b389ea053d2d
+Clang.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/9537f285d2a06b8c86ff21aab9daad1ba7e71bcfac55d780c693da8cc250707011ee22ed021e387422543b1e2abbc34de1a7fe49175a27a9c11e43b00549f1be
+Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/f9a13a80efacf45f49d6d7591d2cc3ea
+Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/c7edc55c4f76ae086080ba639d83793738884b9385618c52b30f5c3fadb0ed2a31bbe95ab80c5eee8504ec6301d73fc7318a8c0f877ba8b5f51170de51179d9a
+Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/c9911680ea55b36c4b9f59cfda2a8e33
+Clang.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/9c3722bd402627a4f51b4c98c8712a85031aa79380fe38be0db9df13a5cfabe428fcc7d5d5cf804ac4387d738cad1796bb3f341ebdcf4726ea7f699c6de586e9
+Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/db82d62c163f69038364189a60b18d09
+Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/5dc415426bd99dc2d7b5fc4fe3f2bb1aabc8961fc2b03a2bc14562f330b273c4d1942d7ea5f05b38c76ee753b440cc4f92015a25f9de7980aa3b1d52f7d0f2bb
+Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/67b7194b31f68db8ffcf5ec250948740
+Clang.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/a032c2ae911b6318ab23950ac74dc95f2c8bf815196be62e410b20cd2e271c4154f916388d119ca91c77e07853ba2c56bd5e75a4ce6742d2a7bbd9d3e61853ea
+Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/50b4fa021c1c9b6bdb29eae63ea22103
+Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/40b377df590521e5291c3f2f9daa8d60863c03253b07d0e537288324819a909ab3466b710b10b1a92ccd6f3566702c515d808f03e6d9fe9d01617b9a836bb63f
+Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/d2da27ebc23793c107cb03e176f02d6e
+Clang.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/3ed297cfd3c1ec03cbff10d7b54f9f4a374a9cf8c699287f179ebd5fa000dd525fdbed3c31b59a8ae32ef1c56115c3a84640d776f01c8a92bfae979c332043f5
+Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/aefacc80a5f704aa7498b35dfc2441e6
+Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/76c7fd64fc4323ca442fb0aa30b236355b26328f897ea8cf3e3be029246574d150a9790ae1c45b289e4fc3050fdacc20b6d57b588a707f6d0750e6da91815edf
+Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/59048d333a8a261d079673828c174d96
+Clang.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/bcd0c3c5e04cea24383fc2472f6190e48f8738fb7fa625ad700d1997f8aa81c9b6909af0fc38a2287b80756fbfd01300f3388c19c8df791d78ed913d8d59dee1
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/bb4007dc5b0c0d545f457bdf35e868ee
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2f686bdd0bbcc62aaf9e20d3804c83291ad7c41a0a174516d7a83dee7f969f7d50f19f70c0f35901a3eaa8d54fe83204d832a901586feb9eb8e141631c411b3b
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/08f088ab3498a4f7645393f43098583d
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/faf62bba3a282f218ea569d3064d6c0cefde9232d055fc3a08c994fe424f2b60dd9bbf1655f6ca101da701e3d05bd813695d6a66628ec2b6b4d11b89f773f0e4
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bb8f05da1e35ab358a96265f68b37f57
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/30e3789ccca1fdc5eecaeb25345c30bc4f752cd41b8725c5279654d9b3f500d6e8693c6d1dda8b3167fcce15443682994d66922a17986419eb48bb09970f02e0
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/ea9fdfb7c8d1a9c973ea953d4e057f0d
+Clang.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/5e5d9298a12e65a7e4d401a0e404eb172c96e70fa906096f549e7eda5dbfb294189e4f3526246f28f71ba3bcf35d1bf790f05522150c5877bf8f186d8c503795
+Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/053334d0c5aabaccc81f22c1a371c9a6
+Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/aa8daa99a4b52985d80e57d175b6fc4489058ed84f06fb2fd67710a873d5333ee77b64ed0620df099ed5617792fb3eab23d9cedf3ab3c79f4eb6f04ad1fd9588
+Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/b80918f03dcdfc5b5f1e8afa90dd4e88
+Clang.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/c0397541e06878535b41ba7479b603699d78f1ea3345d9a1146a0e7d17f42078e8365dc71a117981b2d2b25f35a40aeb707ff9ee8a2145303f3cb6567e82bd54
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/78b9e190d5cb7e6fb172814eda2996f7
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2c9a764ba2427faa8e67285205dd1b8c211665046c9a4a19aea02de46d02a6d4287467bacd1260b7996b2b85d3e571e750d92f02c21b180abe37709ee9da78c1
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/ba6dcd205dbd7c0301855f2a892c6467
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/9a98c10943a8abfbe92b151f184370d21a10ce72afb22f131bd0522672c65875868357f60650122e1a2cc91254adceaf8044de4533aea08c4df400ded8c01669
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/ce62f8e67b89c612eea35f4ba0e09d45
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/9c3afaf0dd338abed0631b81d5f6c197b5dff6aae637996f5bc2f85f2f7dbf64a7a4bdc07dee9ab72abada5be576bb0466550280a9ee9093946a469a2b6af648
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/543ebeb138123ce190e74cf0ad17d43f
+Clang.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/aff131b5d0ed372557e3195e15701543ec32db05d5fc18117c4aee789a5cb967706d28b2dc53588bc7566f3a4498fd9e2293518ff28387466464ee07c10e9fff
+Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/58617f16466bcb1b56b204dde697cd89
+Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/bdc0c52168beabc0552ee941246b1d4506fec50913030965b374f4cedd67d6fd2b5746f04505aa5bbd4e6d61c5f684dd22c3b207e364578fd8538aef8efe0b14
+Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/aa6f0d9a455f5f0109433b9cfaa8f009
+Clang.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/b267bd6291fc5830ffee075af00fed9a37177141b0cdcaa8ffd602e6a8bfc58e191408c3a6a12c0fb3ea7a5d825adf1ef99122399e8246e0312b4cd056d49a2f
+Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/ee2d7c4dc5c95e46c6d46c4fff112e9a
+Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/cd11acb2dccd2ac45a53fc48ee6a58299b5e54e80a5b9747c680e9b068381bf87cd388ee75cb0a51ccb1162ee8af03acd4c3f730a5f5a3ed5f443dd24ee91cde
+Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a5c16a8832f5c28346912f610932ecb4
+Clang.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/91b244ccd569597fe42ec45e5a62f6de0ab2c4da048b8b3ed191bbdde0a8ba5a710054d9f40c31a405a6c494a25c7546748870d1170d76e2d3b22dbb0c618e87
+Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/2d789f91744aebb0deed9b91202c1abf
+Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/fb991942325fcbfa1ad4903db43e81fcfeda5d007ee664d96a0e0d2ee5f04b5767d6ad5d37e0273f5af626efbf1c6fde84d54536b74cb17433d29b6772bcf7bc
+Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/ab8fae829b5822e9123fc3d763d327e1
+Clang.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/1b24b03f6a81fba7400bdaa57899e9cdffd6da7e476832870460a12ab6188662c15a3cadd80ccd7dc0790834aa76ba0df098b400c87fd067eaa9f9fec0b053be
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/d5638f87a6ac840d571a3973e89316cf
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/0f07e9e8dd75691ee73ab0e78a29047596a543c5886a137a7503c916ee6792cf7d6a7f279dbd864a2ad36d36aac422555d408381e3781ec004bcde5525abeb68
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/e777625c3c7efe2dcb029e74ac7d1ba7
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/25e0a48a4d8a2ad7f5f5adb7c30429655ff496e6b5a224fc5707f092233239d4c3f4cc17432de12815e546bb595caf2a70b18ff208a53b9f0236accbd83acda3
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/22e03dc887f6e425f98cd66e0859ab2f
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/ef20886b841ba8b749ffb0c5780a9dc25d5f563ef726b1026ee77607e0572c45b8eb3470e252f882e2c4c23a2159d88ee83d31aae5081c6e4f4c37a61a7875c1
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/5d8f1390ff66b6b357768b1994a43d1c
+Clang.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/5fd2fc0cf888d95c38531d236564109b284f20faed222d1feeab2beae68662073c9c59baee310e2bd67908f267416cded7b75f73e28969e2a16d2fcea0b03854
diff --git a/deps/checksums/curl b/deps/checksums/curl
index 0f235d8238e8e..85974ba0bc8a0 100644
--- a/deps/checksums/curl
+++ b/deps/checksums/curl
@@ -1,36 +1,36 @@
-LibCURL-fd8af649b38ae20c3ff7f5dca53753512ca00376.tar.gz/md5/f082283e6a35fcba5b63c9a6219d8003
-LibCURL-fd8af649b38ae20c3ff7f5dca53753512ca00376.tar.gz/sha512/3bea5fa3fb6d29651daa923ae6bcb8eeb356ab9f2a1f3e005a6b746b617b0cf609aed4cadda4181783959840873c04b18e34e45ab973549169d19775a05ea01e
-LibCURL.v7.84.0+0.aarch64-apple-darwin.tar.gz/md5/0e1d2884864419df574b61a6db15ef9d
-LibCURL.v7.84.0+0.aarch64-apple-darwin.tar.gz/sha512/18986ce04a39a8935d3b2e595e9c7b6ecd38340f1f886cb5b16880ad72b9889a5bba8720c30c2775add115c0385ca1f98956df2cb89cd4ffa92d67e433a8f12b
-LibCURL.v7.84.0+0.aarch64-linux-gnu.tar.gz/md5/e4d57ee8f1304b8fde272a373a13cdf6
-LibCURL.v7.84.0+0.aarch64-linux-gnu.tar.gz/sha512/88ee9129a3053b8221808f977561541be573068c5abf388a78b1c748b6c7cca2cd23f8bfcb779541fc83dff07a7a3c979194359f6cd4d0cb6d6696affac03c11
-LibCURL.v7.84.0+0.aarch64-linux-musl.tar.gz/md5/f40a48d02ee841d7393477ef63163c43
-LibCURL.v7.84.0+0.aarch64-linux-musl.tar.gz/sha512/9998db3a896fa46a51d2da2a07b48470a9719fe301fb0589f04e2bd0e1bd116c5c74ca8f03d4dff6529339fdf68a42788ed33c629794bc3886e5147f51c53eb7
-LibCURL.v7.84.0+0.armv6l-linux-gnueabihf.tar.gz/md5/223727927aff997175d1d8bdcea39c79
-LibCURL.v7.84.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/f856ca8a63f55d455ae161e58cd5e195ffb80ceaeeaa7cf306a3d192ae51a1ebfb93e87e27aa90f513294e27beb8e1358c7a07eb5a3a85d434327b4331211426
-LibCURL.v7.84.0+0.armv6l-linux-musleabihf.tar.gz/md5/efc2bcc500edaaf59542f86119b9a090
-LibCURL.v7.84.0+0.armv6l-linux-musleabihf.tar.gz/sha512/297f2999f1544816e2edd1fb78aa5f8abf9dde9b782a62054b0f61974f3dbde7ae67cf4d8dd63c21082de5f89dfeb32aa099e2228851242c3379a811883f92e4
-LibCURL.v7.84.0+0.armv7l-linux-gnueabihf.tar.gz/md5/e5a0a5b7f1e664675bc2ac4970b39297
-LibCURL.v7.84.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/bd9c602b69841dd1b8625627c774dbf99e7c3fcf951b00299dbe8007e8ea2bf5a312fa34f0be9e21a7ac947332652ffa55fdbcdf21096449a8ab982c9a7ce776
-LibCURL.v7.84.0+0.armv7l-linux-musleabihf.tar.gz/md5/05f04c53e4a04ced1d6aefc1e9493332
-LibCURL.v7.84.0+0.armv7l-linux-musleabihf.tar.gz/sha512/7ea517a048d8d7a940f5e32d1476366d9e63bf0103276c8208cd23e1ae7e4dd70e0acba4cdeafd1e9a5db90dfc213bd0895ebef755ea237cab3fc9d39808c325
-LibCURL.v7.84.0+0.i686-linux-gnu.tar.gz/md5/97cffa9e6e771e5b96d77a0acff157af
-LibCURL.v7.84.0+0.i686-linux-gnu.tar.gz/sha512/84b81c69c021e8aad542c909c81ace41ea96650ef1dcd46b1ef29b683a870abddff96b8d2ecde593c8cea427256dfa194cf5bd4e5b610b0b8ce779e383aadb76
-LibCURL.v7.84.0+0.i686-linux-musl.tar.gz/md5/3dccdbc2cde661c7d868f2bd7d5c0316
-LibCURL.v7.84.0+0.i686-linux-musl.tar.gz/sha512/7625d1ba19e69cce185d61ef09374af4d433730f4908f1ce5da7d3352c96a58e1543dc66a0cb01000c4ced9033e2b2137877a4d7c9f8f0fa551613e436cb574c
-LibCURL.v7.84.0+0.i686-w64-mingw32.tar.gz/md5/bd2b06eadacaf984cc25993c242517eb
-LibCURL.v7.84.0+0.i686-w64-mingw32.tar.gz/sha512/21aee096ff42e3c4dfbf6b8c9e3cbdcc4cae234ac784e871d4ca55424263eb59cfd2b159287861a076373017ab5454d0c9f93c99d87e90f263563ddee28d737d
-LibCURL.v7.84.0+0.powerpc64le-linux-gnu.tar.gz/md5/221f481553cdb28d97a7caa69a895b12
-LibCURL.v7.84.0+0.powerpc64le-linux-gnu.tar.gz/sha512/90caf2fe245a0e1f5816fadf2c0b8e7bda5df38d716c309aadb37721923f57919af09c6a7396ce2888dc02ae02670da9300c0e5814d5ad851bdb4e661c48bc48
-LibCURL.v7.84.0+0.x86_64-apple-darwin.tar.gz/md5/9f609374291fe24ec9bd752c967d3072
-LibCURL.v7.84.0+0.x86_64-apple-darwin.tar.gz/sha512/8a8461a8cf7591a798d7ed32423a33b38425d32e3a7fd4feda06095237ae6dc43c6737dcc55bb86e260080198d5295f11fee88883354425b132c8e04bfa9feaf
-LibCURL.v7.84.0+0.x86_64-linux-gnu.tar.gz/md5/c1cc01bbc7aec5b272f7dbe803fda257
-LibCURL.v7.84.0+0.x86_64-linux-gnu.tar.gz/sha512/e6f9ff29a8ab46537054e1fa364ece163fd4376d16fe7e22dc94c0a640397b45659c143b8e170b1b01ef800ab7f53a9f4087197f2fae9002e061530cefe6157b
-LibCURL.v7.84.0+0.x86_64-linux-musl.tar.gz/md5/20dec1cebca3b2ef188a31ae50a40b42
-LibCURL.v7.84.0+0.x86_64-linux-musl.tar.gz/sha512/9d5675f90eb348ecb637ee7ed31d68701504efa7871c9f55eacb331b6717eae893e88c63cb5abd6ca9d13d34a055d67d0cf36ca173f2bd58e19b65cabbd816e7
-LibCURL.v7.84.0+0.x86_64-unknown-freebsd.tar.gz/md5/a57884bfdcbca83c1f14ece9d501224f
-LibCURL.v7.84.0+0.x86_64-unknown-freebsd.tar.gz/sha512/f8bf1755b3a758b351532ede8f19af6ace8cfcf59b656067ddfd1135533052b340ca35e9cb0e134e1f082cea19860af2029448fc1ca231a32bf03bd07698d4da
-LibCURL.v7.84.0+0.x86_64-w64-mingw32.tar.gz/md5/71182295492b38bb419a71489f01fa54
-LibCURL.v7.84.0+0.x86_64-w64-mingw32.tar.gz/sha512/9d84bfad36ca69b3ed2519bef8845cece4d9b3e8c9e1e040f744c6163469c732cfd1301cf5e5c9e23c25420b1b17a844bcb43bde858a501eb6133dbc266f2f75
-curl-7.84.0.tar.bz2/md5/35fca80437f32dd7ef6c2e30b4916f06
-curl-7.84.0.tar.bz2/sha512/57823295e2c036355d9170b9409d698e1cece882b2cb55ce33fcf384dd30a75c00e68d6550f3b3faba4ef38443e2172c731ddfef6e508b99476f4e36d25bdd1c
+LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/md5/e8c53aa3fb963c80921787d5d565eb2c
+LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/sha512/8e442ea834299df9c02acb87226c121395ad8e550025ac5ee1103df09c6ff43817e9e48dd1bcbc92c80331ef3ddff531962430269115179acbec2bab2de5b011
+LibCURL.v8.0.1+0.aarch64-apple-darwin.tar.gz/md5/f697b4391608c2916ef159187e0d0b29
+LibCURL.v8.0.1+0.aarch64-apple-darwin.tar.gz/sha512/41da87eed77ffac391a60a4af7fdc707f117affebe54960eaf43e3077440ce17d95fbe0f47de41bb1456e222e7a126d687fa0beb26cf98713b3472e9b3ba9e57
+LibCURL.v8.0.1+0.aarch64-linux-gnu.tar.gz/md5/9d3e7e7601ac21a587bbb4289e149225
+LibCURL.v8.0.1+0.aarch64-linux-gnu.tar.gz/sha512/67ac7bc108cc274ee5e088411dd9d652a969952892236d6c37a6dcd710a1887f9ff83df2c01ca0f5b16b2086852077d6c62ae7a13f7b9ac4b9e257cd1aacb0ea
+LibCURL.v8.0.1+0.aarch64-linux-musl.tar.gz/md5/bd2b62cd40b9e87fe149d842d4ff55ca
+LibCURL.v8.0.1+0.aarch64-linux-musl.tar.gz/sha512/7c6bff3dbe341e2a271b61e02767a25768b74631894c789fffdef580605d821518274a04d9441c9b5d3255b9a9297d0d35f22310dccaab367aa92d928f25c062
+LibCURL.v8.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/9effcc21c5074ef88ad54c8b6b7a3f8f
+LibCURL.v8.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/9327fc8e0db9edcf941548b0291e0bafe9b956e92f6edf47795ca961303a24ed305b30b09f29478a70149056411c4ca4652facbeca89c2bb3db41a6c97df14a9
+LibCURL.v8.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/9cb716973ec75e2a2fa7379201aad59f
+LibCURL.v8.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/3e4d22be628af7b478862593653a5d34c2d69623b70f128d9f15641ab3366282aadee96bc46ffacafa0dcbc539fbbda4e92f6ff5c7a4e65f59040948233eabce
+LibCURL.v8.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/95bd98a64034f8dfc5e1dda8fb7ac94e
+LibCURL.v8.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/6a7898670e71efd7f06e614cdf535cf390eb6def9e93409d4ce2d9811a8e1f892959c0f6ca8e370f49e215df495ee8f95e1b7d9f92e2708ca548344b6ef9cc22
+LibCURL.v8.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/42aeb569e80865377c65bba6cc84b262
+LibCURL.v8.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/fa46e52d8abd49e22636e48fb43f11be95bfdabbc13142e0cdaf4bb892ff982eb09abd9f3bf1c33ad374efc18ce21ab9968ed22c084411a55afddec0c459ab3d
+LibCURL.v8.0.1+0.i686-linux-gnu.tar.gz/md5/ded5d6d6580b979c372992c0fcf0aad6
+LibCURL.v8.0.1+0.i686-linux-gnu.tar.gz/sha512/f8a40285a25d61878e87d525bebcfe6e8c30cc5a40f38297de774c8e3191490c38716b3938cf81582afb23714a38405c20ed0241bcd3d41c68a5594822498b70
+LibCURL.v8.0.1+0.i686-linux-musl.tar.gz/md5/cd2bcf96545c783f5012611824169a93
+LibCURL.v8.0.1+0.i686-linux-musl.tar.gz/sha512/318dd3adcbf36c7979df9f394e78b7fb876dc60c9ec87d6b0edf47676c69df4dc3e73c07b2434b15c6e7497b385dc0fbf3fe7e3235b291a369f6f1d883c99645
+LibCURL.v8.0.1+0.i686-w64-mingw32.tar.gz/md5/276cc56eaf744ac0a5cec6c8c396ede7
+LibCURL.v8.0.1+0.i686-w64-mingw32.tar.gz/sha512/55cd7882ad976aeed1acaab7b1d59279ff3a0d2456d0bffa6240957ac6f152e903485f0ca05baafa5e97e0d1474cb204987eb9c94b1b2ddd657b52864a44c646
+LibCURL.v8.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/cfdc41294b2f4aa85bb8b27beced17ca
+LibCURL.v8.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/24f92091ab44a3be40228a9d9a57febc026f49b12c538c98e46a06dbcd679086332b773662126c68dbe4a60dd90a77c970c8a398237afbcf06c660fdbea16a76
+LibCURL.v8.0.1+0.x86_64-apple-darwin.tar.gz/md5/10a19a4f428951adbca7cfee91406498
+LibCURL.v8.0.1+0.x86_64-apple-darwin.tar.gz/sha512/28ddbad4310ed886c65edf28ccf01a5aba77fe11784740600aaec2aaa5c10c5e5915e297a4d72dd85bbc5304bb2027f5d18b95f13868b4bb1353fafed7bce4e0
+LibCURL.v8.0.1+0.x86_64-linux-gnu.tar.gz/md5/a68df850605cc9ec24268887e4b4ea77
+LibCURL.v8.0.1+0.x86_64-linux-gnu.tar.gz/sha512/f532dfcc84dbb4b92229a79b5629b16198061158e1f12d2dd37948cd0ceccc095221b5fc9a8e2de30de19727c727ee500c8ea4508722c677c7938ddef1c40350
+LibCURL.v8.0.1+0.x86_64-linux-musl.tar.gz/md5/023a2d8271173de0a02bdca8d1d55bbe
+LibCURL.v8.0.1+0.x86_64-linux-musl.tar.gz/sha512/e3195f917c250f31ce9669c304918b33664c5b03583f328929e73377f4feff525cedac42dc74adc9ba98a704630294a5697f07eb95ca520c6db4a67f0f83383f
+LibCURL.v8.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/ecd39a1cc45ee76751e1e3c5edf469d7
+LibCURL.v8.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/fa06afb1173bc23474f8f7992268ae9a0df52bc3c1af86d2b60da2cfff43371bb029b51debe638d81d8a1dd334a95dcd3c53dc12923220ad9b1336fcdad1ff8a
+LibCURL.v8.0.1+0.x86_64-w64-mingw32.tar.gz/md5/d9a735335e3603635a56eb3b86e6ea87
+LibCURL.v8.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/8fc6677b1be27a900d2a984cf9f9f4b3aa1555bfd732da2bd6553c28da98048c4c86216b57744d7156de94c522b013768e57f42e662845002e5bd9f730c818a8
+curl-8.0.1.tar.bz2/md5/b2e694208b4891d7396d118712148ff3
+curl-8.0.1.tar.bz2/sha512/24e84e922612ebf19341525c5f12f36e730cd21a5279cbea6421742d1ba61e5fa404f2add2e71d64e5692a1feabfa92c5a5d56501f161d1e157718fee467e0a5
diff --git a/deps/checksums/ittapi b/deps/checksums/ittapi
new file mode 100644
index 0000000000000..896e44d8f2907
--- /dev/null
+++ b/deps/checksums/ittapi
@@ -0,0 +1,2 @@
+ittapi-0014aec56fea2f30c1374f40861e1bccdd53d0cb.tar.gz/md5/932501cdb0e1c7841e23c12da7740419
+ittapi-0014aec56fea2f30c1374f40861e1bccdd53d0cb.tar.gz/sha512/4dd3343837398ada0cdcdaaff630d8d91738d166897d86b77770facde30da99dbb90931b58a4a887399e6bc9a7a1c245057d0a0f63762230d577d71da871701f
diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2
index 383d1142ecbd8..a70a404ae6843 100644
--- a/deps/checksums/libgit2
+++ b/deps/checksums/libgit2
@@ -1,34 +1,34 @@
-LibGit2.v1.5.0+1.aarch64-apple-darwin.tar.gz/md5/a6f909d459a3783abd181b105deddcb9
-LibGit2.v1.5.0+1.aarch64-apple-darwin.tar.gz/sha512/4576464d1a9b64beac0d5a7067b6afccee4bbe1debc7dd340b1bf4b4cbc916ecef7b4feaaebabde151bd0d9ca92536f30edc05a928e36c1741ed4e5fbcf3aeba
-LibGit2.v1.5.0+1.aarch64-linux-gnu.tar.gz/md5/ac3f90441013850c5b65c951e7d7a987
-LibGit2.v1.5.0+1.aarch64-linux-gnu.tar.gz/sha512/a945e7bcfeb41471c8c687f6f28aa340bd78c5a7aeaf5c3ab35fe8c7aebee4f3d823bbf5e3d0f44cf566fe1f7a7f5dbd2e5b3007aa158af863e89f7a77357984
-LibGit2.v1.5.0+1.aarch64-linux-musl.tar.gz/md5/6892a30e270b2fb8c46fbe3b60f152db
-LibGit2.v1.5.0+1.aarch64-linux-musl.tar.gz/sha512/f43029515e457d21d4dee8fc9c0c79ffde7143af2df1c12ab788b6dd7ac3ee28028de4f3e70ef71f30332d35a939012142f26a680864b4d8befae3c821ddd3d2
-LibGit2.v1.5.0+1.armv6l-linux-gnueabihf.tar.gz/md5/c9e1133af6a095b3288603f4591c9814
-LibGit2.v1.5.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/a571a6e7fcf9e02a222c27fd1d0eb3e47a380493e2225d8e7879972c34ee571463a2c3995c1c6b7b723f1f7957a0b230ec0fff1eef06b7bed0641c4bb4594817
-LibGit2.v1.5.0+1.armv6l-linux-musleabihf.tar.gz/md5/99deac8e5abe948c9e0c13035a851c2f
-LibGit2.v1.5.0+1.armv6l-linux-musleabihf.tar.gz/sha512/10a8b77dc9dee91046093145ad3b602a8da4aaee1bc68198557ca7197206a8c6a158300610fae5d4d0f5e276cab3411ba29304ac5eaf8d63ea41b5b7085ca241
-LibGit2.v1.5.0+1.armv7l-linux-gnueabihf.tar.gz/md5/58bfcbf4b3adf5736149c26dc14f429b
-LibGit2.v1.5.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/4bf37cdee3e79a5686563b875056f168c6e33c40b5099896601b190a569a027815e1da5168c0cd03ebe2ec952e0673e5e9d9bda22372ae12a74e16d219e5b537
-LibGit2.v1.5.0+1.armv7l-linux-musleabihf.tar.gz/md5/d82a259ea8979479471483e64b2edc10
-LibGit2.v1.5.0+1.armv7l-linux-musleabihf.tar.gz/sha512/9a5738e811a016dfe242bdacbc6b34c54df2bf6c7564dd91e79b76ed22b291665aa403b24ebda0979193c4b2f0d402306cb351360a89627f332409d3d8fea00f
-LibGit2.v1.5.0+1.i686-linux-gnu.tar.gz/md5/5d7da5ec4132390905c7c26f3a4c8ed0
-LibGit2.v1.5.0+1.i686-linux-gnu.tar.gz/sha512/bb1437e08bbf30f39bdfe87e2a1e2259bef0ac53802ee507c613d32874f9f2a0e30966fbb621edeb0ce62be805b9af11753861523f2059a697c2132d96187913
-LibGit2.v1.5.0+1.i686-linux-musl.tar.gz/md5/b3233a398ffd6d635f2fdf6f5af775b1
-LibGit2.v1.5.0+1.i686-linux-musl.tar.gz/sha512/83bde361346b28e4a7ba6922cef90f40c6ea6f03b0ea5f491b8cc0de815f62ca3a37020cde05c6bb3fda701cf8c06fd2e05c70857fc916ec0220cb11f6121422
-LibGit2.v1.5.0+1.i686-w64-mingw32.tar.gz/md5/a6ffdeac30e97e684bfc460677d7f222
-LibGit2.v1.5.0+1.i686-w64-mingw32.tar.gz/sha512/29ac5f44bc16b32c33c68fb02c08bdbcf4762c288d4b9fe901c33beeacaa972db5c2c1b0a63cf307b9d1658a6e2fe71cd76ec8b1a7c6ae57ef1a7c20ed6bfd1a
-LibGit2.v1.5.0+1.powerpc64le-linux-gnu.tar.gz/md5/c87f1d5e5d49414b6ac39b9f02a39446
-LibGit2.v1.5.0+1.powerpc64le-linux-gnu.tar.gz/sha512/5e7982caee4c6093f58f6ce438974f4c5d2ea7c41f33ed75dec3e169f55ab547b15fe96771a278f80c31c847c42489a2e3c0e9c2c9745bc8f228c7f5dafe60c3
-LibGit2.v1.5.0+1.x86_64-apple-darwin.tar.gz/md5/4679839c80fe653fbc007ada1f84054a
-LibGit2.v1.5.0+1.x86_64-apple-darwin.tar.gz/sha512/d66b8686b591968d4cac2c2e0d2013d37f4b73043cd77908b6716e5647ae9d092cc874a616a8862dbc0e114f19a3ccd596b669e72cbd37f3371dcc518d48aa40
-LibGit2.v1.5.0+1.x86_64-linux-gnu.tar.gz/md5/5d0cb8c5746a4417ce51437c5dcb75bf
-LibGit2.v1.5.0+1.x86_64-linux-gnu.tar.gz/sha512/1a0aa9b537d03a0849401551e1a34b938879c2bf70c30dbf43cbf76b1e4cc1dd4dbda561741b7f1a48ad33d8bbec200252f50583b3aacab10cdc128e48bd7744
-LibGit2.v1.5.0+1.x86_64-linux-musl.tar.gz/md5/bb54d5e1b903f90f0c7dbf323f819ed1
-LibGit2.v1.5.0+1.x86_64-linux-musl.tar.gz/sha512/72717ef4c6c7385db3fdba192201f0e2fe7b680bea837f27b5b35aaedbbe43e527f72cd447d061848061e06ed0e6ab348d4b28c9e3dceee6d913949923c0e317
-LibGit2.v1.5.0+1.x86_64-unknown-freebsd.tar.gz/md5/9b16f78a52838c68716eb0f311edd309
-LibGit2.v1.5.0+1.x86_64-unknown-freebsd.tar.gz/sha512/fe29f9dac5bde9e3f95e1720ad44f34dfb0b269aeb2859bff9cde46adec99104869a7dc4e536e3276491c3a01273c42223e37e5ba6694581c27b588029903158
-LibGit2.v1.5.0+1.x86_64-w64-mingw32.tar.gz/md5/84a38431d01ccd6b0f01181e9ecaf5ef
-LibGit2.v1.5.0+1.x86_64-w64-mingw32.tar.gz/sha512/ffccbc6bc01eb9900b2a43cbfdafef7b1d1997285d46786b1373def1f091a41d8fbc3fc746fa20bd70ee619d6cfd357fb5cd6d9ac040f1c301fe6ed49d07a3fd
-libgit2-fbea439d4b6fc91c6b619d01b85ab3b7746e4c19.tar.gz/md5/b76d9e4cd2d5fa636143ce9252a6eb3e
-libgit2-fbea439d4b6fc91c6b619d01b85ab3b7746e4c19.tar.gz/sha512/97ce3066cd7de077c3ccf0921a29afb20250b354ab02d3ced4a80ed2a294784e07933072ce8f819c3ef8200249d0a7ea8b500957ace498ef64e9a072c92782fc
+LibGit2.v1.6.1+0.aarch64-apple-darwin.tar.gz/md5/62bb842de0ede8a7c2b119cfa7402a61
+LibGit2.v1.6.1+0.aarch64-apple-darwin.tar.gz/sha512/e5117912419fd73138779322d5cb84454c641aad87d0df7d44b5074c96576fe1ee3822dba18c8207dacc9bae2b74cef87353d5c519fb7fba8ea89c858415f993
+LibGit2.v1.6.1+0.aarch64-linux-gnu.tar.gz/md5/3f42f283a9f550841b285216d681f3d0
+LibGit2.v1.6.1+0.aarch64-linux-gnu.tar.gz/sha512/0a793bb239976946941af5794cb45cfd7d1d99b9aa125800aee9337bf9d9c5152bcad258f75d987a7af9b547ea906ee2beebe7b8d2c8cea111e6878df0eb3ea9
+LibGit2.v1.6.1+0.aarch64-linux-musl.tar.gz/md5/0f20cee604380bfa789334b5544b1cab
+LibGit2.v1.6.1+0.aarch64-linux-musl.tar.gz/sha512/86d7e6a64bf24f3e69dfa4383ed896c5d8a915e19f6f0351e8cf38361352347c827f79032fd8576ca9bfb94dc8db4704d35540ae67b46d671f44ab549c6ceb49
+LibGit2.v1.6.1+0.armv6l-linux-gnueabihf.tar.gz/md5/5c025b4c9065c0b481c7b0f6dd7666a0
+LibGit2.v1.6.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/5b1d3472df47462b3e38c5a5b3400d90038b1637a7f479e9fe04ef046849c14d12301328498429a9f290ff82b6343ccd9ae7616c5ff1d5fd83f35559bedf8747
+LibGit2.v1.6.1+0.armv6l-linux-musleabihf.tar.gz/md5/8015b63706e6d5826779f870681ff865
+LibGit2.v1.6.1+0.armv6l-linux-musleabihf.tar.gz/sha512/e3c8c46d8da8df409b2dc7c476da638da2c79974270390b84473ebefb66f26cf60647445c2b141f7b6cf45655de12404deea30731b812952fd9156acbd7344a1
+LibGit2.v1.6.1+0.armv7l-linux-gnueabihf.tar.gz/md5/74672b31da80507609e59b19448ec415
+LibGit2.v1.6.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/6c6365501abeffc7e796f3b67a139e93262dab1550ba5fe6ead179c0a9d32c62bab7b422b81524d7a367ca1032c7bfd2b3385155e364fc267f660dffa8eee39a
+LibGit2.v1.6.1+0.armv7l-linux-musleabihf.tar.gz/md5/057c22b3fc988a98551fc319eb080c39
+LibGit2.v1.6.1+0.armv7l-linux-musleabihf.tar.gz/sha512/edfb8c57aad5499fae88f09a17e905b4c009e2a8781727566321a858f3ed8a4bcb75b990ae5ad4ac57bcb2b01bd2dfbe0375b01a41405c161106881c8859aa78
+LibGit2.v1.6.1+0.i686-linux-gnu.tar.gz/md5/ecde35f4ca6b4a03f8491d90480f33b3
+LibGit2.v1.6.1+0.i686-linux-gnu.tar.gz/sha512/ca77a1b3c381be2286be9134d7adfde51fb38c4bc9dcb3f56cf1840809c40c484c843cf4ed8d77c538889e06cbef2e5d1b4468739bf761cc91c676a0dc5a34ee
+LibGit2.v1.6.1+0.i686-linux-musl.tar.gz/md5/1a56e7832761479fe911b8efd66b5b73
+LibGit2.v1.6.1+0.i686-linux-musl.tar.gz/sha512/e929261ba9564762d2b3c3191dde216caede5c436b84a00d08706a708436023430a9a762cbd94bf96e903a230c690ea28787ee08208d5b50e51d98e56587b30f
+LibGit2.v1.6.1+0.i686-w64-mingw32.tar.gz/md5/671a1c045725877e1a4f55b42fbb15b9
+LibGit2.v1.6.1+0.i686-w64-mingw32.tar.gz/sha512/5b0e78b5f5f24b7ee8c88d704bf58043626174d9e8e28226b72873f62d0ff6a6f87d6200adfd613e35c27f6d127d967f49a1f7ef26ded8d1b08c89589b59ce85
+LibGit2.v1.6.1+0.powerpc64le-linux-gnu.tar.gz/md5/4ffc17733025ac94e525f8d9416713a4
+LibGit2.v1.6.1+0.powerpc64le-linux-gnu.tar.gz/sha512/a382f7f15484426d6e913c9cd54facd63573650449f1a2d7b180f1905b79dc75280fdb48ff9e47ffc1ef70c9941d43a6ca35e21bc9746172689886fbbc9d65a4
+LibGit2.v1.6.1+0.x86_64-apple-darwin.tar.gz/md5/af4192c866787ce226fb7a6d5229bfa2
+LibGit2.v1.6.1+0.x86_64-apple-darwin.tar.gz/sha512/18bac55bd7bcd9ea66002c98717ef358710aa689c9bff63be77de1cce4db2082f023ee577060f6ed11e3830c2e751bf2adae1a9b232570a090031c5246f29edf
+LibGit2.v1.6.1+0.x86_64-linux-gnu.tar.gz/md5/d26008f39b244ab0caa804ae0365d69b
+LibGit2.v1.6.1+0.x86_64-linux-gnu.tar.gz/sha512/3d6068d2165c012ce66317cc0993c374df43cdb2dcd584ec7966f602062428d4f5e18d157c7aa19572affa1e9dcb0346105a01c64f8e5ac01546aaf7b5d99439
+LibGit2.v1.6.1+0.x86_64-linux-musl.tar.gz/md5/fcbfc9f15ffe3c4b2ea055e198795e96
+LibGit2.v1.6.1+0.x86_64-linux-musl.tar.gz/sha512/16bb30defa9d23e6025e3729e313766940105e02f00168e61bff81ae38beae9ae050a5fbf2307083b3cd89d364aa70a7042b94062160fda2174aaf5018f3e2f3
+LibGit2.v1.6.1+0.x86_64-unknown-freebsd.tar.gz/md5/a4fe2ed51c1ac1aaaa4f46a00714d85a
+LibGit2.v1.6.1+0.x86_64-unknown-freebsd.tar.gz/sha512/bba31901fcd8b2e69f43e9645c028be4c840b3d9afb4e92e64c9ea46c7fb44dfecf14f99cde586380ae0508fdb8402d3bbe93ec7b38219fe7806299b70576949
+LibGit2.v1.6.1+0.x86_64-w64-mingw32.tar.gz/md5/11ed8da2cb4c7ef924b50768cbb54678
+LibGit2.v1.6.1+0.x86_64-w64-mingw32.tar.gz/sha512/b39f12931d638809af27e446d7ac25b17bfd5c003cac89bcf83dc4c5331d14ec12b07ae410cfdc636546a3b1edf0f7d360bd194aa58c835261642b51edb4afd1
+libgit2-8a871d13b7f4e186b8ad943ae5a7fcf30be52e67.tar.gz/md5/831f4d09a6a22662dc0043063d0305cb
+libgit2-8a871d13b7f4e186b8ad943ae5a7fcf30be52e67.tar.gz/sha512/17ad43e6f80e87e8115cef89919475a9d9ea11d679e107221e6d82623577fc8e4002876a33c7eb2a52a47e3d8142976777bc79f81e4c4cf2da6adb1553d17b00
diff --git a/deps/checksums/libtracyclient b/deps/checksums/libtracyclient
new file mode 100644
index 0000000000000..19b7b26c5461e
--- /dev/null
+++ b/deps/checksums/libtracyclient
@@ -0,0 +1,34 @@
+LibTracyClient.v0.9.1+2.aarch64-apple-darwin.tar.gz/md5/08881ffc565e099903e2e972a7f7c002
+LibTracyClient.v0.9.1+2.aarch64-apple-darwin.tar.gz/sha512/a9dcc7f9ed7565a769dd1080513eec7439cd7b03d68d48f570ac3f396769ef0a7f9b07446045ce6536b7e67860096eb150670256c311c0a77ac1a271dc4b4422
+LibTracyClient.v0.9.1+2.aarch64-linux-gnu.tar.gz/md5/d6a8dbc7cf871f772f848a5e515e6502
+LibTracyClient.v0.9.1+2.aarch64-linux-gnu.tar.gz/sha512/cb9b3065f581a956d318d71a94216ca0e57599262a12a25bc2e6fa0234505fed5a9cad9c2eb7ad30d7ffe9c4ee3d26d9f645887d3f7180d69d3bf1d0745b4f22
+LibTracyClient.v0.9.1+2.aarch64-linux-musl.tar.gz/md5/0d74193e3571fbd80eb7d9e884b47e53
+LibTracyClient.v0.9.1+2.aarch64-linux-musl.tar.gz/sha512/18821911a96129486cb12726018b33fde1da345228623b7f326b92ccfcbbbb2349d79a35e6fa7cb4b6cf9283a860e8ac44c40d6b54a4dc1ea4373b869491b6d6
+LibTracyClient.v0.9.1+2.armv6l-linux-gnueabihf.tar.gz/md5/6111f3b3c696d9d07139e137c2ec1d08
+LibTracyClient.v0.9.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/135139c221cb2d4d6000bd1a3771bd095e93487c7c649ebdf760ff5cb03f6ae003c33c2a36a52bbdf70e4c349195f78a97bc963336a36f33fcdeee33e4fc1eb7
+LibTracyClient.v0.9.1+2.armv6l-linux-musleabihf.tar.gz/md5/5b3154cc849b04bb3523f04fa4481b83
+LibTracyClient.v0.9.1+2.armv6l-linux-musleabihf.tar.gz/sha512/7f62a546c7cdbe3bb6a0a446980371ff340d5f530907a2434eba2a14bbfede8c740a763b0c68a252d7a3e357d9d933bcc6313919cd9bfa385715bc833be56cce
+LibTracyClient.v0.9.1+2.armv7l-linux-gnueabihf.tar.gz/md5/f6952d495c5b699226260e065cf2703c
+LibTracyClient.v0.9.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/5fdad7f8ce3a03ce05adb3deb6bc8347aefcc8a7fe0a30e0f7684fe233eb8520aca138e0b8a6cc5555a1f2316a6e36bca32cb5de37f2aac5c5deddfaeb0f8570
+LibTracyClient.v0.9.1+2.armv7l-linux-musleabihf.tar.gz/md5/84924c2e32b39ed580b553a968e97360
+LibTracyClient.v0.9.1+2.armv7l-linux-musleabihf.tar.gz/sha512/2b81834b91472eb9897abefbe77e931782e8c14eaf7193f22fce82024610906b6e96122610edfab29a9c844581cc4ee9124e330af9eacd97fb8759c1de421472
+LibTracyClient.v0.9.1+2.i686-linux-gnu.tar.gz/md5/9f243a9d10cd928d45436f634d020c27
+LibTracyClient.v0.9.1+2.i686-linux-gnu.tar.gz/sha512/c9512030d83f32942c7fefd598bfa597ce758f39d11bc9551fbf565a418a3000d23f899f1e9411cddebb3642efef8cccfa3cf3f629bcc11fcf50585e1a80549e
+LibTracyClient.v0.9.1+2.i686-linux-musl.tar.gz/md5/4aebc58f4c8101640d9e450338a4e12a
+LibTracyClient.v0.9.1+2.i686-linux-musl.tar.gz/sha512/2085b7c0658bb39dce9a9b511c209a348916ed8e50ed0d51eb22f7eac167b890a87d357e433e12eaf7034c15842c8d2893a0c128443c4f25fa90fd5ca83e256d
+LibTracyClient.v0.9.1+2.i686-w64-mingw32.tar.gz/md5/dc6f911f5cdd2789ef9f13a1a9882243
+LibTracyClient.v0.9.1+2.i686-w64-mingw32.tar.gz/sha512/57894c759db949dc669e23b7d5e015942630328a3dc754185a0f6bae95a66f0c3e65e365317bae95f3a216f4dcab681203e64dc8c9a0b5478cc9e27c9dab2e56
+LibTracyClient.v0.9.1+2.powerpc64le-linux-gnu.tar.gz/md5/a7429f900f7f0a14fa355186d99a24e1
+LibTracyClient.v0.9.1+2.powerpc64le-linux-gnu.tar.gz/sha512/e37ff8e8de9b74367b9f0d6fe49d983900529caf9c2c55d5ace305d5896c2de6589380247dc85017d959901864d4a163fe110e6d860340d949c6ea4dec50f47c
+LibTracyClient.v0.9.1+2.x86_64-apple-darwin.tar.gz/md5/b037ea1027e6466d5dd9c0fb41f65ded
+LibTracyClient.v0.9.1+2.x86_64-apple-darwin.tar.gz/sha512/81e2d00bd8eaa1cbcbd5c0ee4552028ccedffcc072beea3dc08ac3181677da93406e8dfc581a78434175fa5bb861df06848dd3012f8adbbb6dc72efcbb5094a0
+LibTracyClient.v0.9.1+2.x86_64-linux-gnu.tar.gz/md5/cfbe122083aeeea6bd7ddc4591b1cb53
+LibTracyClient.v0.9.1+2.x86_64-linux-gnu.tar.gz/sha512/e0418a0b50d64990d6f1b80dfe65e2360817211e1225c4d8d9fc9c871a95bbb62c2601c617adf1d55305518f5ba1dd05baee82f6934d0011269fab21b89336b9
+LibTracyClient.v0.9.1+2.x86_64-linux-musl.tar.gz/md5/f152ba78f2461fec711144ae66380c34
+LibTracyClient.v0.9.1+2.x86_64-linux-musl.tar.gz/sha512/f59f837d2beb4df4d3d65352a8c46261bb5a92ae88a62e2d1bfb7293184e02be982fbefe20736456719055e718a26003984224d0d74a0a6244dcc59e0d350556
+LibTracyClient.v0.9.1+2.x86_64-unknown-freebsd.tar.gz/md5/83c7b3d9438dd04d25573a386bc5c3df
+LibTracyClient.v0.9.1+2.x86_64-unknown-freebsd.tar.gz/sha512/f22d0d4f4171067bd1f56bb63dba801e262d0ed4809538dae907296d1a12817954ad759cdc9e61f710fff5802fb7371d8283d6df52c9e8faf6b43c713c23e371
+LibTracyClient.v0.9.1+2.x86_64-w64-mingw32.tar.gz/md5/83f3db14b65b8e9942c754bcdb430060
+LibTracyClient.v0.9.1+2.x86_64-w64-mingw32.tar.gz/sha512/8acdd1d407ae927925f33eb75891684d6687e3577d5f8ac77e738daedc8145462b1f044e31edd9e2db4507673a0abebcea19e171833042cbbe5a135b0c0435cb
+libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/md5/51986311723ba88ac305ad2c1e3e86c6
+libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/sha512/f92c5bd71fd3e933f03e3535c0668a9afddc7ea19531aaee11b22bde09c57cc8a555f7f17f489d4221645fb6d73ecf9299d5bb11949d7529987beec3e7d91763
diff --git a/deps/checksums/lld b/deps/checksums/lld
index 588522e1cdb62..1b238fdbd1a96 100644
--- a/deps/checksums/lld
+++ b/deps/checksums/lld
@@ -1,116 +1,108 @@
-LLD.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/fc262d76d2c8b848713b39fda7d55544
-LLD.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/86d584699333feeb574e9c00116a9bcfb728ecd905e983ebf02eaeded052c03a148fcaed1b655c07edaebbfb256f376f6451e1167503b235bf557836a9ddf7f1
-LLD.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/cdf439f1bb444adc506fb844230709b7
-LLD.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/9cefd451e0282d9d787fb79d70430bf811297a81c045af386a0b685f34627a31631d036d1b67dd32360dfffc51450d0498e71a03302e0cbba3e60d45cbd3112b
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/0e2d3659de3c546073a52db675b2f00d
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/6d52a3b56f3bdbb59addca2c7d4b0776f8f414191579b59938c5715b14b1d1cc1e76b873c098ce98a28bed57a0a97974805f158ec952a83551adb61dbac3891b
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/c319caffaf1ae4271e86354661eac133
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/f908988258050f06e4022b44dc9e49fd66221abe0c205a92e0fd270705b9b78ad7892ffc9adfc69b9c2a70f955e98678ca65dbcc3ebdd748d08ec1c414e90892
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/69fd74156fd9d4c32596f8ec8743f24f
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/04220b61f3a9a93be147a8e73f044233bda56dce7500c2225089c1fd1e64092f8af7d91b9fd41b4f347950d787194e9ecda0fa3f09e9f0dd3f1f0836d39bcc95
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/0848225be33d9f436d6cab9fe0b1a6ca
-LLD.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/d1bf4cdb1f47c28f0ceb86606cdf073141e2e5a249756bbc4fb862aa4e3476b9b6c436e994c5702019b82b773c2c3d2f0e78d22a3cdd905e159c9ff753d2619c
-LLD.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/8abd66714f15f7db949da104a1ad0fa5
-LLD.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/9edb1dcb32e5133634db932dbd04d29256a4ee636e44933f63c1585113b06dfa6b38eaf87b72a4b3efd044a25f0f173083360cdd15bb964d4f8ff3b4d5125d32
-LLD.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/4c8249e6976e75c7790b8a120a57d8f8
-LLD.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/405552d7d102a393c44d3386cef9a2a85916cdcab88b52bf3918f131b860bead5f6aadefb6794a879e9ae553a6b3a6d6444bb900c33acc77c1f79d60c024e772
-LLD.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/e86955bfda5ae339a22b959d1c97b7f0
-LLD.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/0cfb78274857b1f5f34ec0407dc52a5ec6083a00d9e9b959099839d7467f5ba304dda8a974ba4f3281b66ec3aee5d7ecf0cc774f26a6d059aeca39d850cdd17e
-LLD.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/b17725f5c189699eb325506325ad7cc9
-LLD.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/cf7393cb10d023c7d1a04eee85e706c383ed8fe03b66b2e6a46f5a7cd0e76ef5cf065b94e612f6b46f4e2dade6782f8f2ea2b0885fa7dad2d2c83b049b376ce4
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/73e8e847ec3126fadec0a6ba79974ec1
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/7daface02ef1b12bf738ecc026c33b7568b415b91c452c64125d74db24f97f640a888c313156363de30b78d2c6a2593e3b4d683783b0a63d057b58ebd2a29047
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/d141277d0d02d820c17634331bf0a40e
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/d0e0f145852fbd971ffbab7a92c24e435c581e6945d49588701f9e930d2d16bd6bd598b638b23f473f764bc57248ee9fa5bd725c35249a298ae30063d26ab0b3
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/bfb86c885380c9bf0430ae21c5202057
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/f5d7fc12102e8c728adf271b2ddddc67d766d1ef7477b57f8788c218f568bf93947137c0607008e9b8e8e7ec5c4ba9cc92688b0b8a15af96b3a54574b6d9f3a3
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/054302b380b9b91d0ddfb09426ce44d3
-LLD.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/354c10a4705fad5c32a16140eba579603c077e725c35b1085e8d99a7b766b4a732b5b26f44bf4877f7bae477543f38c2222c3e4b610e901bcf70fa54828ea4e9
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/50f02bd884e32ec088f279f99c4536ed
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/d0094821adc5254ca279f664199c76fc4754c5b1c4d676053acbd490ce1d84808817218b5e20c0e5a07243eb62e3876ab0b5cbfd1c3e80e0b0343153f0d85bd9
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/ceb31cf8a3315a2d1c9ec314848ae5d7
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/c83f85f36d61076c366ced1b03009e5695b7fbf77dedafbb5efa42e8a167a7841ad6e5c946d5d614e38f259bbc564bb24edf6a041b85ac52e12a4025d9cebc0a
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/2f51541d7a59b166d5c875c14ed9b5be
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/a092db3050dbae96a8e85dc5078c13fc415bfaf68800ed8c27871a04da19ac96ed5263366bdcf3f75b42d2c329ba473f1df6a38af3d3968bd1b165f9bdb50e13
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/f2e13021c00a2ce98de6a153a3661944
-LLD.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/b1273890b740d9a9fe194d5351b74db261b7c1077e02c02bc6be47b4e61f5b069df248c140e46f5e4a8c735503ffb84dc7ea23f673f3b0943af1667cab836381
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/4ded55f6eae1fa6a54e5765af6b99df9
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/772c087814ba18418357799737ebf650ea5944e9c1a8f6b4c10770cf14f3ed8ea152854532b7975f6326b81d640021a63f8e0334e64ece776e41c5741591ae52
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/bff02de59314ad554f2fd01924a80693
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/114519b9ee153a495bedd174b42df227e1f41375511c8a4010a06013c73a3aa5db0938d764e0e639ceb86f9f13513c6416b3291f53eadfe0e1ef5b4a93b4ca03
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/70e16637af23ce7f6c33b061e073dafe
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/2f3afd0cf1ae8a0c0f331a9dcca0e1e69d7b49397c226f1260ed38b2b5a2d400673578be0371cbb2a028827d9e22e6a8890e34110967250ef0f0f907f63d59f2
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/b00c58a756363edfa9bcc6e26991ec74
-LLD.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/6e5d9448272aa69ec72a201f5d4b90b0a4804f654b510c4a6d98393cad3c1b352d6bb9f47b909ecf46a8afb4fc582176f0c26c028203cfc72ed6635255a1da4a
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/955be6224e721378e90a5c78c7c0557f
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/74c4bb8419a75782721c97d4af893ae4f976ddc7b159937bd6b3a1e00aa63708a227bd02b95685d681afe2955c7bec080873b1fc1fa4507bca24a09edf7adfb1
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/5e3394ef7debe390219a4ce95df29741
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/17b8f0a257f857093fc70c16821797107b5b1ac62239f28998d4c355e1d0e5541628e917464ad30ffd07f4c8ec3ce262125bcbabb0d39044fad73acdf96ef1e8
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/9de7aad1857b8fffe7bd6476b0ce881f
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/876963d34b883ddfa13e59286d08ae7a6aecdf6a35f77b0d12867435e48468b65008d8c8b1f5bd931196076fffde615971efdb3774b5c7aa68ec08b1d6f0ebf2
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/b9ac0071ec9b36819c77529559635998
-LLD.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/c6f7112b680c80d35feb633bfba3910405b0fc0914e05fbf5cf8fad001c5868973b7269b467aa1724d6c2b15278ff54a14aa09808b26104f54eb5452e3f78c43
-LLD.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/e387441aeaecb5f587f2e1edef3717c9
-LLD.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/e9b6e42dd132eff9f539108f9329ce29821d8101879d880e7cff587d3c7982c57eecd6e33d1af18edeb18664e77e6b5bca8f62d69fad57a176f7edcd43a51adc
-LLD.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/70aea168615be9cf0868e9a504b2e572
-LLD.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/1a555b1c354ee690718ce08b1f736140925b06cee1b9533962ce7eb7f6332bbdb9e25e1281423772e0fdec8d34b5b690eccb6835cf6b764ada492ab20ad5088a
-LLD.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/ca1e12e88613e2fa5f70a9b932306a5a
-LLD.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/e2b669f1d5f24673f85f95dc53a041c3b5a34b05f3113803f53fddc9f8637cb92867a79fc02b19ce5e6cd99f0c0a7b6d351fd68994b244c1c35a1ed7058cb0d9
-LLD.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/b6833e7ca5dbf8c46ef536ec834b8f23
-LLD.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/995915b3cf655618176a98c41b54a3345797fb5ace72771ce963644dec67060ca84ba20779b94fc4bc48e8688d1f911b20abfeb459832b279ddcfc5afc998776
-LLD.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/ca9848c652737d4119d6f2f1b83bc807
-LLD.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/465c88336259a472fa49c6ce88d7965e44aaf34d0260e38a832f27ed5b99d77d9653c2390dc12f15db549325170c59be108eb9f41f99ef88d5fae47edd538abf
-LLD.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/07c8437e8af4e120268242fe1ceee853
-LLD.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/ead33174285f85e4d8f40baf2f18c88ea51894dfac528be47db16a4885ad658ac5b92431693ef24690d9a8f7a9de7d3fdc348ea1f505e29f8e8455f1a4e57ca8
-LLD.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/13395b2c3c4077899229e5b7dec5e535
-LLD.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/a3f95af4b499f0b4426a109cafc1c9bb4fcf2a600d6aaedc8472d26aa61b04b1aaa3a801d39e165a9e7253eddca6009006e2b8030dded6e592cae7a477015d64
-LLD.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/02f86c3d3e21b8e4de49ee5632d42c1c
-LLD.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/ec796d467089ebbb0e5a6085c0f5b15e5f43247335661b22fc95d7656b860ad34bf5dcbc3d3c14898bec871347eee565c18100a872f1150d25120e25702d5613
-LLD.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/055a29a5b3e7bfc69cc4455150d2a765
-LLD.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/0a51cf1f1b1c825cf397279916a0bdda789dc9f8917a9cca70e10050bd253f286fc296725ccc17651d72c304458356c9e0c7744e85ea0961fd5a895a2300eb26
-LLD.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/357377a9b28dbe542141528ff21df505
-LLD.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/199182506961dbb552cdb7a40bd53dd613f9a15bf824d96813bfcd26e0cce1081651314211f99dbeb7145d250ee90eaad760bdfee27ce8e14cc40561ff8e3028
-LLD.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/50e1465dfdd73cb4892dbc84dc2bd407
-LLD.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/bac02215501595510bd92f59bc5d6f707a79faa360823afc82893e7eb64b42ddf035ac3083dbe37f87b3dded5c5f06269b3fdedd2ea1eca0a41738178492fa46
-LLD.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/c86e047af65383a802f9f40f0366486d
-LLD.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/24dc600035ac9fc7fc94dd47e3bcb197ea64557565a962bffe683ee040a089a4f0a6618e6ff06c9225ec0961adbfc810706d016a0dab659d77d2fcc73c1e302a
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/463c8a2b34c01c1964f9090d476ee1b5
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/a2d8da10ad2b81b9fb5563ac98e992a7500d35c4999ff51e30dabf662199b4bf47c3b8191a87c6dcbd6fd3fb7917f680ca9d9dfcab92fc66afda42d93bfe7a1c
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/02871a4b77f564a1562fd1b8766341ec
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/acf16625e34e0f686bbd02da34515ab9ad1cebbc03fc2cc4793728d153c3d30d5e684179293e0df333bec54c35c02f63b4e8b39373c4a78b4dc496cb84168953
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/6ccd870609a949083245a0a469a256c6
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/e79381809dfbbb457f6ab542aef7bd20e758f92c6306d8efa900f3d951cc37857170fb41d6e264d8fac903aab6b1b3c2cb6cd7b29b12db05df23a3f0136d3149
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/a26d060376eec9f52ca65cc9117de48d
-LLD.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/b7ec3789ae9afa900a332e7d545576995de14ebb31b00ef9f8d16d6f8eabdb8d35a508c283b9dc49cbd2cbf0aa99c0c081750ac9d4d80a1fbff71e044361cf72
-LLD.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/c4063d74231b368d5e4dec1f8a110187
-LLD.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/0cedd30610c042e58a91e1f4a46fc973a781a0f432292d40fd87b4907dde868574dfe7cd372d8a05f7e56e73d507b20df8c89d49b1bcb5edea161365aaed04e5
-LLD.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/09f1070e327911a6eb38e4d7481be776
-LLD.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/47236e8449a479599dc03d198c28da352139cb62d08b7def13328a32b5209a29985d7f0044c74d716a3458adbeb8ce2845a760bfe3923a50a4d4eab1f832dbcf
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/214314e0359316fa00e5a770b55daacb
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/b25ef3505996442b5e4d4c20e3cd9b9fdf385b8e86a8f5598616943fc8aef8b96307206d6aa836f3f8d65818806eec6901b1d26fb339320f538e3ef7568b6859
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/32760e37872e2353c33c175cf42fab39
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/6b38c31a34cf4b1384f3b24cbf7e4ebb20a112465293bbb37e33bcf06d998f2ccc0393c94a95a1b39147c8e6eba84b107ae934f207aa56512f16e992a642714d
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/1e935c8f2b36fb537574c2c14baf51c6
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/ffaeb5160830e5859b2f650d818422b80ca187f0cc43422915bdf1dc0b4ccc4b6d0cc8caaf570105ee531169fc494a6fbc9656ea4ba9f9cade8e38b7ee339fc9
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/ceeefef634d597e201047041ac330f43
-LLD.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/f4b4ba04f2a72744f59dc26d894956f8af267e7b26a34a658fbf6ebf681b5d414775aa7137e2641ef0e9a0600269926c1a45d98d9ea2087677901c62b94cb414
-LLD.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/a9dbcac1935a74f3bb3ad3a879098ca6
-LLD.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/7975edea6954b6168de5d05f7fdd5d95bcdd3c826b5b098baff86c93928eb3d9b169b3948fd94f9194f01f859cef1f1bd3db7fb470c7296b0194c37adca1de71
-LLD.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/b9690d75d244393b4990c68ff9e4196f
-LLD.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/d062cf3d89bbee1597871e2d7921cd4fef31e955434005f300a87fdb6d1245e399e417da7a1093f99ccf816f22873c517937bf7a139efce350e66a01368c0c7a
-LLD.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/9c5651ed5d643dd3db02a7183453d3f6
-LLD.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/a6e99cb649cf7d6c81958ba1f2bc8460e3164e0cee4fd5a62bf62bd3040b8641b5665f0eb47933a4f13e1b1034ff6a167938088bac4b9b2eb75dc1060d53fe40
-LLD.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/6fa5219c6a38dffb193ff53d5b3a3d1d
-LLD.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/f7191d625f35d8e8a147426c004b1c7bb327e3394b047478c8d003bdbcb1b2da492cfed0c71ca123fea68c500c17d10cb6f157080228ef1517d88a6a2c8103a8
-LLD.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/de8e66dcda15ce77c82350a0c708358f
-LLD.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/ead4dd1b926ad903c99a3ca5280397f1f866356a3c2e0c92143165593288af8e29796cc0909e72123b64c58cc522bc49703f5039f731e8805944f8bc8f318104
-LLD.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/fb78f5da88875c423fe9c4e897db7547
-LLD.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/9a667f0d6d1436dcd61e6d83dbd749b5d156cea5b7286667f723d835a93db6409f5c3df3b77e8816707c8d779d9571a7ed1ad764409204a45cd4ff01df252e79
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/ac4c0898727e017239bce35420ad80b1
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/7e80a8b7583c28b3e92c7f0d1c8b8d5b3ffbe00d5df87e3a2c4a4877421f28e4a9b658672684d0f37164209a9e74191be687571db6c498edc902bd104bc2dc4c
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/b70020b2b3065478ae37e309cf4e9e8d
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/56b99742fc2ae442d3d3e3a80339fa016c4b6f53699798aed0351b1e6bf75c8300b18ce2e67416443f7eb8f110f98d3aefadc140d2c9f906e77b69ca349f954a
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/6ec819571dc37ca63d590bc0a3cb4e54
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/e0acd7b407f8dd88572dab34e30d385fe23a8c98dcc4550811db5667e182f2ddbe773b992e4f83015033b0ab6c38071ffe0b6f68e0a01e8f9b9d627a233c46fe
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/0668a79e8d23e48aa5380fff43436d82
-LLD.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/bd78a518126c715861d7f27993ae26e8082452d4ad18a9d3a0fa39ef46fca8b6e98ca14ec715470161a1c9d64ee71c7ed4c815be1b3c480f1d003ed3377895d1
+LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/0edc0983135da9e37b18fa3fe6d56237
+LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/2adbb4eb76e72be28951c96140070b6d16c5144f689631d51b56365549a5d38535c1dbb5e351a6bdac4648ba52da02297591874193b1c16e7078060c99d23f04
+LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/59b06fca083f1a5e9bf9517ae4f6a4d6
+LLD.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/7f1dc641df9288dfcd887239b86e7fe2871220b9d7f877b24b3197ab73d2176c4533decbea427b09e8f70ddc6c7570d31f5682eaed7215193e95f323769276a8
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/c97e607a661b9ff571eba4238ec649dd
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/7c7add8a0fac379b580a19a02966adca4932bd4573ba0111262544c0d935fc121c5aadaeadc97f9564331202b08c7366ceb170bb2b318db3425c157772d283ea
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/d55ebbd25b97a4e4628fad1e04782056
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/681729b4d10d8f66b0cdb89ca4500ee8a417561cc886608d06af0809d946bdf7cf5c6bda2b6d5d577bae3a15dc347568a3d7d7428568f86ca61327041026fbd2
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/78b06e5a351e6eab372ae29d393ffdcf
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/37a8b5fa3491ec8ae74da88e81a0c229d38166acbb46ff3f5a819034c40fa59ca2ebf4c0ed58e615baf7bf7da789ba86114738252501cfbd842be95cc2104dd4
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/7ba5b76c83d746a3c62354bf753db697
+LLD.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1fa403c8923487e2d6a8e8c1d86c2ea955ed32bcde2328cb1167a315cdcf704af896505e9c44b750ffca9e3ae66e805f60831136eb79fe1c6d58eaf81a78b1a4
+LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/f052208026a0fd5120ea838843b244ac
+LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/fd9ff2d5836300bcf76e4aeefb1e57860b3203fab0c32e668dce3e636dc362876d0fba1f2c23bf55a342ac17294c73e839a8eaf065d64d4397582dc212b8b9f4
+LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4d1077835df0f592a168c140ffe6299e
+LLD.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/8dfd44113b817f607bc38ac1b4ffb192be340c826b9bc8f9d41e92e0f0333d8fc4227f93aaed16a4b9e94a5ec8b79628f2d3a73fb644684a595921f36ccfbeb8
+LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/0f31939f4ff00c572eb392b6e70aab38
+LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/581441087ad4869cfdba13808b2d6adaf929ea1b38ce96c357f276d77c3e63439f8edbb822c8f41770cb61fc08837d7eed2466d187683bc44f2cb3c553e2e60e
+LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/ca767173044b5a19a86c6a890dda3b05
+LLD.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/0577785079039b534fd736ea7a51d9b5176693d81e0bcda4fccd760d7c1218042999b6a38b973a903c0ef68e57dfb3b86e9e2f9e307dbaf603997a853f34eed3
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/89bb950f17a5b792a6e60ef98450a6b4
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/54bb68159743cd14ac0fce7f218a66ff6bf29e626df8dbdbd6e8581699d9b1d357a3c10d86c6822bde7299c14728bc55480f91cefd041d1de61cc179ed347b9a
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/735e4dda5f8cc06934f6bda59eab21d6
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/a9b91beed959804b9e121fee786f28808a7670fc5d2728688cca1c7e0fe56e82e47d95712e38fdfc42e02030896843c4b3df9928eb34c2aca9ac02262427c76c
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/30a95179bef252aaca41984daa54c680
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/0302db3c04396a30d1f6ab8d8d585bbe3a9e70342f068747ddb875b024c173bb9bb34518da7e76a10d3a325dfd741118f36f67fb83251bdb8a9901c4799ad79f
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/4386c746c5d9b1408dbe7df04bc6a08d
+LLD.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/d71c6ebf5d3eb42368ab336cf8520afcd05470308ea117fe95797171e5c573948412ce777f62cbd45ee99ffa59cc769c276a60393a22fecffbeaf8b77b50ea35
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/49287977de61b100979355e458c8970c
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/85ed3b2c7d2478a307a393a2003e694fc3097cc6812143abb3cbdd73a7d36bcb6f06a7d341ea639b9849f714c2d8f418a8b96035ed1c19a3957b42d005c0427a
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/80a97341c9537b8a58c7df23f86d5cf4
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/5774b246ae820de4230a1f4f65bd683145dad5cbc4d326fd75649e06e773c74c2cffd48108a79ee0cc93175786450b6d50f7ac532e6f68961c18fe6119ef94f5
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/6f84d6858aecdfd95726a37c9b6a0e0f
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/2cdac9a810c777ec6d85093926292c75e4287f83b7224246f6fa248e3874a2078c46377cd5ccb0f36a5e25b139691f1111d705079e89ea4215c9bc8659414094
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/d40f0956cc36aa7846630755a672a91c
+LLD.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/01368311a0ecfbe3f23514115f0bce7ce816c878815d937f3fa067b9daab07da0c02f520a96ad793212e5056bfb6294dd0129dae75f274dfeb48191e504c5322
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/689120b8091b9da8cc9528c96f5c5df2
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/ab78810af7d77116a4973b5825d5090133218cf08d5d77be14f83e028821e83493a112adf71094cc208f74cf4deabda63d7fff98866cc0304793aec9b27b7222
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5627ccf1677c48b7ef8ac9e5faac1d20
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/454d2636cd72974c79c2d907e56e3c69c30c3fff78b199591c9ebe4f14d04c40c4bd7331f8dc2c957c37e214da8d28ef3a47ed8d3dd4ca9d480d52bab3429b39
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/8f50e5f684c41845308c123f8e45a0d5
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/21baf8a00fa65473ff6cf7ef2974ef88cd5b0eadd06ff85598de10d09425074297bcff3472ef001047a5440065a2de2fc6b1eefe3a32c7c1b3e3261165dc063c
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c2e0a5f58e38a9acf2c3914177ceb827
+LLD.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/2a1653d171a2ff08bde55c53973e62955fe9d9629388ae014a645d3199d8f4bcf0fb923d06812ccd62e224032b261c8ebed56ebebed750acbc87671203d7aee5
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/fa3959aa413a2b707d8831edd2bd7867
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/8b74fef916a72c2f4933c21d3344410c7e03e64265a44dd62cf2ef2ac0feeafeb2b443eafa5dad3d3d0028be96b9424ff67b16391f1b3a2185826de68921adab
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/b0751bf7eba4f7f7a28dc22993eac9cc
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/7510f7349b06365e9cd260229e7b8c84da26bac072c5fe9a4e59484d82a0753d4ecf1066ffe41343f881a682590dc9ee4ef4a49cd83dba45c21b8d76dfb80f67
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5abfe9e960bab4c8a44f41aaccaf936b
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/efda0e0a35e2774af2f2df53f89d61f146a5730086d40865d448b009c833934b23ea4b296c3dc3f2039527b72ef40493fdee6f7c630484f64cec2d1aebf4a4c1
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/bfe87378e965050b1b20e993c8b13a53
+LLD.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/ef2fd5e81f349673417bffd68c4122a87c09caed3f6f8f0235bc70b75deca7363cad68276aa708fb9ad8f7edd249d49f78d9f5fe7b226b62e8604c7bd3d4b9cc
+LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/4ee16f57d7dc060007250e17ffd55817
+LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/27fd3a21bac676feb2c2c2363c027cf12988c70d889174e52c6bc1fcb4a93241f4bae85d5750ceba5fa971611700a9d15e3e02803cc14382cf6a1ab2918b719c
+LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/06699da5617371442b0539203152405d
+LLD.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/83ba6300d5669b52c1913440598a2577106ea73e0b83549a5b3b0f081a94b6b8ca9fc05687d2be4b60c2d6a524bafd43b839082f0eee58b4685758061b229fde
+LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/a051688aa3a6383b4be4faa4f4aee985
+LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/2059c6ac6579c4720e7167cd547b679a9c1a27a2c68174ed543be935ee23122234b3f2a4555de0abab3a982aba73d1751db336f3e28005ce8e4659d61f9269aa
+LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/600baa66310cf348ef3b4351ada014f4
+LLD.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/52b4718993d8abdca8ab701e86022367655d7927dabb8f3a8e41e43dbc90a9af78caf8abd37907a79b0f05017b6f0ef72314a187dab5bdac8ef7996e74c96e2d
+LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/4bc599fc07e9c7c717355802c1538a6b
+LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/4521e40cf6cca31cc9ec8ad974c6eb922632d8ad0d5008c951e23b7ec193a71dba5f3bc2dadcfe47e2ca29395646293c6559bd88ac286c5d31d5c4521756177d
+LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/462b9c453405768c2d93535fc83308b8
+LLD.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/39dee4d4a0073a8dc4ea63d43bc9a357bcf8e26e3c5c17f1441fa72145f5a4ff6a53e0aae6de687b8fcbace40207ba06e61cb8452c9bfff7882ab48e9f9f5ff0
+LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/8b12a4f5db80b925785f42a97e6489f0
+LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/797d12888668712658fce85ff842d812a255fa4633bf4e78b21488867518a1fc2de746885e2fca1055595ae476670790239a714797f2322ca04027afbf27330f
+LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/acb8716cf94f654078c7dce4a140f71c
+LLD.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/cf64ae04ae3e55575d5781ad30212b1c0ec734f81b42e3c26da8766bde7c47b6a9512515997afd15f9eeef2ee326c7aa589ee1b557c45b4ef955a8afc72fd759
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/331d844c447f564171345009764321a1
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/978349a74fc5498408a5318c87ec6d25c01268b9d21fb85e6bb601243ad0d33be8501b181d1f9ab7663433a740912f5bcb7160caf1011b1a2c84fdd51e0fce78
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8595a49c49e851973fffae7c4062911d
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f707e514843a206b53f380c7bd8d4d8203cc62219344c1234416462dc1cb3d3f8a7452ddfd0f07178d43dfb193b4402a018cc465dc76b43b687fd20fa1ea5222
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/5b4463e81c156dabe3d182c42eb647e1
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/995db577d4a78d62cfcfca3f1fafb333ff26548b41d8aa8d763e4705dcdfe8005e2f68873faba4040599a6d15821a523261d0451d75fdf6e1c5224e8e777a71e
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/d2f9f08cc952c0639f7ef1073c8630d6
+LLD.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b1cab7b813fe0f7c26c55261e8561295cbdf1e812db3844b87605fb527d09855f2bef4a40ddb0a7cd354c7cbb626293d4d4012f33acc242f9af4abe1dbbbeeb7
+LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/e82e3b67a073cfa6b019bf5604eabf2a
+LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/9bb18adf78afa9dfa0054e6511f5750a9e2fa9138aeb1bd83f7a51d37d031e2f3c151463ea8f682dc7130cb98fafae0b84c60d3befe27f9d0d3dc3334ef82420
+LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/56da3cbe81ddff089ccf6b6392a9396c
+LLD.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/2af483a1761022dcad414fa7cec7fb5c6fd54be28185e49539f4824cb0b6acdc1cfa5c78de31268dbdc444201936c5a6d2e04f39ef6f0b9fb184985ba4e3daa2
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/15cbf5eaf89c7b834ee19629387515a5
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/75ce7c398bdfd57af2c09dfc946b024d5a72e90575ed92f28e015e620ca89e421dfc9a391f4a78277c3e06c38dd696d572c5601a2b1866e521dbc2fc5a60da56
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/b895da29b6082cdff6f0324179352fdf
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/e89a97dfd6c345158e3e12cdf97d33c22f849e5438401cf5a3670c0d1cf0252ca03e4c52475a42c3e6c2b2d689c2f53fc5cb7c925a23167ac51fa1a5e01e3d7f
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/7edda2d8c2eaadec2d262ded2456934a
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/0b1d60840d638c0b0269b901a3f5198e18e244da338aef2fb49b474b3601d44a2b4dec13e258909985e363ef8a8749838b01dd195e05a266ca36e6d9f274ef17
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/e26138e3491a053ea9a998dd00ad728b
+LLD.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1215861fa52b1ee21196bbce0e99912b25f887f5734e0c2628ac78c1af5fdf57c4d7cf099cddcd7031a26c60cf141aeea66a0147428008cb485c207e90801835
+LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/a1e786ac775517b8b483bbe3f6571d37
+LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/3937f156fc2fb8eecb13444c71f380753c16b08f29124228808c91ea4258ee2195219c4a9b601d4468cc24bd584403c16175518a620bd94a7dadff868b3771d7
+LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/976d840de14ef6ee2c0a538197fe8f10
+LLD.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/7f58f975dc3d69f502537aca79509bbc3c4f5da2ff8ddb1c7e27180a6bb2123713eb42da61cfabd7a48a31fc464fd74554b34935dfdb3ec095d14ff443f514f3
+LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/ab0295ba327cfa6b9a252b0e7a4b50a5
+LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/7c750916d4157ba0a37cd1277a0f8faf32123dfc626ea76f848a7c567fd889a7801f8402a307c190ab34fc21b156f2a23967abc9972fc103e5847a200ffc7305
+LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/6827f38ed653f33953ff7ae510a517d5
+LLD.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/f01c655f6433ec6808b62872b8fb4c5a2d8e187643c11f0b4f5c06e2302e462353b516f431c1e26ee60b579c0f8c8c6385f018db3011c619745a39f9ef263436
+LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/385cd2715d29de3e85a3ac10bcbc88d8
+LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/5c90e8e583176ed9dd563f794073bb344283284a10e303834b6c5a9b71369f50dfbcbac61400ff70f34f3065279c848dc29086309ad38774e50eca3fdd5f9799
+LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/241978345735e3b57a88918693c0c0db
+LLD.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/916c6a4540ce9a2b2574d92c3aed42171f9e49f776ab97d3e5be84df832d463b7e542529c3ae81e4d6a31d5789d55b96f9559f48c0e4c8be36d70e3ff6f4292f
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/a4f16e809240c1837b90d28930e3f711
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/983201793e0f9e6416bcea23b4a70a5a1a36fbdd72bed2cc60ec267eee441aa3d9c850b4aa3da6a232f3de451089754138ecd5411e5431f632e48c1993513ef9
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/70f47c2be55741f754ffe89e4749dafa
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/f2dcf4f6ce888801e8a14875909f78b46d8ed853a7063a185356c7f21e42e15323d847d9a9d4b020481a7fcec9539d979e4c7f2b083ac1c1bf75a275a200562b
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/becf7c6cc39a98cb722899c94b32ca34
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/84818621307779e27cc149afbf958653049e47a62ca44ff78552878114c2fb0f7c40cc83722394ee8d880a6ddfdec79012235a6ed20bbfd1e5d9e83ed0a0199b
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/0117c05f8dabf41c4628532d59cccd3b
+LLD.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/b276dff2c54fdb6403a461ecf5435978e2cf9c9273934edcf3a31e7f640ecccf37de672f6b0b3f296ddb6a7059b0d95ca6c5bf62d62ca545cc62a69ebb84b8ce
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 905a88f80a2e3..6380397ffb84f 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,119 +1,111 @@
-LLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/533cdc265cf2625457f3655b8589b43f
-LLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/3a0c96b2fc7c16fc33741933654a564574a5059d806a3999f0c0c0af31f99acc5948ef09edb21eae9f6d4362a7968af55781048029a875ea92a981669c6e8cda
-LLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/4fbb96de11f9f64d5bc3f467a36b5584
-LLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/bf08ae144e7fdc7f5055f98ff2a4e8d5b46670db00ed498cd3323e10df86506172ff41aa6f0815259018168bdee40a73775b962c5f0ba8639c28b18d65cbf927
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/73e92eaf551cc50ba8b1072ea5a177d8
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/691090c34cb5fe5217c44f1f7f0a411b733bd8197baab7c5cf2eadedb4a6838bd39935795a7715521c8edcf0e611c6555068b49e17c4b2465201aa1772010bab
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/3f0783b752b25d2d47b557c3504f35fb
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/ffce30b3286ddf0c09328b66876bf3c2f2330ec0adf5bccb4039be3f09cd55acead7c34feb6f9473892338768da4fbc3ee8589197f420d89fcfb2039ff15d889
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/3ec4084b5dcad58981a701fbeaab02e3
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/cf95a368f5a6b9ddcc368ca91631546a92ab374d9da74aa6e2036d61ac788f8348b50465c241853c37f64608bc2d067b96d17990c03ad71ce69032cc012ec433
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/cb4072b14022490456636e0fda20e569
-LLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/1750a2be132a0db76db43b91592c5144ede76c5b205693d5eccc2fd340534fd5d90ab358a8c1af08deab8138e6c82d382e3e95c13ba027b1b92b6f955da1ced5
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/f2f7f1b86007e297c8827d5ab58f5c7d
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/49a9efe8cb1352ae38169a3545ce1cb719d0f7fefc29a24b40fd3d59f99c98483ff33e869e283463f17fb63b883cca792f618296a840eeae82a5855a9dc67e86
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/24c659a871de64c3f8d54e7bea029e84
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/3e4487be7762672a4dfd5f7675945d2b640e81660153036ec2b5cf44fd278266233a94a0cfa337ec11c5b4ad6fd46f80406806bdd3a1f1eb9e3da43184af83d6
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/39412690e1c3da7fcf4416184feea3be
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/142eaaf10bc19b5e786bd2f8edbab31cd5dfd6045e86c6244239fd7288b7556347adbede12cb40fff02da52295edd85c172fe17ea27126246ff4c8fec05b29d2
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/0fa28f8c44961f43899886a6b6b0c0dc
-LLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/9dae70462e8fab0fdb0cd589470bb058569c5640e60bf74e600821344561afbcbf1191e47df9d2117ff5934bf707e57e67fcb9d889e470531505bc18d996b2fa
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/1de340a831cbfcb7d026a77d6f91070e
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/a36758040ca5d84a514b4764dc60c97e4cb8ff7737d1ffeace3b9f0b0c73716ee7202672291d7bf24da03e193b52292b0c2cb74e200b2eb15b3b982c8f67c3ee
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/03042210289cd06ead94a0d84234d99e
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/7b16eff41381880a42c6c13f6241aae4184ebd9a5fd696afad4c030f815a210ef54eb877a4e375d9eaa31e53ba71594174edb4c17e60854034e190a6a6ad084f
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/c2de107822fb76243378e9de06278775
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/e04a608ba0c7ea6bf827aef2f060241c0891908dd495dbdc675db81114f07c7ecaa27c0df630aa1118f56c71b59ec3f81e96e84336cfcf4cfc16464da0871675
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/58766a44a4f74bba6204c20a6a00a10d
-LLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/5776b898b4b988d1fc44a7961fd759646aad17d0f4b5a3544857183ae5e863a1f42e632cbbb7712b95fd418a2c680497ba2c23dc8fc5d6080e25ff94ae289646
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/4a78da7a5b639353e61e47559072e190
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/82dde74099a64b35e94f53765f2748eb65e815a7ccd51a8d288c37ecc306eded95cc4b424812e9e59f247f3f9183c3a1bc7f244ea51f2d1912445db4611c030f
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/c49efd85a0273ad684d13101f4dcfff3
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/af25f90527744970458792697027250f543d8ab1ea068767cd1c240a251492ce33b2211e6850a7cf36f16f6b65ba11ccb799f6bbaa777fc92c51785d0188e101
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/167f874b6eae226e02f32c7ac5859b2d
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/1a3b0b128b5b8fa26509d3b814f03ed1f1a6cfbc0017e5751761d0aa3b3821dfd4165e7687b09ba03d11c29ea533d866bc435e7187c1816405df37f897ae6d2d
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/29f396d657b0e5340443c71d59faf366
-LLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/d43fcdded977428b5938aaa6b6443326cee9b522ceaf5d871c0ef783773e20cd955baf95d0639db7273a8fcccaf17259b05d77a347aa6ac481c446969b436f24
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/c17e06330348f30d3f74f26db2499612
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/ce308261d0be8f4cdf3c639085a38779a214abfe6bfa38626810f9e99c696b133af20a592ccf9a301edd2a05a99195154a76910d8a120178764c8692ec9dc4fa
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/203772b8f6063cf6d8d4a7c249fba457
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/ed7574ab6915db25c0b9675be8ab8db04f71cfd775626cf67142d82a2b32f73ba5e3689108bc10872863bcb6672b2cce3502e1bd941ef602559d7fe2c9d8d4e1
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/6a08b5cec8c3147ba678db787fc4d2e1
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/9e8c77bddb64c0bac8326750e81cecc38d54168e1d7760f69d17a1bab4b4b69305c2a75e03f5f10e40a2b2bdc0f07eb2cd5e48e3f8630722e7a30940091c7a69
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/b018658105b8ff058a1c8aa04654e895
-LLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/a182645261fba4d41e89473fa18510778c236c27ac8bc5db1cebdfc1da2617e9b4b940f08045b057c271d44b9a61caee24f4204e1a98cac2c2f40284f14c3e05
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/7100005784dc8202c966c4d9b0f8b4ff
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/f38e939bb1cdbb4d895cd8300022094e16b1940eaa450b4098c6822126e83389f52235dbbb22fa776930ef508770db074f5f378848057c693ad1690337ae43ca
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/4c8003cb2fac076627ec325340792f5e
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/cdb25936c38077b0b486821158a06a0d182e756cb7567cc9e0b0e696fcb10bc2597c41e7ae6316f4945771ddb18a03864ea2ee6ca93cd1eb737eb365933e3a4a
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/f6e0156ce3a0dd264668aeea0b6acfef
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/01c31900e79786b719d535bb1f57a36e54d56f0690361771ede98f2806fa30f825dcf6d4c176b33d73940c838d8e69440dd49180d3d29954ae02e1525ad05708
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/5ed27717d90e862b22226a11bad4696c
-LLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/47a8e6aadc5e736f4b78ff059c628488a685ad3d97a0ac2b8c5d048b116dd0116514399d66983f3f519e8701ea4a851986b94b17405ab31480f09acbd0edf9c0
-LLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/7d154fd2adb1cba4312fa2ee20d2147c
-LLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/4a26b459baf5ba801ced522b4575c81328c9212cce9dbd1af233931c95c9b6d869e81964778dffb5d376dc4a258adb8f2986c868d9c90f480d6fdc021f252187
-LLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/0deb9f1cb47d683fc4250071bd9490fe
-LLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/2717b2c15b715de323d4a7d914f191e017aaf38224e41554f60c68885b1aad625fa8fa8b3e305e8703e8772407284d03d229cb2d2f9ff219d7dbe5f91366ee9b
-LLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/4d9a0d826ea67ab20769999783641abc
-LLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/7cf33e40f6672703d88e8a0ce656e955c4a7d010b857cef89f7dc56291b1af1003c0dbb5ab32e0285260216b58e30a38cb78da28d1bf08ee66cd7a8218a835c9
-LLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/54736c04b06b3e1f27673c5b552fd8de
-LLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/942f8f4c4191b9ab75e2a03174c0c1241c4c6af06b6f5833fd0c56d57ad195b45374af80089fdb1e2e431f9cbf256a7856ede7e8f76712e0d3189009cae5995b
-LLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/9738446d7d909cfaed0658cb104526b8
-LLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/8c8db654a9d2da00195ec1e1beb89f10447a0a73e8d3e055b456f0f7d8e1dd90d7873ef9da2e2b27528b316b334166f2286755abb33acfc0a9eca06b23a26b0e
-LLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/16134c865661a0f29d9cc693ed3d5510
-LLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/173407929822c567e7841f24040b82d9981c6bf176717df6942d14ad00757871c1d2a81ccc4467abcad59a1d874d611b7cb5f0cff83898a74fed637781ae0a5e
-LLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/d9a7eda0ebfd108c6a1cf435674be3ba
-LLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/381bae57c71c387c4608de3cc8a3477b826461a8df1b98fe06259c4a595066a816e96c6731565ea1c3166377a0d9aff722a483e47c76ba01293d408f2eb3b577
-LLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/0d312cbea5545a03a49dabcf7519191b
-LLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/39c86e52d6298408fee6ab3de6416b710b782ec0810602ec76eb76a87facab57abdc9d8a60be9522c0665766a24ef0af8c83437493b778f028012577188572a5
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/bf5eb915b604825b04ca84b1ec3e9f1d
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/0907a5bfb790c34a8acdd28aeb28ac36a6bec25210b85e2f617f7145ebd80a3d6d4718c633d45411218a5d49545c0adf69c922c19c4674b2db527ce7e7a0d084
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/0972a1aa6efa0accbdb1be9b799aaa6c
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/4bcfd7cabdd5ce119cd848d8838644c8f4ff189e2998b4d3ae69193cc9c64ccffb31d08d66b2f81f86876b19266c6d2c362314f539f0612efb69b6b6df981469
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/df4f0d07cdf26759104686d4f36e2818
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/5983d1e9c1a072045c773dc438da13715faad6e0999fa9a3405821a4922ed8fab666186bf1a8dcc45743e27e5065825df8bc92a06cf3321354aaf022191f35c8
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/216857bad881f6a50678e2079d93f9bc
-LLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/17cec3034d17551eca798b6e6fc355f746ef71ce2337439b55c2f55b63f0f89168cdadfea578d7971bb1f6eb096bee47e94e34f85ae99d88e39d2052d2a51a6a
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/a2b9db6135bafc8f80d275d676859d13
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/bb98b51aa3a3e2f4f58ab6ff0ad36536e4455a602045f811cf30e04e87efc4be4be27b905fc1716b4ed3e2971a5d9b4bd41c438541288ed4240e608adbbbddec
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/a3a189210c2b6e2bd32ad7ee6d353a82
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/3304170ea4c369f24a99e6249401a2ed078693c9e6444a03c65dd033bd539326f0444e0ea71e4d8e84dda9cecefb46b7fba87a302365497115e4359370b5fd76
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/dc0be3ad6e188d471bc1b0f7a07aba35
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/0d9eef5b33ce0bd2bd6d7467d198e2f00f6c31ea0cf116491e368c78882f8437442cc18663d96f72e99fe201041d08e79d61c13b3998fdecffb1a7d6f2843a35
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/2563d77bfb2317192f5cd0a00148b6cc
-LLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/d0572501e91cce51d662a1a6c780adf148f34e0f4a151c1fb7bb55bc064f7f6f29a6423715f9e2332205e50f076a561ca4b0992e834b234a77f7709ab4c92786
-LLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/a096186819d3f06c70d40498aafc5879
-LLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/67c83539c0272d090e1a2221748eacb4fad15350bfc84f7839227d623ed234878752c38f412f0396b1dacae1543dfa9323e184b98cdec3fb9b436aa8a907bce3
-LLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/c5402ce51e61f4aa46dc56942c374746
-LLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/f9072dab2ee52b5d8116cefbc32b023745860af644de867eef658d0fb9308d5868a5a871489c399cd95efcef9075c7a20b877933e9a243454f0819b4b0cf5213
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/ec70c50570a56b50df05b140f320c475
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/df598d58bb083634b298edc0e4e9006ebfe76029206fda10a58481be1872ea42ee441ebd3c36dd59490c66e89d9db0f610799be4b5d4c96dc315099e2f19728c
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/4688e7cb1c73e5957e0ecd0cc14ed53e
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/b825067f87d3bebf6e50a472ca6629cce7272579d473e36231bb2b765e509d4fd23cb899ad14489ace12f5ba8531089392d5fb9f3541351b162664eb63ab1390
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/d9762bedfee132f62012b31c3cc4719b
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/29a2f8b5e4084d1c10aa15ab7d25812508233cc53c1dedac89d5951bf6488641461507fd769be6e4449fe435c17e933c6a295b00093f158dac97b92b448cb149
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/c68eaa7c015201a8292e1f1d8cc65fd6
-LLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/ccda3083ec0246969824d8c5cfdcb965585fcd1d38306ea160024259e54a433e421d058b6ac2a924f091e0042010ee0512e51af928a6b0762bda0cdb7f99f120
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/853391923e6372c3ec18ff5a44c338aa
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/166189bc0b81ca270bb017e68cdb05d4c9d1d1664bd9fd24b9bc49e14dc1d811fc6565958628a062b509f8784d42632603de31df1d4bf1b1e9ef9ab9c5656122
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/2907097b73dcc8d8999b1df921c4b75b
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/6574d330914a1535b6e1be83f889c6a2cdb474e83ddf00315662a146f1e29657bddcbbf261315446f749c9859d8fc496be084f3dc56572367b0ad8d25f09f06c
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/33e00eb48fba5be418d76f1c1d0ace78
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/10908824a5dda3e69aedd03d0c7695379f465b284b78681d6f8419e7304702ede9c721ae0b54169716abbed429db199450e3fba5b0e5d56e21867defd9573cc1
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/6f8bc1a92fe8f3e85991739fdefaf1a8
-LLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/4ff992b094a6d03256b4eaeebbcbd023a22b54b49976471c16ded0542e1a79e46da43cf0346d54760cd5d18e9b3f108f42f3caa37593a6c1037bcdb4d4461923
-LLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/a3b82c7e8e9af0e7431d7b3a6b3e62a2
-LLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/8fc9c0d2ae985d4da89734aa4b49fb368d245819c1fd4a345baf354d58a4a0b85d6390e1d6979b5ae757e29fdff58579cb7ab6388c596d9923e80d34fac4766d
-LLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/9bc7eb74f530e71a3d2dca02a200363d
-LLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/3beb680621d3862f18441471cb9318d38da108bb7114423475ca67d3e8998652e4046bf7ffa40692dbb63f377c506e41d3f6c621bc3b1f88366ff0fc6cefe59a
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/ba22b4e204e585ff18c3cb57b8e2c87d
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/037bcf1c9e0fe5faf40c4c5f8a06b9f90fd8ea36d3649f4faa6927df8615819a2231b4393963a8f29070b0dcef6e755106b12f9cdb2a9a61610dab35fa5aa4bb
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/e33235975b1a6ec8f69d40ae91d0e4ef
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/1ccbddc51c6d9df883ddb75257fc42ed95c8b3d3fc99d6bbe9aba508e142865bf96678272719f60cb28a3b6f49adf68d390ec50abce47b139e6f7db653537ef0
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/a620552217c5b3b5318be75a3ebe31fe
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/5703664ffe0587035cc12de3bace722e7c93cb920810a36beab49d456ddc6d285abab70172f95a83e952f5c5254dbe4825e465d2efc905c6798d7c4cb258ebea
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/4d90ccd98213c482f202034d16442be3
-LLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/ba41bc5f229e61a87f517f552fce604ef4fce17523b6b1b856ae7aeba4827f114a0eea73bf05262fd58604fad3e746c8aa54e9fb87cd97aafa50cd9d3396126b
+LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/f18fa63ec97c79f3773af2bba51f69c6
+LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/4ee1c3e746177296fbe976976c58b6ca09dec22943ac1e63008aeed94f46619e4e60d8278566e74f4912fa9d3aa21c8b03ae2bee360db54c7dcdfa2381469148
+LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/f482e543971546cd59d946cc33d79d5a
+LLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/d026b746f419e9bcc04daea60b1e66e26d4132e7a551b0f14c95ea95dc9a0f4e645110d8cd5b91b92bce7775ababb715747a2e4a09c0920787e2f25ef1bfbf19
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/5d12f50225285b180274cc89c21e7c44
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/3947f0d909444716a29c26a0645288e0f02ab19e6fa6ac0104c5ffc9659f01337198a5914beca2ccea7c98c9aeb12fc537891d440766054c0b9d3bbc40e24165
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/e555476d3324996897cb0845ca22312b
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/a809d8c455d6f72c2bfc2517ab375d6ce329880ae33c5c1bf575dfd599d6132e38df35fac4300a0e72726ca33ae1db69ae67f5fb03d5c617eb34f7ad20f09b8d
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/6432ac27166a0ebb550c7b000c27e2da
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/be6440412d46252292e6d907f04193ed3f438b06419d0fb8b067a7cd89e5cd2dd9143af4605de9a2a697ec2745efbdaf6021d065346041fec3b86051de42a26b
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/0bfd05e6bd23c92b73751a86826b288e
+LLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/68c08b2624bd0d38c7cfaa8b61b7e1ed70c7a106dda814f146a3f5796cbd42f476ef19f726d3ce368d89e624c7a3fa7f07829c171d79581f3cf565dba28c27de
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/53a9db6445352b44717f7e0f81d896b2
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/ae34208c128f1d4468d8a25b060bd1904f36a73dd0029606394061843f90aa26f9c3071e8281e76dbc10fcfd103f04602fde370a0cb04d435fe2f7a230989cb2
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/f7320272ec2f3cc86a742a8ce3b4cec2
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/612f03f49b04fce2a21e3e0242c3ae591ccdf6398e31aaa63956c40fb805d4a060da8acd6e5ca1d1c0a7b1f994105ad74b1acf78490e31a149368c8a9c96c026
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/db7b7a03c047a6aa7b599cafbf6023c0
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/05474495e73c56a8bf8a2459e705198a6c6e32df5b83ab153f1080a763d2f7d79dbe014592e12f0f3063b30bb0641dcfbf4f161ed988c777c8955ce9bdb89cbe
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/88255189a80045bb410da1eee3c277e2
+LLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/b944ed004867d6bcf48dbc089d6ee5904318d6a2ab3a7dac3c802cb7646d4df21950a6e4bcd5bc57bbea872f99f39ef9e174dde8dfa4f5518f23a1fa0e8cf959
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/a25160098b55d2ec00cde15d088343f9
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/2e84a0b52a4852a69155aa4cdf33366b863caba7ced42db573e401a64c0fd2acd1d27446a3ad0ff94740a5fc4c579e745802bc32f925bb505177afdc64fb85eb
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/10b225be9d25681a36fbffdb5f3e315f
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/6c38d87c8aa321fa08ff9880bb27cedda1806bf6aece891f08f757e6276dd37e450a899c4fca587bb693f683f9ad0d85f388e7c4ec4a76c96e73f0f26ff6766a
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/320b77cc43b91549ae0e6b538ff53f7b
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6b297c643530c06be5ef1d8dc2fd47abbfaa3a7862ba42ee9e4cff1361e54aa7ce77d4d9d7f5d2db38a3c780cd38a472eba1308e1f50aba74d3de3bf188fe91a
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c3e0fe843bfcbe0c03a563bd40a16f0d
+LLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/b62c3d8867594e34b1eb0c16f1db609c4b43146deceeabc23d4ee9af2046d8b2ae1a8566e2613a69691646d1991017f0a7d37ba8636a395d471f8f385a478479
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/be03ae93d0825f335411a4039905052a
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/9e0159681e8ecfe477d3099314ccf2986eb2a8325cee274b6ab35e04ee9e89ea61356e5082d9adab6c41b8be98d0171e41642afca283ec59ed91267e66223c6e
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/9e244718d094dd6b2cdc50be77a284af
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/705668d6b44bc754fff8f28246d8359773f29888c1f6ead6a5f1e10386c88572de27d4d47b8a1bb160211c07fcde2667833615c31ae445d1929229d981e36e3c
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/12162558c4c89913f0486f3a4c969c8f
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/dc6a48cdc9a04b3f0938784d5d40d0b453bf438881895c78a0cad9ebd83090cd9f1d12fc00df6538d053b2943a590a3217a8309aa0912fb3615d728280979276
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/e5012844af1fd76d6cf92ff0921a9f24
+LLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/436ace73907097593bd060ff5674db2e36f7a6e4081033b078554b76244ba0d2caea30dd94a49fb62c96f2a1c3e1f190de440bd2bb9242c1206f4794b65b30a8
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/9ee929acc7c52d18a7c42808761ae233
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/12f07258d295245f2b53414d0df0144c547c60b090354b5548f50bb704a82e1623e55ad353eec233407f1840a50d423d1404fc3e7b87f2386863189e7f886813
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/c94a2e1f4bc031a7c663111babb0f8fd
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/4c82406d8df72312798d95ac0d038b38eb332b4f4f8a586bca7103bdbf7759365daccb6f3bdef9a9c74a06d04a12e96c01ac9fd03aa38f3c586a7ef3c7ec7e8c
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/e038b8feabb2e60b866756a8dc7a5947
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/c3e03bff11db87c7f131dbf7163b414cac91556795e4c5c340bec52409c39f7e91c26cb34a6339c10610d0906f57a209d36f6cfd458b26d24ffca9a43d259f5a
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/b3bf4ff216946ad38ac6be230e0865e6
+LLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/0daba831dda378b2add9607fdc0d32046c0390a0a63758a6cdd9c0b90f660559cad0e71c5ee0b1c4264f3427e523a8c615bb87ebdfb63a65b983acfcb8df43e1
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/33a3c56ab597e6f2c2863842f0103e53
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/fb15d07a66b1f56b73625ead591f90b57a843cd9cb140e5158159a5f7c9249437678c61d0e19a11a65a536776dad37abd6be34ee0ec5dec7c0736079a0fcc7e6
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/7488ef289e45e6c44753a42dc51aad7c
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/6ecd62f0756a1941c8df92605a7edf9fc2e70957f39ae407e5b1b49977301ac6e82d55bcb856668135c27f1a75d156f3dfe7a27c47c6a3594c2c9f032af8ef19
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5a286dd05b936c0a3ab61722531ef5ee
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/80016717959246708eec8588fd6bb5cb4894bf05c2d78cd1641e31cb43f38c0fda866283dabf1d999c77d030b70b89363e2346bd9b9310a2999623e47b2e4e7f
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/b62420d31c65fd8720427900b72f9aa4
+LLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/f63f62a667f6f2c6ea76db2b142d58cad3165a426fd420348f0831d447a9eacfda5ec9c006e05f60c1f2804e8b25e87369e754a0bace28257035a63a1ea23a76
+LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/ea922c8edae65c855e40f6ff924c35d7
+LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/d83a3737058da3c2427c061cac83ad910c43368e47bd1f9ff86c21ef0b40669946b128bd1063a8fcb081563ecf606d70a783a0747ec951c3443077b3ec8e93f8
+LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/7a20fc23311317b85127fa033cb69059
+LLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/36d51f84dcb3c76556b6ee677a4f0fde1610df30a7030d1799fe9681c27e04faf1ecb4b5731db9a58060879076c037e3e5bab65faecc527296b439743bdd7d86
+LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bf6859a7e73fb51bf91e2c7ce5b879e9
+LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/39aa6b1e2923aa572458cba58a328bf6ac0efd5f11974e04343d65cbb56fc5804066f7cedb1e9c58252313f94ee0487d6855a1714adebb3b71fd6c783a01018b
+LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/10c32deaee824ed7a19dca9055a138ae
+LLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b9b14c9ddc2b0b07c07a53bbd3b711737d1a7d71626d3c34812bc3862145865205e5da07b052e119aeaf54fb97968b27e86450d768312623a7a87c6b8179d872
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/caa574701f180bf4dc323ecb441fa53d
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/1c97d2311111f4411c3eedc6f1338a8c899932e7fc3490a03c0c9b2bc4c9a52d5797c50339ec7105d60edca951fc57c6f11bc7198c8e1c96334147d2b2dc670c
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/f46c39e2f848fb5fbc9f1eed7fa695af
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/ed5bfd8057b2d6d543c4a11f0c1c6502dc7aafd07d0c5a96ca2b1d0c5194093f20f995ee38a4a25cc0291b31c682c6dcee460f9fb657b90be5afd43258ce4c43
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/06533f3ac22a8a9be2501b6708821806
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/5284308b46ab1d8ed896e0425fae4288f87a640707c8cd5f298520cb19cea8d6311b0e6d21d5ed016f6d87f47b93d92d371abfe9bf1810b357972b7c9b437811
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/f75c2acc329a9ee041ff2c81aa93b4ed
+LLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/6ec83776bac9e2cf2cbf3f890412a940c9507ba06eb50b6a05148c9c336775168cd5b6ec4aa1aa148703e6724c414830e54c3ae075e4d3649280ada705ce9816
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/7e2ea1a3e9c61976b446cbceadf33193
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/b21830528362115476cec7f32b11f3c1541a5779027c824882fdc00b248ea0f0ca8d08ebd86b938c10fc80a7f7930d86e2cd4482fdce33822613128eb250884c
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/d77b1c5ec7cb8bd02ccd24347e2e620a
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/84ddacf1c222bb5d895939ba4aab80dc6b5c5c596a36fcc2869a87d639d006a156750f04d268b6c10b47d286cf3bb5e8c20804174fc93881383f2512833ad7cc
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/26f634aff16b5c8cff48b0183f3f8ddd
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/cc3619c9c8adf322bb334a6b2c9de1ad088a17f117bcb9aae5b51a4f7613a50391c3478b7f892e9dcdb802067de69b098ba7d61edc9979b8f960028af0fa172b
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/891a1f113e7f3f8dfa56f5f28e1c8176
+LLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/9b6a4a26c8f83764f892f7caf5f09a5453ab6e89c742ae4cb1e831a0711104d131d8fe0d9a8cbdd384b2d881edb3d9026af804f47f5f79d62da1d51dad4ec0e0
+LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/7dbc009fb3ef6ba400baaafa733afb54
+LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/c279c4be6a5e131b567625173b33e1f51a56c53eb0740895c1afc8b6824a00d4331df76bae9960c2143f7bfc2a9758dcbc7898fb49ef4aab56df6bba7030d636
+LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/007fdc357a995d68a01fb45d52a92da9
+LLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/2bf2752f654db140822f4ed74494bcdddb85f4040ae24a753ed9c77efa29d2f50397719fa20de031325823004a66ddc1c00c9624887289c8020d6627ffd21f5a
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/fb17aeedc48fb6a24f0aa2d078ceb2f3
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/bd622d2472f85ac5b0cb255a929413ae3b30ee06ec7204148072dc1f9da7bf451b07960f4905a66d2673db9926797e4bc33b262cff656e7bf4cbcfd132b49868
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/eceea244f8fdaf61c6addac8b8f57319
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/44ab4a30ff65685a121dc54c2de55de441fad95b02f54cb359ad44fb298adbf48fd7651ce871fecb40b08d95e1ca701ad4c857f975a37a5e5a42280dab6fc670
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/b09f19c4940f6fa12ea8b5076501e297
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/a52da2ace1f0f2ce0090a582a267fcba526c86a88be3d8e55020ea07e00a1cbb0323f8b8b0205c9417982774fcc05d667b8330f7676dd40c869f374130dcc50c
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/a365e7fd610b6f6ad2dda2d94a141b4b
+LLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/5242fa37a93dfd99720f9c4966b4f9ac164987cb8de136c01b3474860c6229538e73db7727a6c7c832f651ce7ccb97dba0082cd66da2fe812dbc8ecd44fe2cf8
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/6645a6254d82bf854e50e47d671b192e
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/d330eb15c34e13cad0eeb046e2e27f10eaefcf1d6cb68bc4d55668b55e3c00cfa07bccfb4292647a737ffc69bdf4070cf5a8bb1cb7f6319a1caf0faddde7aafe
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4073ae0cc33b7f803644a272cd0730d2
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/2ea897e3ed3688e2ae45918db51c5a1273afabf46d01a6a27739ac951803645861c549fd438b48dcda05294a4d87b6c39778d42e916301277a0bfc1d9ce53979
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/e223954ddf9e11830cbab24e4ed435c9
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/fb88bfc543ccae5cc9ef737e81757a8f7f61d1a2816501d569456fa62bd8ce30ae57b837ed32dd6d2a7c55fdc26c2c1b1a9965968f784eb3c01680f25ee5bd8e
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/356d2f3008be6e04843a278d182834ff
+LLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/ae5b30925cce41593a34cf2e76b606e978c352f2bc915d8869b01600c8a81547ad392fc900766db2ade06355c2d95aa473bd51dd3d45f6bf20289d9cdfbb126a
+LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/c31804464c51d1967e73f491948e2763
+LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/84ab795067bbe71390f15b2d700ff9e0c4fc124c3d111bdd141643242cf6dd7d3317a92d9c97ef5129ef089cfa3d703abc2b12c6a9d2287c90a9ad58a4de8478
+LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/9f205efa80dbc9d43560830c668659b9
+LLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/54548970bc7b3988142c1a5c2be36f877c4d2cbdb3a58dba71acd7bb32b20cab2ab12c82619abeb6b3bde9a95fb66942e08104df0fb0f59d2ead7eda957b783d
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/ab175b04b9c8dc73f2c06c06bd9d6915
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c28bb2033ce2fe182f6a5a29e34a6ce4cdd22e994245f7122c4efb39cedd491c9d4343d8ba2aa8062eac156ad36d9f54605e6832feadce3c6e9f66e9ed7c760f
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/7e4dedc77bdcd6853d613d8b0e3e9af0
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/e09c451cf018548bb388f9a0b419496a6c6540cdf1e204be391391b3a5645c2198562c2f995c3ae30f775c786e9e59e8b93c0fbb5d00fc9ebf1529dbca9c568a
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/0835b50b6cd53b4d1fd894f27b3e072a
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/8d228561f66feaaa96cf0af71421032f6c241e8a8ce3b8352771072d7bdd972e1b6270e15b0a4f5f4b76764cbd65ec371626cabe8607294041679fe9b6bac5f4
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/bb61fbd156bb0a70184f6f425ba770a5
+LLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/ec310cab20f39facaa6c0b3a8badded0e4ffbd7bbc1fea6b3e67717046bfe6932a94cf562d3e35dba5052d5cfe62c540c6a38477452e535da52e650fe5dd4d6c
 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
 LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
@@ -146,123 +138,115 @@ LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8
 LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
-libLLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/6c5ee2a38d4ea9eedb10ddb182f99e1b
-libLLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/471559396c160f7c69e39aa027955eeaa585382feccec79efe63e89d63ca2af0008d20fcd73b67444fca8ae17d48f9324f0d5d207eb1b3661f9562f7aeb4534a
-libLLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/12d98767b737c33206b8f06923932f7f
-libLLVM.v14.0.6+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/08babffaa18e40c0df47d852bc063f6a1bd77ba0a3a73e77e4717c71ddd255ca6ed29924fb78fd61bfed64f29b343818d27da44f43400eb83da391d863473533
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/c879bdb130e8a068a8474969ca7f23d7
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/2fa94870a8e72222a72491efa584014d06185ee5c9ff7aef75ae02be246cc438f124020cbe6820023ba5cc823fa60188e054c171cfb80d240db7e0414c63c3f5
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/bd13bcfb024f72a2df65afc61a305862
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/999f6199577362a9e607265cab518d8b0b0a9455e6bd7ef4fc80d77f57d81e6cca8ff3f6651eb3b8541d451297c9d85e38a09cb1bfb2960f1d2ffdeda4f657f7
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/717afe277d8447cc8c9c354d31541ea0
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/5ce37884f45f128fcf814b54b5b7d8cfc0ef2f4ab5dd38cf6bb8acad3d9accd568cdbcfe32f445890a11ddad4614c57e88a9d6c39787f4ee0738c781637811d8
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/dbbd454cc983cfee2fbfd7861117ed4f
-libLLVM.v14.0.6+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/81ea01a7f4d4da96e10abf027e8a2baa2ff8086bf923a9bac82af02416deb543c3528692bd8f470e137669ca58ad05c2224243afca8213cdcf794bb65ed0b452
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/0b9e93cfc2e6a72aa3e7f1dee03c831e
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/f796a029452a6af689b06d0a7b2b746d49d4a95a0edb6b064baa41614c2f16f843cd613f29ced45f1e42d4c600b5ebc435f564adb2ac52632abb397b97517189
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/2b259c04529102c161910fcc38ac79ea
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/26f97b72b63bd93f71fc1b4dc56cbd66e05f6633bb35f4f033b10a150773e6550127a13bf49a67cc0492b6140ebf01c02254678eb4af9e2832f2c757ba89b7c2
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/89e0c9030216fc05b932326ca1d065ec
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/b35b869afff05ecad66e102b1374b9e51a05284684d01a80259a08496bcd1b0d208b4015e64bb55c24f105bcbae0eaeadf188d76daac9bf0800e446782230ff4
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/f61852a29c7f30562b6b9cb660fbb968
-libLLVM.v14.0.6+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/dd7adb9b8b7a7c8a667a3e021032ef601b459f6afff198853dead00879008d24cc67f6785d6ce1ff284ad9c7279e77e817613af40aef86fa1bb4a57c20006a36
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/91c9fc7bfec64c0309c1f3b7126bba76
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/d3b03c50a3d6b56b333efca03f7ba0f4633d2a12acf4685fc30cfcedf3d0483e34784aa1715973ace6ed12e0c2bf1a35645b931afa34adfd75f04959509d9218
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/8a4893e1a088c02f1af8171dbf1e8be9
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/606861d316759450060eaf81e924b8805494e0303cc8ce8e5b03e09e5c09294ceec274edaacb628eec1ac614ed68f64983b574841f56878b9e308d231ef363c5
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/1ed600f5af67def3fadac97fb008ad83
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/12b17d567f426260b6d9dc5c352a609d93a2c81c94287163d67628b3b227410920836a01292718052929f22028cc737cbe7885e430164e5b9bad7aa5fe048d46
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/7ef417149337a8ef1819dbbaf9ce0d67
-libLLVM.v14.0.6+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/031ceb1b9928429674f9c28899c6a0b66eb6f517af0760227587101ba0645e31a772619d8145301a10402784763c07a20046a7e455c4b912e3788e192017cf3b
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/abc563cdf8e7cd16000b0ee872b8aaab
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/45159941d6c9c7fd4e392e500f96dd7ee74fbda9dd29026463bae7d542bb3eb71ea8c4fca49c1738effc1439e54c166fb72962f3d96e351e811efa7aa1770a7f
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/24f6b603f9a4ceb4939a662d212249fd
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/3ebb6c078939191acd90560d1cdc3ba35d7c1d5b77ea699acb9a739b99fe8a2b832af3f9c98337ce435fca31dc7f267cb287a48ef12ca793fec4ffd1ff98e5f2
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/2cbdd592ab2d5a9ee5ccc68f730ef783
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/f976047b12cc6f0c09d6996f1986dd03014ae2848ef8287a9889bbc69fbb1e16118af682f83d1f33548ffbaddb6edf557f8b49639d4e63b8d0782dcfebde4420
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/f627500b3b0340b3510c105e5c26bdd1
-libLLVM.v14.0.6+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/3e60bd5d43996ddba4a8bab4e03f3a29370e3bfe147edd61bc26f82b5019e464e8131c20d336be104dfd067e80955f7fbf610e79550394011803b4a941628edb
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/0eed22f302a580a21105b6111ece2760
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/e14ac2bf1b5087b904e453ab18bc5750f6a8b17a0e247b4e791bea967b288cd5890af748608ef4dfe74a6fbc588841c4c8c7b58587ba6088cff737f19b15af0b
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/4f6f3bded0d4fde726cd2e8e90efcb31
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/a112057ae5b44373101a744ac6d65b4561f76435d7990961f0df692f4b44b792811a96df6d6307dd0abc3c35856ae456ca8f1fabfcc564d8f3c0e812f2793940
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/e922fe5db388a4493ea2c19bfb468931
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/9fa3b3956ac18d1735fc6b78c874415405d808754e96da3bd1a4e43cee7c0f7e6a65fc982f4868c47caf155d4f0f9df5dfee46bdddc6769b641c046d9fdd88af
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/3f1d64de6acd05aaa93efb2c36fa0d6e
-libLLVM.v14.0.6+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/fbf7e4d53bd052eee794de903cf17bd3abd570a0a4a98d14a5fcbe4fd2bc121a7ebcf04110a9c1c7907c61ef13b0d972ef085b7c5a294cd9613c23ac14079b1f
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/9ef224e893cfef52494dc43787963aaa
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/8b8678c14cafe937f3cd4619486c96fee651503571f552d77da9be49f77530d9df98db1863b3970ab3f400b5ca76df538887c2488ba4f6876b0f9853f3edb5ff
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/307a2e7c5689ed7fa05aac413e932aaa
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/8ef7ff87f11c3e6f0daabeab8e4b15e47bbfe3296b7d99aa15a4f2debca7527c5f8dec193bde4d96e0b979bf7438a75c4a6e8faed23acf08debac0ede679f493
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/881fd88344cf429e78519c48794e2118
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/2dae579a7c90d3528aaa30892c0f6f0d7c82c16eaf011bb460602cd80f7b806d114c3417664635919b28ee9c15ef4035cdefed047348472fe8d2190db4af41a1
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/ab01cffaabf4c788355e7cb25b51af45
-libLLVM.v14.0.6+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/c2cd6a426728a5b0dbdcdf7f67e4621aab0f89da1187bc9447d4e5a7cc6c716b649fc1dc957ab3fcc82d2287712ce9d1c1116dea85e0a9324909a68c12484e0c
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/ded7afb1e8c5be1189adcbf84a169475
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/7013d0d414917cd38144afd7ea90d1718a9084d1b8702bb911060a24a106e3fb9e584094f541aff71dea4961b25698d15c1f9515cb44b137ce855defa5e47197
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/abb91475af33c6a97948d4af2693d2e7
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/64b8ae8f399bac84ac092e20597767c79e3427e09b8a8ed8d3292c4d1a233bdef00d2b27b578d1192850b72480e8e9e9fe025cca8aae54208122b492cce1cf48
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/df2c50462c36e9e3647cd6ac98f4f395
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/bbd20df4071e221417724091527a54dede73490af03357b79db7a63c2d925a7b2126dd967eff4bec14b27ebe263f9d88d212eed82d7260f693c67ddf0839cfb2
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/3844c7dd6f9d2e033bb5b98744a23213
-libLLVM.v14.0.6+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/380d56a1b872070a79c37c4d02948a008312a6ce262a38a94206d5b4209d9ee07cddc2536adb8c6dd109e4554ba16c490c96dae8307a1f7699d562b0d686d333
-libLLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/294cae70e45a6a28d31dd524ca950976
-libLLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/79cadf16603e388d9d0f9f925403ff7c34a04acdbd02f33dd4e823249f3dd02b6e37edfc247088220b8b333e3a8fd64c4ee267cff9d073c231109ea51514407e
-libLLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/fff69236c97a34f0fe9d50ed93b82fc3
-libLLVM.v14.0.6+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/4f7b03836bae2767ff910c5c7c79dccaae1c8478de597fb81e2b686f6e7dd195acf2140e3be72cae77509be9f25092fe8c19bd64af041469e45cf948a0baeff7
-libLLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/527c35f304ab42a39df7e1fcecec26f3
-libLLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/7ed37d57cf4f61bc29d7eec25579b17f449c734e658ce779fea923ccf997732b4d07c9d8bc68962fa42c0f66d739b8a9abafe0c5efb940e36c4bcf2bf6a1f0da
-libLLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/504a55204bb0f8b57debd377788aab7d
-libLLVM.v14.0.6+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/03c3bd280c77356d168fcf5837dbe984f1be225a3576032de76dde1d9bb151e3b63efbd35542dff315999b1113c74ae466cc8d7e52ce12cb5d195b4bd24fca2a
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/3a6e6fa8ad2ea6665b184ecfb8e3f8d9
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/21131866e0c4355e9de01c32415b337babf113386c293e55191070bf5b08d64c7cf00d15407e78a01f3a25d557c208774df99d46c9a858e35ce27c5609bf30c8
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/ab3b323eee3d22829a74d966ec464196
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/cc297c791c7367756da76b2f0aa4b272a48d4bbd563f50b6e9d9f6c741b1a060bd3d1637838233a67dd12976d27b1d2e9a041cbdbcc42a23f7ca5da73e38db3d
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/0e8c5a7d07c21fa9070e3c9fdeade6ad
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/3306216a8edb2c91f7bec2fa65737e264fecbb5fa9b6431b493e5e42c9637f52d43018c3126e53d9963c88fc095de26b258e50e3f0a9ca5dd68d1530368e3776
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/81922f4a88c94409b117e6fe1f8e9832
-libLLVM.v14.0.6+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/265aa30a53a68432bc254164598fba8fcd4909b07cfb2a8e80a6d3a71434d1851728329354c8c6b1a5da91559ca0da9718d49b711fb94ec9b974ea5efd97cc3d
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/cd1ce9719cbcaca2ddf6bec9cb34c2eb
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/849bce21013282edd2e32e4b6ab98f8586c2d986656498d6efd0092d4db9358a05f2a33c2200c17b1fb6cff2714318f7f322a5cf1ecf9a16c6bac5cac3517627
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/0d44242a7516c2808eca52cb54b5d01b
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/3cea355f8d754cc712f16ad87f24f637381313f18963c8382cc79b3cf0237c4829f9c7d498d57d28c7aef5106ae7dafdfafabe90351ffb307adeb01e43bcf722
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/7b93f9f33beee190dbaa71508ef4d562
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/55eeba37a5b0724cbe813d5d9ec8e3b61f988c953896cc767866059c8d01e77163526935f16a8e30c6dde17999b2d6ea4088980ac118f6720f4bad8183debfcc
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/24b8b6e559ea15a6bf90c39e2040f852
-libLLVM.v14.0.6+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/180307a7e17edd2ce4758595afa784cccdfc2b09d9a1c601a69ca3a0ac1be420bc1990b8773209410e8da5c5fc81bc3a2e54687898a6d3ef0d496a27a8b27222
-libLLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/a74e1d380cde9950179a4b6a8e28ca61
-libLLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/8f8ff44ed42b5315949d076b73abe91fdf958fd82a337dd15dd71182038e1643337159652e5fd911323af21a4668e46d3a401d85774f6d33fac982945d77022f
-libLLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/85b65eef488ec075920048bfa6d9a7a1
-libLLVM.v14.0.6+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/cf865396678dc95b9d514c286fdbe85ea2a74715f5888e4392250770eb9556e354ecd9e52fc28864df88f87e06b8b39c6b403eda2bf9efd46d205f4c982e1551
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/411a28efef112b63a7e2cc5a7fa79432
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/7d5eb694dd50f9f02d2cf0b2787c66f6b824c3928616759509326c7d85578f984d29ca888d3366cec9584466fcee063f236dcf0a353df280f7abb79352930f96
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/1fa0b4eefa8a57c858dbd9788d222741
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/8c1b075ea01b661da518e309d521636c8e0dbeeaad688e9220db8b22965172050b9a0edb3b1098c3191873a516d03ab86b495550933ac680300ec0b42d3f31b3
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/b4f68d0b04db7c8021a180fe1df14768
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/5c43368be28fc7cfa14ba03cb76b3a30f854e2c34d6b2ba3b9d7887dd2f0e4944f16b6380617bf080fc7bd760629b87f1292b49c07c684bfaf33ff9a48ba22ce
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/e3b764196550adc33db2c15a74402dc4
-libLLVM.v14.0.6+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/1822f35862887d00c162a8fc39e70d9fbf73ff6f2fd5bed44b678a1f983bf20f7a11d524e7bdbd3774d919392b061d1f980dcc12b306fc95cd456e984e81d2ca
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/2d7837e11d7e65ca30878c25b38ff171
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/62be4f20d35aa6efa2d52858d66150205f2b5fc1fc5faa2a49a48b27e78fd1587ed4b62123cdb25486d6a6f87951e628a45356df4263b7bdee820c850b727fe2
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/65d72c92bbbedb202c5e26fb4bfab6be
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/2ec91bc32d89d390d104ff248440435df9cc73d7df79892f93b800feede8bb8d43f2a3e6d4682d72850b038ca75a256d24c7e9e34a751288f89cf5a8d69dcba9
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/f2eb226ef29f4ab2fa303f65253dac66
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/61e2a4d02d113ea9df863a268268d3bdea5a9c9f481b9d4ae6c96a553e7516fdfb23896d4c17bbcfef931689d67daca61ef53f307713f3a583988420dc839af5
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/d8cd2760cb5a2a332488a6d8170ce82b
-libLLVM.v14.0.6+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/a11764fb22d64831ce97315877d10c760697b0aa8fd667c6f3763038037fbe220285db20c981aa63f028b4dd13a8b0b57b32be6c792c1afa93dc531aff297621
-libLLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/33343f2e7fa1169eef570305a4d8837f
-libLLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/49b8189e8cac264f0d7cae54d78e65913f4dc047cc51f074a557073662203a96d15ef64452afb8069018f523bafd724951531d6b03c9034cdf16d359eeb9a850
-libLLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/3d211319807cdbfb6e405f47ec2a1d42
-libLLVM.v14.0.6+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/0173f80d5d6912543b2c3932a47d667449a579aed7f2b291f49bba6dcd0b680705a8f10be6175517fa4e8aecf2cfd027ef15d526bae76c99893f7203b7cf620a
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/9018ceb966b33afecd3f9440e75393f9
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/4bdc0e93f7d6be18c3522acbb016dc2e770d96be60a665f118866263366f1d6bc7702046d65e962d063b41b0d24f5a4fd0d4cfa5c4a9758052538e1404801708
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/f15cfe02c92f22187b71d5f8508a1bca
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/b73f4932df6c36b0e943d6c6048f7a238aa1d28347ee97b2a7daab622d173c23fbf452a026bdbb26eda102f99cd96a3d09a751a462f201d207dcffdafc4be429
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/dfb780878726fc582a56ff433c27818e
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/59b7a0ecb36288149d19d72e79ab4cb27eba5e873617ca4ae020f281a764345aa9a9226c51ad6dbf8e5de3735ef73cbdd6a0447f7d7c58850fafba3c675695af
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/49fb02433cb908da55ab0413eb91b0ab
-libLLVM.v14.0.6+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/f411737f06e3927b199a855802bb840d33471f7d9a011d49fb299916e313ddba10f635ce58080787345f387a9dddd18b368e9f45a233e5ff84426d101434e298
-llvm-julia-14.0.6-0.tar.gz/md5/b262d8da6940024a9f0f717d26023f31
-llvm-julia-14.0.6-0.tar.gz/sha512/19af997a93dee55fd7d53c73d85c29f479ec81343266a81a81fef5321c88455eb3a315c03664f1d9763f2cb3f286f202d77629cf528b3f7ae77d369dc3d2c8a4
+libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/c1bfb47e9a53cc612fe98505788e1838
+libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/f16c9f1faa3e959d486fbb109add976c2a2018597a0b053ac3168abad074ff9c2b23874f8969f0a71c6551c8092082938bcc35ad846913a0a9965dd27d6dc876
+libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/cbe0859ffa50e2de82b8fe86c2540f6f
+libLLVM.v15.0.7+5.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/e864e7d62eb3b62066fe14210c43b79dfab704f04381ba29fcfc2a2e2b839e8db2ad3f61bb257b64cb6a546cc45e95195089e8b734425d9d4afa3168211f6762
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/352f8869f53096a566b387b885a74918
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/67dc69f8b327791ab77d4082208653ca74ce2cc750d9cba833cadf4d0f311dba73dbc951d0ce088a66b06321f7addda34bd5705a6c38d4d901b5813b2d1bd37b
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/220916b081fea2190e372df195daf13f
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/28bc05009335d61bfec33f24c89e67412f13760de72ea9acf7a12b2abf6d89cc3f3067fddb4ce598031b054b33efcf6773b4057d5adad830ab15c88fdbe56955
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2774e9f2922e087d06e0976076d3ecf3
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/2aacbce77120fa9d24fd4026220e610b70c08b36175dee70f718f4d023b0ced9f8ae9dd2d58e35b61db7ca77ae337ed6f2da6a0de70296b4160a3f8e99ecdf67
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/63801b5fa51c2e75abd4b46f4ab1046c
+libLLVM.v15.0.7+5.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/eec9642a9c000d1aa3d298382a5b7c66caa81714665c7a405b416818f2e7a0cf1bedb81bc2a650452424391fe57061c33c2559abfc55bbac9b58e19d82131d5d
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/b3b3975a9a00b0292b9ba4b7fdf5e757
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/c886fff05f76053682a906dd94c6674f072206f37781b1025ec8a977eb952e0aeefcf20d76a3411e54782a6425667ee3a373f0a48d5a486fd4f37c02b0ecef78
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/88cf748f1a8086f949cb6217fcdd40b7
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/4e3d3cef71062b002406afb923f3d16508206662c3835242bf522cc7c881ea236695cee6add1b1f85a0b2708510dab2b59eafe004e67ee1d87a5970602a9d942
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/dae6e06bf26505fff786d0187cc5f90c
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/ed76e52223f84dd8c1ad7190341b167928493c2c617968aa17266c274527d18348865d9289cb82dd1c0d12240220750ac31e6c1354ddd9bc5ec2e226f360ba87
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/1bdae6507ca26b09a81c3b5b89f17908
+libLLVM.v15.0.7+5.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/b2704c0ef478467eb0fa21c7b436d6efc9602e8723bcf194dfcf6b3ac33d316b79de66c0c1c291e92f45f5bb09b6ab579a45782fa1ba3c03192177aaad6c29e1
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/8906c5b197baec7fc795256b92ca0a75
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/b79ec5ef4e59b0016784d31e51a94c9b292d19c36d66eadcfb3be6579244048b2650206836b4e017a63d84d8a0c72cd487f22ea08fd92f5b5ab4cb46d218e1a0
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/bd81f6f43b54364bef1e6486c17e3dea
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/892e4478e672fed55d63bfbf20a959b488e1cfafa332e2f1743cb519594526b5e0f2167b67636714dec6f43c76dcc0eb0bb2775eb43e4d898e63a0d1e78e9c94
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/6437ac1aa63c9b83c72238f4b0eaca00
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/f5e2bdb0af587e5cd55a5a2f16bf551c0e0fbadd2d9232fd5d3b2b38cdfaa80920d25903af5d79cb52a45a703a5bc07e550ca07163628cd1a79d3b3dda0d05d1
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/5616fc6e683ab133ed751d60164ca894
+libLLVM.v15.0.7+5.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/40944ea809c3f4000038b7b26e6297a5ce9d2710995c57b4e0751e74dcbeed9c00b1d89d0c75bf0f0d9094fd4811f5c5ca0cc5b83f54cbe20c1b2db85de44d72
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/dcdb815f425a6ec2aca7f29f601a73b5
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/3619419dbc81807db63e5c7bd7b237a6355ec60d2aada9bf26c1d38f10b4cb87a3cb3fc9a81e7f695ed7a195d2c3c214cd9bf96d3ccca68422898be323797fb1
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/ab2250406d3a69d68755b77b79b61f53
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/f5eaf02c7d19689a9cff2410269daccc00a075abde9287b025de3aff1d5b539b43001d1f2120f88c4c149af27eaf0caedb2942ae029550cc822e6af103b32960
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/77576af5b13b2916dae4e7e24760afec
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/1b3708202ccebd47aecca5a7c6396799ef14c4235b0904d23d6b6b4fdd91fb6b13a1627f65211ee0283a15d96b8a68cfc962d7aa2ddf75c08f2670a767c6cfa8
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/81277b7fde4cf08293f8ca956417fe05
+libLLVM.v15.0.7+5.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/72caccf9933e1790bdb0b6f6dc1ec5da6a84a5fc06336e29f2928142f3182261afd39477be913427d65655c40ddbda5ec5042c360bc49383e88c871db19b018b
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d326fe9ccfbbf179571fdcd684bb7b80
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/a34550dcbb416f79648a5c4306775f1aca041c4e8e3b269e94f960ec0925d5b7cca0ed1934b2b63b9f4437d304d658adc6c0d3e0169c629d50d7c0b5051dbb04
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5ced197907e87c470e5cc1ab08a7eedf
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/b57810b718bbfb1ec48415ec3e727388bb647fa3768ee191d81fbb16248edbde9332086d445ff57ad53e9d62fb9c8fb1f8be176649350f5eb57c744404c63cb9
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/4d5133f794e0b53d563ccfc10ca42e98
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/9fc7bbb8dee022304c4aedb930318db04345987bb7ec9b78c3d488a5616680738ca2b9a9087f60b7d6cc68650234295d18c6cee4a45d1956d2240791993fe45a
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/e5c8eae08bd2defe76e0985687d6f057
+libLLVM.v15.0.7+5.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/d632971cd93131b90d5a26fdcd8a262f2042a2dd59a09c82a8523558f2b292f9a3f285b0a6276f0e6b255f34d855736c0bfb9f426488c5929f2abf6c0b921b73
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/f0fb4b9b0257e0ead2e5aeafebb64214
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/1993c7d6ceb7efd93f2eb21379c046073b7d9d2460d6eab5aca26cae94bcbe07658780a2f6382a052e4d64813614078b5e582a933a0bc9a5d64d8388df98ce69
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/e236983a6c801d33ead6f60140cf1ddd
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/c6b44cd0d9139e0b1d47c8b17e9035099a6b360f873a2fc5c6e84c1c97dd455510f4f4262c746b47910703158fe0ceba0d19b8e6a61117d9723346f4c3e89004
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c3ad2f3774b9b7651078fa3b2dfbe7ff
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/009561d4fecd65e35960843670048b79e70495c2cfc80a7c80614f253bea7ca46d8278d338bdf7719229fa7eb9f02299bf8bc39ace683b862ad005cfebcca0e7
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/6f8d226436a2822eb7e0f25d1073925c
+libLLVM.v15.0.7+5.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/b63a32b1eb4a8af210f6a9511bcc4c90ad39091a6b2c50431253f4fe5e1ab304b68f79e71fe55e173449ebc96a3395dd1ee55a9a8cdd289950b609a5bec8e722
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/a618c88b200fa25434e969a168b93a15
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/940d6b61162bdd2d9ab5c264c9ba71789638fec646e62b9204e9304c8244d10c8a5ea3603c84bff76c5471e7f3184a21e4d1760bfe05deec80c8126a7207db04
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8a4e4c69ff51c941244d0765947dfaba
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/287e59ff6e8e81e1650796da7a01be568b9ef024eef0505eaa34cdaf4cfd8d798596e9396e48fafa39acab5e70c3a41129917e8ec7d625f9acb896bc4e9e7b5e
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/3f131f5c0e11db8de1e0268708ff17c4
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/04d1371a970694c234880ccd826f6a75615793867a3ba1fdce683a844cac3c9d33a58d34163bf2141624dde71f3af0e3582effbfce679ad2134894f86ac3ce98
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/8636def624785ea4b99d12c0d65bc0c3
+libLLVM.v15.0.7+5.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b8ae5cc249664d32a8dbc26a2bf180a782f51ba69126d099bb239ee94afdca7b8492a7458971cc91aef0ca55a1ca38d3bf3c8716234ded81319a2ad5ac082732
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/bedb9f6540966fc382de1a4544ce8c9c
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/527ad792c220e491fcfb46de81b9b15cf4f6a1d50cfe4435296e0f94ae4d8e53165b6f634f85e95a8c7369a1e7b3788d1683fa77b843f56dfb1264313f80dae1
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/62051888207139e47c9a0694cf4de5c6
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/034e4e272d09ae8f573d3a7e591f93dc551651c7a32e2b8923fcd7fcf36be5bb491530f4673cb9bf39a54c1527cc3e3ecab64c79e3fd7075209fd81f32f7f4f9
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/8543a076a97e6c72e7c514021ca5f121
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/fc11ac25945adee135ebc523fe3908bcd5c5a7aa4c2a405e3dba61e0fb59502e5aef3cf4982502da7f7ee1974bcee8354ac675e0e0182f9319ea20c299813a1f
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/6247a9f59f87a2b923aacdc0a7c128ca
+libLLVM.v15.0.7+5.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/f13adadc653d2f8442c8ee4ecca9563d6cad5f958abf2893d8a3eda331d9ed8c33cd4a52bb721be811dec66b3b5566f038bfebbcfea620bf0094c305cd3aef0f
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/873155e60b133d597cf8c40169c5745e
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/a000e1fe4698d5c19bf85b048bcf76cdffea191ee281b44ffbd83230de5dd93c9efb564a51da082df070f2358d6dce423bf0d6023836217c5b34d563844d977e
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/17467f361317ad56762b7e455d869724
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/62a8d601c8db178cbdaa57a23a26cd65a8f3855be40ba2966b445afc9ee223db2ed6c2fc344ea98ff129d8917c14f34ed93158633521780d52763fc4a4f2a799
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2c094ecef656dc6c9317038b0c5a47cc
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/af5972750be3de00df275a0f03c9c8a3b487a040f9bd29356457bc18661ffba9b3aa909849b24ae1c518fd2975a9b687c33353ba927f8713796a6c8eefa6e509
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/c10497e3f473e80e309d4c6102fc194d
+libLLVM.v15.0.7+5.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/2349230301cbebe8c7a7d7054bb4e60d991e1798dbb8bc6b8cf73350738e7058a9eb3c1067ce7d3ece1780e360080d00dd4777359742aff924d2db5c463f2a8b
+libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/15c99e56a9e8ed664deb2d6aedeb7ea0
+libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/c7d3d6d33f0fc0cad0394c02662bed2dd7d5389a6aa21027d7ebee124c3c9f5910316c44bd4485f1d45c6bb9fe12775c697a176602809bb52c8d3cfadf4f2737
+libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/b8d748a34a381d085c962549612a212f
+libLLVM.v15.0.7+5.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/02afa1db42ff68a3ea0443ab539a7c613e5acb6170f7849cce1d36969ddad36e7546427bc55cd289df46a5fd8e83477b70941b8fd9aba0717dd861c84473da49
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/12f825c1c1586a8f7c9ce56e243b6bbf
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/f6c9cb33f129f1ff95800c0c88152d27e6de3fd78e01b29d75a80df9fdd8d95de70003dee0df3868215009cf434006223b488c64d6eb240f1e18799f529e980d
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/19d05d46cd97714abd23b668693afe4e
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/deb786016835fb34967e474235b1ca9c2e9f0258c88394979c41654fc4487ef83ac622f1e049aed5d83da8738b8f1079b3dbc67ca788f6c68b432d7007b850e7
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/0fee1aea27ac30304228af1f398dbf14
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/e14eb6fad8ef734efd5dae610cc1906901b389c7557853e7fad27c4cbf6c06614996bdd5840ee3640b9fcd8a870ea058c212bc978b6b869f4594cd8b06b42ca7
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/dc14c7faeadb0c42f4e9cffcae8c7684
+libLLVM.v15.0.7+5.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/10ef07d1e1fe3bcf8bc52da169156ad10de7b3bd54f16bf1d694bd243bc4c86b4244643f1a71fec94b024ffa2e605141eec9b10e6e65dce5d96aee2b454fdb6a
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/ee90487acb75a33b77f24fdb075402f9
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/6bc8605021dbb23aa71636318468a1f81f8dbf7308d637f551132700634fea208d24608c4afb28a9609a7a866302597f684d204f718fd8cae10a616abc1b7b0a
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/2c96c511ef55496a1044f63d4fdb096e
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/564202d6cd321b8b058124c4623bfa7d7310a5020140f194bfecd44a25490ff9590e661bbb838b1af4f7e40fc15f88363a1510d8f7a2138f8ccc52ad76700506
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/555ea3150d5eeeec54b1d463380447cf
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/9da05a39e8d4d9cffffe85bc2717e105a47137682ede9cbbd2f216065ebdbb6624b68a2e120a1b87247838276cd8a501c83aec63c91673229bde8d207f651f4c
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a1f6daa0703ddcbc87b8f9d17c9ad54a
+libLLVM.v15.0.7+5.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/e803ba34861b600b350bc99484adb619bd75a82162633e8d80f1456a908d42d95842f194a6752fa43e683c26356592fb94b64f7823b64edc922aca154d970288
+libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/364b73f29c1df14d8b942183cb113dd2
+libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/c4966e3607314acbace4b31dc095b81770ac3414ac1bddb43084443191b92b2b96f6702177dec76b70be12f7a3af4797c9692cf872ea7eaf60569dc7fdd92ee4
+libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/d4aea085c08951e0facaa553b3c22a91
+libLLVM.v15.0.7+5.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/cc5cc36d50a342b5692144905ae52676fe9ff19054245152e3fff02276250604009881325cb5ef063f274b51cb2b45dcc88db0a929f6244d81cad1f241bd0c64
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/5cdf36e1300bbc9b032bebe5cba7bd6a
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c732ba652aaf7a5f6aa8cd2f39088d83b78d2fe3121c4e2415bdc935b0a3ccdff7f028d3ef50f0b5f7bccff54f1fb5acbf970fc28301510d09b3f3847556c613
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/c5b335f634ec9e663a7c5d54dfeb1967
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/51c7b1ceb0e235d9d7db9727eb7744cbd8b2e51e189c58bfa6d3b65bc4b6e7a8224e8b7b57eeeefce01c7f65a4df48da97a975dec61fb000d83d23d15737728d
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/822be345af871cd1d5e595b2a83bedf3
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/fda0ff71c7a26e783436da214acc22842fe73df1f9d1d526955f4acd0794c3afa8722df8e4c9671b11948fd96e4c079fe525c9bf3e38b5119a79793a22baf16c
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/1201b56c0dea9d1fd2a5ceb4d62f78a9
+libLLVM.v15.0.7+5.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/550c041f495a2d2439e6c4abcd4db6da06702d32046f6574f6a595fceea467ebf896635bc70d9c3e41c99b42404c87d98e3cd76a34b0f959d21284e3e4f15941
+llvm-julia-15.0.7-5.tar.gz/md5/1ffb5b00586262196d24dcc7baa4a4c0
+llvm-julia-15.0.7-5.tar.gz/sha512/5b5c93b4359cee649974bbdb5c3c191cff5ce5c3862e7cce00e2e35dd0627bf50e0aee454e67ea0fadd21c36065b7c1cae6e77abdd512fab70b71899d369cfac
 llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
 llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
diff --git a/deps/checksums/mbedtls b/deps/checksums/mbedtls
index 723b9012bfe00..d0b43ad80ea70 100644
--- a/deps/checksums/mbedtls
+++ b/deps/checksums/mbedtls
@@ -1,34 +1,34 @@
-MbedTLS.v2.28.0+0.aarch64-apple-darwin.tar.gz/md5/ba33f960c7bcc3fda818c84f5e716df7
-MbedTLS.v2.28.0+0.aarch64-apple-darwin.tar.gz/sha512/3878531424317954417d09090b0a7618c6c0a6907bb04db34aef37d55a033972371455fcffca548ac03be41c0b0d1f8e51a9fe6e8f8fb4d8ef4fcbf91f15b3ea
-MbedTLS.v2.28.0+0.aarch64-linux-gnu.tar.gz/md5/9e7c78fc7c39fd19dcb170d57c8c0ec6
-MbedTLS.v2.28.0+0.aarch64-linux-gnu.tar.gz/sha512/59eaeec1a772265e62fa4049e0bc8c96cd7403d954213ac6098921acf6e128b624d6bc1ba5c6062c88ecb92aa8bf9d0a06e365eee241b6516ef0bfe2b4c47188
-MbedTLS.v2.28.0+0.aarch64-linux-musl.tar.gz/md5/44f939956834d5d8130ccb3bd5962b0c
-MbedTLS.v2.28.0+0.aarch64-linux-musl.tar.gz/sha512/f9797a44851222c005fd4068df6e0bcee68133c9a48e19e16d188b8a6927be56c620fec83264398d682eb5c89b7f01683e5898d3cbcb7aecf53e5ce678464db6
-MbedTLS.v2.28.0+0.armv6l-linux-gnueabihf.tar.gz/md5/fc07035dddd51e9c57e62edfc3fc5691
-MbedTLS.v2.28.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/ffb707ba7439050862654316b4388f52e8bd09bbeb7076cf6cdc924cb60c61f871c01ccfe14e1ae1e62a5733490487324ba60e8545d60902f3317039264db83b
-MbedTLS.v2.28.0+0.armv6l-linux-musleabihf.tar.gz/md5/fc54575519130bd468ee4dbe23da0ea9
-MbedTLS.v2.28.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d4b9e1bd8877f7d93d1b4e0d1c4c3d4e5d2af6920e39222667e689ec84cf9817988c91a826755a734a60ce05fed913e5421b8aa9980f257450da7f51c5e9342a
-MbedTLS.v2.28.0+0.armv7l-linux-gnueabihf.tar.gz/md5/0753a99f4645ba7e1ceb27a03c65a107
-MbedTLS.v2.28.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/a7a65338ee6f93117d44975651d77c351f0c919a3ae2eea6e220719dd084f71617946adf04a08a82d55c22af0275d21fce3c692becf87ccf2d932c8aa32af7af
-MbedTLS.v2.28.0+0.armv7l-linux-musleabihf.tar.gz/md5/ff335caa1cec22366cfa2c2bf87f61f7
-MbedTLS.v2.28.0+0.armv7l-linux-musleabihf.tar.gz/sha512/a3ff7d53b45134165347dec209bc27f48be984b4fb58ddd54286a146b837d038ab21e22033f1e0713d359c72adc0b97e979532ebaa734495eb88bfceaf3c2155
-MbedTLS.v2.28.0+0.i686-linux-gnu.tar.gz/md5/c4c9728ee9d875685765eb4c9c3bf731
-MbedTLS.v2.28.0+0.i686-linux-gnu.tar.gz/sha512/214142ee7ca3a5b447a97928ffcbe0389fbb8c1fa68de387656e5c0e4406f02411e4183fb051b2107600b222bd5279b9fd3a5aec43a9d97a9556b08c5338cb7b
-MbedTLS.v2.28.0+0.i686-linux-musl.tar.gz/md5/2684f2bc8a04234ae67603150e6d0917
-MbedTLS.v2.28.0+0.i686-linux-musl.tar.gz/sha512/a533afd26893464bee62dbfa9babf6e4e1119a4be31ecb242e2ff28f5f6e3a3969057e2ce653c98c1b8d2a19e340df7a17dac8693fce270399df92cfbf3a32ca
-MbedTLS.v2.28.0+0.i686-w64-mingw32.tar.gz/md5/f205fd351e94f42cd38d34d3eff6e69a
-MbedTLS.v2.28.0+0.i686-w64-mingw32.tar.gz/sha512/cfdb819d3e6fa9ce3985e29ac733c2af6c988230ae49bbdc13f0fc234e82444d17ce5da4d3b6d8cc6ac45ea4a999f0ce03ac42533223c87bea066a371487ef1e
-MbedTLS.v2.28.0+0.powerpc64le-linux-gnu.tar.gz/md5/41b1f61ebda30a8e8f02dcd955ae0d40
-MbedTLS.v2.28.0+0.powerpc64le-linux-gnu.tar.gz/sha512/25b62106404cb3b9be3e0f778ed953bdcf9d18cb289be823f97f7a1759012c84cfe7240fc936f2e6e858273ce2022d75ecc2554d5696cea110eda6d059362416
-MbedTLS.v2.28.0+0.x86_64-apple-darwin.tar.gz/md5/e7b286dac94bef06915930180b2d3bac
-MbedTLS.v2.28.0+0.x86_64-apple-darwin.tar.gz/sha512/a2acaacb77ca6e2704144d8d99e51df49b1fc69c8751e43973e0c41219d023676d35ae05bd4ff7a3680dc0edf5438e51b67baa76f5b78947560dcc420623a3da
-MbedTLS.v2.28.0+0.x86_64-linux-gnu.tar.gz/md5/39662265088efadb142fdc7255a0b7a3
-MbedTLS.v2.28.0+0.x86_64-linux-gnu.tar.gz/sha512/a3648c78bebf4c024ddf491965cb7707df887ce10dec6f9e42eb6493bc7d1220e5b23c53f5e4e73dfe94e8d8dcf35ffc6860d1992deb9b63a0c4691d4167e59f
-MbedTLS.v2.28.0+0.x86_64-linux-musl.tar.gz/md5/1fbe9f2593bc11af031075b58a108bc8
-MbedTLS.v2.28.0+0.x86_64-linux-musl.tar.gz/sha512/d185ced64d471fba9ae1aa495b2eba0e60738e8e5ef918670b1c40cc8981389ecd48e4f17506229bafab4a11f7a257d3d544cfe87ad198482778931c2a7a8aa9
-MbedTLS.v2.28.0+0.x86_64-unknown-freebsd.tar.gz/md5/26beed62ee2abe8c6e52c1dbddbe0b1a
-MbedTLS.v2.28.0+0.x86_64-unknown-freebsd.tar.gz/sha512/f04a417d99e3b908383d3c14cf8512b2f13e4b226d07235e2334090aadb6aecce40a23ae8f8df9c0ed9618707e839aaac6de64d5fee6d7e3955b290bc564d3a2
-MbedTLS.v2.28.0+0.x86_64-w64-mingw32.tar.gz/md5/cc55fe5537719aa8bf3bbee981c01413
-MbedTLS.v2.28.0+0.x86_64-w64-mingw32.tar.gz/sha512/3436647e81fdb9db138063229f20f47e2c8405e6379ca3e7cf38fb9fde84d2b6618a5f29b8df19cbffe75af7f99e00e9583d67be7b53dcce27bff453b96dcf13
-mbedtls-2.28.0.tar.gz/md5/d64054513df877458493dbb28e2935fa
-mbedtls-2.28.0.tar.gz/sha512/907867edf532ba3b099f4fb7ce31f5773ceceb072a8d067b1d830e879d541f92f401d64f13bbe6b4eb0845e58bb765d7d28896be414bb0fc7ac5b3876066be5f
+MbedTLS.v2.28.2+0.aarch64-apple-darwin.tar.gz/md5/ef83fb4706100ee678cd8af3f7a5c762
+MbedTLS.v2.28.2+0.aarch64-apple-darwin.tar.gz/sha512/03dda8cc9afa3d79c3c733e45c77891e75d939dc2bcca5ba8eb7aa3bd01fb52011ea9323df9cf7294fe6dcf87eb86c1b1c4b2f3b8af6116929b3371698559fe4
+MbedTLS.v2.28.2+0.aarch64-linux-gnu.tar.gz/md5/ac46c3840d2d0cc7c573f31c2f3d0d61
+MbedTLS.v2.28.2+0.aarch64-linux-gnu.tar.gz/sha512/bb458f1dc9b8684a38f603136ee4ba1c51b47f5047c5a5cfe2c552be266e79dfcd8243b216b0831abf24390eeb6f4524bc7e43b2642eb2ad0227399222cd0d8a
+MbedTLS.v2.28.2+0.aarch64-linux-musl.tar.gz/md5/d74732e0bbcd03666243605e60bb345a
+MbedTLS.v2.28.2+0.aarch64-linux-musl.tar.gz/sha512/90b0699477b697b94c0ab1ba0607fb3e1cd40d66a80a51cb1e0f3b927de03ba201e7e280d453db672e6265db5b07d0145846e53ddbcb4b550afcabef1716470b
+MbedTLS.v2.28.2+0.armv6l-linux-gnueabihf.tar.gz/md5/65ce7c51884b50dcb8343a945644b862
+MbedTLS.v2.28.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/e9df753e9f3a08fd645b15422be7cc0ec3aeac3f8d5f76e0c4c5ec24c54e1b653db320ed0c6799411802a05801241a5363bb449a8765fda7856413c7e3297721
+MbedTLS.v2.28.2+0.armv6l-linux-musleabihf.tar.gz/md5/7b7fc8eafc95416d75e3f1bfb2640e09
+MbedTLS.v2.28.2+0.armv6l-linux-musleabihf.tar.gz/sha512/68362114808fb4f986dea673ef1c7f104caad8233bed1c7f6a365d5d69bb7f7c92b234d6b1bfa5b014e7096411841c115a5cfe9932ae9ce642293cab962f8d38
+MbedTLS.v2.28.2+0.armv7l-linux-gnueabihf.tar.gz/md5/4a477379b15fafbf0c05435f5ab370ac
+MbedTLS.v2.28.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/fd34b475bf94b411e3155f5a5166d1ad081fef3622d7b99f4915b592d4235f63a0b910e0559ba2a0c3d596df9ccc2d7ecb61984091debb20bd4b995942857132
+MbedTLS.v2.28.2+0.armv7l-linux-musleabihf.tar.gz/md5/fc6551ef5f189010a84230dd48f6bdfe
+MbedTLS.v2.28.2+0.armv7l-linux-musleabihf.tar.gz/sha512/d3a7199f3e1ffb1c289c5f0a4384f3b5d1af6e868eb1081d66d6cbfc60e6415e68a7e22afb497f2e7c7900678a19bf1ba2a4c888efa1019c03bce376af62154c
+MbedTLS.v2.28.2+0.i686-linux-gnu.tar.gz/md5/335c3ac146bbe8cd862e4737bc362037
+MbedTLS.v2.28.2+0.i686-linux-gnu.tar.gz/sha512/f12ef67a92af27f4021f73171cdf2ef5558f734fcb185e4417fd7e16752dafe3f75be4291854b5ce346abda674252d58064d9186122eb4f9b15ff89156d221ce
+MbedTLS.v2.28.2+0.i686-linux-musl.tar.gz/md5/435b864b02d1d2c96e5d8dc32b433ae1
+MbedTLS.v2.28.2+0.i686-linux-musl.tar.gz/sha512/52e3a79a70b3ff4617c93cafdeb702105c13b34687fc0fa31eebc91aa5cacea356d5b6a6bdbbfd81417d77debe256ea8f0f2a43c8d140154099bde097740dce7
+MbedTLS.v2.28.2+0.i686-w64-mingw32.tar.gz/md5/a238801f7e0d14f4b693aa4b74645263
+MbedTLS.v2.28.2+0.i686-w64-mingw32.tar.gz/sha512/431db4c388d3c52b08795d6fee6e6696cf383506a603816d6a63dc3571dbdc2b673837a1df1d9003c5009f8f8dc6eaaef3f80aaea396dc2fdf54b7e6a3c6aad6
+MbedTLS.v2.28.2+0.powerpc64le-linux-gnu.tar.gz/md5/26c8f09aa65e5b70be528311519d4376
+MbedTLS.v2.28.2+0.powerpc64le-linux-gnu.tar.gz/sha512/2d47567388b8554ce7714f4ded013fcbffbf94726dbc6a1b7287dc17b27d1fa35baba55cf7dac17c555892a5f4c74119afdf552b42b0e8f80f26621adaa4dbca
+MbedTLS.v2.28.2+0.x86_64-apple-darwin.tar.gz/md5/dfc263208b1a8d4c29b4ec3b6f10e5ce
+MbedTLS.v2.28.2+0.x86_64-apple-darwin.tar.gz/sha512/3b2941c4b151206a56a9a795f0f30519676ea4bc0c93f66b419b15568edc91bb976954f584116accb7f9bd067580712e61b3c580a249332640e27e6346ca51ff
+MbedTLS.v2.28.2+0.x86_64-linux-gnu.tar.gz/md5/94b908036eecbe59372722b41f0b1985
+MbedTLS.v2.28.2+0.x86_64-linux-gnu.tar.gz/sha512/c37a4c34eb450bd716c076c4105bd6022892731c470d64a854ac0fca6653dcf5a70b23982050e7d82cdfd67d02902d9efe4c94d2cf5e0d29d497c3c5ac03f8e8
+MbedTLS.v2.28.2+0.x86_64-linux-musl.tar.gz/md5/217866be499144eeb2e0944b0b60cc09
+MbedTLS.v2.28.2+0.x86_64-linux-musl.tar.gz/sha512/144180e1968da627c92173277a130283aea711157a04a2655786658234232e397985f63d5407166377fc5f38a7447c19797c51b66a9c4b1773601d9e7e01d0e0
+MbedTLS.v2.28.2+0.x86_64-unknown-freebsd.tar.gz/md5/74316c624c8106faf7c04e05149b5c38
+MbedTLS.v2.28.2+0.x86_64-unknown-freebsd.tar.gz/sha512/9eca254c9b663b2f5799705c2e0aebb5529a7ff7759b0f3b67516e622dd4561169fface1d08340666453e779133498eacb8ef2dae1ef6332ceb4d8052d3614d3
+MbedTLS.v2.28.2+0.x86_64-w64-mingw32.tar.gz/md5/cdd28912607781f5e6ea6cad73c7dba2
+MbedTLS.v2.28.2+0.x86_64-w64-mingw32.tar.gz/sha512/e5793778d57b725a0cab48dd7e8f45022699b654bb8e890620efa73628140e453c80601e43647a700d6090a4b66d3c30b11634c4224c016c11c7bfde6b8a1b2a
+mbedtls-2.28.2.tar.gz/md5/421c47c18ef46095e3ad38ffc0543e11
+mbedtls-2.28.2.tar.gz/sha512/93cdb44f764b200131b8dbefb9363e5fa38760eaf01473a512f93673cc55db3515830e16b813e03b39cb819323ad78cee4cb7f3fa85861ec5e72e0f89541c7fc
diff --git a/deps/checksums/nghttp2 b/deps/checksums/nghttp2
index 5cc0b22d2d778..6113b23d68c14 100644
--- a/deps/checksums/nghttp2
+++ b/deps/checksums/nghttp2
@@ -1,34 +1,34 @@
-nghttp2-1.48.0.tar.bz2/md5/887336a68dbf6e2fa78dd4fc2a515e01
-nghttp2-1.48.0.tar.bz2/sha512/319b8c4f5f276e699fb04cf2a9aadd07bb0a26b78d8b37eb84e6dab603718b3d2c9bf6dca54816d4644cd6833177d842d7f7d3a1438a1c723d2b73e4ec2fb344
-nghttp2.v1.48.0+0.aarch64-apple-darwin.tar.gz/md5/362b35eecbb86a49b956fa57168ec61c
-nghttp2.v1.48.0+0.aarch64-apple-darwin.tar.gz/sha512/d8c35686ac6baf4ba6038355f1d3a275f2c3a9696d1b751a54c6e671cbd96c38b4600c6ac00d77e43efc4fbb01c7672d917142530efb0360c38a4159703b9156
-nghttp2.v1.48.0+0.aarch64-linux-gnu.tar.gz/md5/2eb064be49b1990250a7c8ebffcc4a1e
-nghttp2.v1.48.0+0.aarch64-linux-gnu.tar.gz/sha512/0fcef4bfa0cea2d7727241961551b0ff73337aefbe8f29a6ca06f856b142681e251af57795ba26edc25784a1845040a0a3865dd0ba26ea65c43478a02ea02080
-nghttp2.v1.48.0+0.aarch64-linux-musl.tar.gz/md5/80f505a5b1f092e9a2e4609ff4b16b9f
-nghttp2.v1.48.0+0.aarch64-linux-musl.tar.gz/sha512/3e260d9bb34058c7c841034d874dec2141e71f40c0e75fb751740dc46fe1cd5668c713e7efc154f1e7c964ed41b8fed9a08b780370e4a4fb44eb564eff1a2c72
-nghttp2.v1.48.0+0.armv6l-linux-gnueabihf.tar.gz/md5/6b167502a95dac6f55cf2d312af09b91
-nghttp2.v1.48.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/da620c8e50ce4ca2fd150c7b83b0d1d40d3d9e184cb5dfff6883723b574e8c68ffd121a74154a0544e5beb7991740634c19916bb66b1349f46d772ddff3ceddf
-nghttp2.v1.48.0+0.armv6l-linux-musleabihf.tar.gz/md5/b9079b10a7f0e190232426cbed35f8e9
-nghttp2.v1.48.0+0.armv6l-linux-musleabihf.tar.gz/sha512/dd0afaa8eed6df8c0b4f78c3408e6a0b881957d183b5dfa4e6d9aa131d92a7485198da316dfbb28280b6e5e281432ee1dc1bbff5906a29cc29afa77390d83e09
-nghttp2.v1.48.0+0.armv7l-linux-gnueabihf.tar.gz/md5/cfacf5fcb143757b6fa64081011807d6
-nghttp2.v1.48.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/5b9acc860474722c07f73b3d049c5d131c86212264a49270253861b897b165e4a8cd608ac3735ee72c90cdd36ea9342208e1eee48e9e2407b3b10ca2cf23f2d1
-nghttp2.v1.48.0+0.armv7l-linux-musleabihf.tar.gz/md5/76dfdc217fb52e74955b6dd95bace880
-nghttp2.v1.48.0+0.armv7l-linux-musleabihf.tar.gz/sha512/05b7b6ae7cee062409eb941147e45e0b1b68a0ddcd8a022bd008a2f04a1249051a6e69dba511398b3e98e1144004bf0e6580fb4417f5500746f5b4b3eb65179f
-nghttp2.v1.48.0+0.i686-linux-gnu.tar.gz/md5/8ec510d34f87830be0cea46378474a0c
-nghttp2.v1.48.0+0.i686-linux-gnu.tar.gz/sha512/c3262180298ebfe1aee5fa3b25a491f4fc6122d0936c0fcfdd1d3f7f884dbcdbc9cbca05df986782e200334c4d97bd5ed5b75a9286910d04b00eac9efa43d67a
-nghttp2.v1.48.0+0.i686-linux-musl.tar.gz/md5/90fa7935261e782dbd14aa858ae2d511
-nghttp2.v1.48.0+0.i686-linux-musl.tar.gz/sha512/790bcac85995a2e5caddaf19023c90a5b9566d166da48b98581de2e12d84c7beaa74e7ef9ae55bcf4a68c91e1e873204328c8672634e5ed3fc79721a9939b480
-nghttp2.v1.48.0+0.i686-w64-mingw32.tar.gz/md5/b7654776af03333caf4ba1517ffd2636
-nghttp2.v1.48.0+0.i686-w64-mingw32.tar.gz/sha512/b8f82c7a8f3ca6cb3cd8ab760d8299b0dcc6a03c7e51be965168c01de07b900891e48e13fbcee67856afddb10c41b402a4b384a06d3fbee41c4d5f3b6e352c53
-nghttp2.v1.48.0+0.powerpc64le-linux-gnu.tar.gz/md5/eaee75e48bb77137c09abc5abccc6db1
-nghttp2.v1.48.0+0.powerpc64le-linux-gnu.tar.gz/sha512/4b99d91a7f751c05835c73bb6b0f49c851ca36ead41c5137aedf5e96bd48d894768b9fdb65f83560ea86e0c3f854e52bf66f8859dcd920446db1a121c7a5e0f2
-nghttp2.v1.48.0+0.x86_64-apple-darwin.tar.gz/md5/1720e70d0e72afbf36900ed75cba45d0
-nghttp2.v1.48.0+0.x86_64-apple-darwin.tar.gz/sha512/4c07a7d78bb1366a913321d8258d0cbd0d0b7d85f43b5980617fd1451dc39e7859134e86ec59b06b3b6dc8b62b71f9890eecf2737f8cf4e441bf08c2e61cefc6
-nghttp2.v1.48.0+0.x86_64-linux-gnu.tar.gz/md5/a94aab74d021578fcda21836c8030c9b
-nghttp2.v1.48.0+0.x86_64-linux-gnu.tar.gz/sha512/c1c31e32e60087fe7facbfea4bd4897649c8eeef92101093df4897f41847461851497e436c4a4e1c847c9bf5ac678934aba1eca0d8a6e17302d9474ca3064fb5
-nghttp2.v1.48.0+0.x86_64-linux-musl.tar.gz/md5/677ad574f615b2d77fecdac0c75111db
-nghttp2.v1.48.0+0.x86_64-linux-musl.tar.gz/sha512/737637a68364096ea6c507e37c9305df875c8830d58a05404ceb2e76d69bd6e44c82483e0f8228cdc7a64b0419de75d2d99151fac369bacd42fc06a71b35ec54
-nghttp2.v1.48.0+0.x86_64-unknown-freebsd.tar.gz/md5/b65cf09003912eb4201db80253fc5b04
-nghttp2.v1.48.0+0.x86_64-unknown-freebsd.tar.gz/sha512/fdf7c733f4247f66733dd36e314cf6772abfecb82ec99c613db66910eb956849851587d74b9e940e1f0d743142555ccf96bf7b990b3502e17028cbdd8bc504d8
-nghttp2.v1.48.0+0.x86_64-w64-mingw32.tar.gz/md5/cfb494369553277c10a7b1eaf1c116fd
-nghttp2.v1.48.0+0.x86_64-w64-mingw32.tar.gz/sha512/066b8a9cbf3fe710704b56af2917279f32cd3cef69808bb56872d367061402832dc1cbb01988b35652751e66c937d29a0190b98bfcd846a50fd80392b5a7e1bd
+nghttp2-1.52.0.tar.bz2/md5/bde5874bd8e7e8be3512a621de27b9d5
+nghttp2-1.52.0.tar.bz2/sha512/019ec7a904d1baf8755ffcea0b38acf45ea9c6829d989a530ab35807338ba78d3328b86eebb3106b8372b7a8c51b466974d423e0cd786b6d6d020f0840c160bf
+nghttp2.v1.52.0+0.aarch64-apple-darwin.tar.gz/md5/e3d9e07029e184cc55b7e0c4d2e27c7f
+nghttp2.v1.52.0+0.aarch64-apple-darwin.tar.gz/sha512/cd098db984f751b00d2cc99d7f7eba0fa830ba178dd85a9dfa679a591e62d57364dcfd74e6a55ef513a0436a8e520b1a5474d4bfa9a8bdcd70e398482b7c9985
+nghttp2.v1.52.0+0.aarch64-linux-gnu.tar.gz/md5/73fe75f3cfa2bd3e804ea39a4eb884a9
+nghttp2.v1.52.0+0.aarch64-linux-gnu.tar.gz/sha512/71f4b2a23ba148b66432797b0db954dbd98fc900045d4572f488b43779aae125f71929e5bba6bbadd30c7998a133c5e5beb70888968bf3b01bb5fe9c9ea0e451
+nghttp2.v1.52.0+0.aarch64-linux-musl.tar.gz/md5/736a24a7eee567851a965558e31489fb
+nghttp2.v1.52.0+0.aarch64-linux-musl.tar.gz/sha512/ab36182b04a590b092fae9e3a912a87467e8b01ad40a628a1d2e52910ee513ab327d5d2836df598d5aa8203f60a605d19d0b9636eb35d12a84a1c9d87124604b
+nghttp2.v1.52.0+0.armv6l-linux-gnueabihf.tar.gz/md5/56fd32e8d77d4c9d9e2355565f4db19b
+nghttp2.v1.52.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/85718e0e5cee35d91a8684ea33d8f965bb30d62dbd6b74a574a2fbc4c1027b1ef23ef68f1dec3f037fa6c5739287329567df9591a69f8f23b23fab2516a0b644
+nghttp2.v1.52.0+0.armv6l-linux-musleabihf.tar.gz/md5/283273d3bf4d53b56d12ef6af2e72f20
+nghttp2.v1.52.0+0.armv6l-linux-musleabihf.tar.gz/sha512/5c1d92cbf5f2f4e1ceb4ee13634c0bceb6ca28abaf9d87cc673f264d274bb96aa095648295e9aa76f86eb0890a426f47c0b942e72610daf722ed8e86b5f0df69
+nghttp2.v1.52.0+0.armv7l-linux-gnueabihf.tar.gz/md5/d7ae84e5365759a42d0fe0360f679b61
+nghttp2.v1.52.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/63212e3ad94d2bc54ca9ebd452d8de8e67aa53c03a3b3033d36da765303e714d8d5c24156ea4fb985acc72fe52e2977e8e8a658cdd9409bd41ecf401c08c1aee
+nghttp2.v1.52.0+0.armv7l-linux-musleabihf.tar.gz/md5/a6ad0f25f43b7f1832faeaaadf683ed4
+nghttp2.v1.52.0+0.armv7l-linux-musleabihf.tar.gz/sha512/64b9075c0d819288345d53c5ce88b360d2ca4d24c3d2e81fb53c55f86054b1a3e95d7831b363a4100965cdbf479268a5993d66ef59089a219a97b4151d8fef60
+nghttp2.v1.52.0+0.i686-linux-gnu.tar.gz/md5/9781f6eeb4d24a291d6737e59e74edc1
+nghttp2.v1.52.0+0.i686-linux-gnu.tar.gz/sha512/2b542cb67e78993ef881694dc50c980b57db3761c5f4e11c381afb1b31d1fb8ab0a8b20e1279303a602c07912f21e8ef9d732366b76ab3f356a74b444a5dc78c
+nghttp2.v1.52.0+0.i686-linux-musl.tar.gz/md5/08603b9364179ab4cbe0637b9b1b63b5
+nghttp2.v1.52.0+0.i686-linux-musl.tar.gz/sha512/0a5b79709482548c6a713843b670695b4b13d2b219b592d029719da0b4187fe884798fb44e2c511c300f02bab03f2b0b289d49d6256e3ce0b9602a66ea2382bd
+nghttp2.v1.52.0+0.i686-w64-mingw32.tar.gz/md5/1abdf0cad466ed0ca0da137809999d8e
+nghttp2.v1.52.0+0.i686-w64-mingw32.tar.gz/sha512/04680895ead989fda56b284d8963e7ca31680492c8f77f4c6bd7ca03b9a66ee7529b78cf35e07b2e106f43c9aa543dffd4081b034339803ba95021293d3df997
+nghttp2.v1.52.0+0.powerpc64le-linux-gnu.tar.gz/md5/ae411e40e24cb3f3b07fe8de211b58c6
+nghttp2.v1.52.0+0.powerpc64le-linux-gnu.tar.gz/sha512/7433502d76646e5761ea2707fa65ea5a412c513c70908a4d9ceb504f08121b1f39bcff984543370c221814785b7064f85dedc777a22df5e30a64a64e510e0978
+nghttp2.v1.52.0+0.x86_64-apple-darwin.tar.gz/md5/59f0de0affaa17898e837b5074de68fc
+nghttp2.v1.52.0+0.x86_64-apple-darwin.tar.gz/sha512/e639c813373b17d95220640ec2a568e9731cfc32df826610357ec9ff8e9d7e7abe10291140eaeb9342ae69215798bf3f999db7647c23efb4f815b54f4da9cfe4
+nghttp2.v1.52.0+0.x86_64-linux-gnu.tar.gz/md5/6bc8501392d47b349c7463e984dc5909
+nghttp2.v1.52.0+0.x86_64-linux-gnu.tar.gz/sha512/522cc2a8464ee5770c01b83a6b4ecbbcce322efffbd738f7c907643fe85342e785bbc805028d41c2b7404d6241168d1ab37a9db15018623c265b53905bcf060f
+nghttp2.v1.52.0+0.x86_64-linux-musl.tar.gz/md5/725a6adc23880b28303017597b974535
+nghttp2.v1.52.0+0.x86_64-linux-musl.tar.gz/sha512/ede5a34b7f71310e4c3cd99b9b61b2453db5dc8117675de12adb1e68c9283cdf821614f49f4d04bdd3b0f17d51a52972ec1e226d0dbdc5462b1a4a1fcc9f39e7
+nghttp2.v1.52.0+0.x86_64-unknown-freebsd.tar.gz/md5/a2b89913c1057ff67e7be6086619a65f
+nghttp2.v1.52.0+0.x86_64-unknown-freebsd.tar.gz/sha512/6b4efd2a0807f19cecf1f1e97b23ade11ed39f651e29586bb21185e17d0c50dcb63e26233ff994bfa934b383468e29f680b1ebe0cc2a2dd09768b14dead399a4
+nghttp2.v1.52.0+0.x86_64-w64-mingw32.tar.gz/md5/e1c8ec6ec2d69b2ac64b114ebf09f8b4
+nghttp2.v1.52.0+0.x86_64-w64-mingw32.tar.gz/sha512/cb43cb138f14717501e852ed388a44d41012e2bb70b6887584b37b4e0f42827d74f17ea85ba4aa0bc09d623dedeef73eee80815c1db2b6858b31251feb0b5580
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index 5c9194f5dd404..5cd8d27baf25e 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,94 +1,94 @@
-OpenBLAS.v0.3.21+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/a89e1eeae1d9539c21598c98da5ac91c
-OpenBLAS.v0.3.21+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/181334363669482ac78b17ed9797ecc62ead35c07348eddd674c06252a7b36a356db48c62da77e73072df4cc21d0c25e0fb568c4dc7fe98e5db8e0f76eed7183
-OpenBLAS.v0.3.21+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/7ed3100359f9ed7da4959ecee3b4fd1e
-OpenBLAS.v0.3.21+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/329e26dae7f2e5ba81ba2271257c03426a349b89831147458a71d91da062bd11fab1b846f77922f3bc65a9f7d3b1914f15aaa0c14f696ba7bf43b55628a5464d
-OpenBLAS.v0.3.21+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/ae71b44c62d42c246a21385d0691dcfa
-OpenBLAS.v0.3.21+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/0598da4b6410f59a69690e6908c80724df4a8c4761194993c1b127f84418f046d8fa0d367fda8a7faed5cec2d6c57bd8872ba216e38b5418bc9ff20af27528c6
-OpenBLAS.v0.3.21+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/fff9e118d250bebd55723e77c492280c
-OpenBLAS.v0.3.21+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/29d831f773cb6a75119c7cc2363fd72d38e32eaef6124505f8b5a1b64fa3ae7a6ffe199aae851de0893259d3bdc480aa377294688ee55d20f0da7dfc49fce747
-OpenBLAS.v0.3.21+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/e93af05f98be926b3000dac3accf5f56
-OpenBLAS.v0.3.21+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/de3e9cee786d1a37dd5807aca81728d67d60fdace68aa17c69efcc7ebfe36dd3a240dea16f7cd3c5021f0f967f15f1154a3d32350f91165a9fcdd08285917196
-OpenBLAS.v0.3.21+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/5a0226c6781c748a8f4d144b0ae4609b
-OpenBLAS.v0.3.21+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/83d9ff97a5651b682ee1839cf0e1aa8dcd7c2e2d32b6cadb184b8d71123649a31519516b1c7d98c329ab9902538a01ffc14ec28f95ada35ba8da77241d74c2d2
-OpenBLAS.v0.3.21+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/f09c8409b0f4e5e3ee9d471538289e45
-OpenBLAS.v0.3.21+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/b41234be953779db6601d5bffe43ab9ea23bb542583b16af48fe3a3400b1e50b45d3c91152895c92f6a1f4844ac018c8003f0fd10e9473c503e70e9fc4ad11b0
-OpenBLAS.v0.3.21+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/78ea013e0ba52d289c919df3d5b79946
-OpenBLAS.v0.3.21+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/50ffb9d99d283db2880e767a3ebedbdc7ca67b18782717f5085e0cfc9f6cc55bdeb112e8dca0011e31954a22272936043ca538204fc9be81cb7a0f22d6726f12
-OpenBLAS.v0.3.21+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/47d016b3a50c0c9b2ed1eb5e49904169
-OpenBLAS.v0.3.21+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/d38fe6df24d658a0f54ab007ac6f9b30c0a02fbf86c0592f2e5cf5a8375b654a7428b75f74c20d97d6c953ae9998664c82800813dfa806a5f16dfc20c798c419
-OpenBLAS.v0.3.21+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/17f22b32a0715f82fd48cc5d82f6512c
-OpenBLAS.v0.3.21+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/d9318a4cd232abc58907522c20ac666e6db2a92e6a25c9ddd1db0f0169be6f94aadc808014545e92e6168eaa7fa20de4235072186c48ade2e7fc672a700485ad
-OpenBLAS.v0.3.21+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/d5a83c8835ad8553d304bf5260b53985
-OpenBLAS.v0.3.21+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/3b506824730c7269eb49e90dc11bfde2b17338ef1504da63e84562433c68586a71b022ad37de3096e06ac24e98828b48638c672789320f76cb33eda4c8e8c9eb
-OpenBLAS.v0.3.21+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/59cc4a5aeb63dd84c0dc12cbef7d37af
-OpenBLAS.v0.3.21+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/1e9cadcf65232d4a4fba9cda0226e5a5450227e16bf2c27a3268452d5c4e4d5d1321601fd6e1a5c5d92fbc3fc4de21c92e231b5ad3b25dd71eb49d5940fcf243
-OpenBLAS.v0.3.21+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/c04f400d9aca82aac956e94d9fc6fc51
-OpenBLAS.v0.3.21+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/8751300546ccb059fb7755e3f745e7e195cfaf90daf28b151ea2a3d540edf8910c97351d428dda196707599361a200f1a647098fdf5d7163380b4ad2b4a4f733
-OpenBLAS.v0.3.21+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f251abd323b6bc463ae4a1989670aefb
-OpenBLAS.v0.3.21+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/4f179ed09a5f5b71033d09db3894ad78d58a4429d65203225ab7a2a8c887222924910756a5fc4e3974a7cc6f9d994af287490f53cd05fe90f86c4bd4c4023b6d
-OpenBLAS.v0.3.21+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/f8ffa30a958448028d1294da9d15f3b2
-OpenBLAS.v0.3.21+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/0a2d9a73be439d78b4af7c70345bdffd531d5687adeea28049adba3c8c9ab7b6ed221703f2a8aff9e7391305355a596dc9a846c84d36d1b4cdfda521f7c05e8c
-OpenBLAS.v0.3.21+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/56a4aa4f3eafff290d38dc251a5966cb
-OpenBLAS.v0.3.21+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/fce03b1c1becdac66208de20e66949aba113ce2695217d34703a9ba4fd79d364cdf05424282b9d1f25ad9c315baffca3a8bd0af239f6284bd37cbdb2ec3463c6
-OpenBLAS.v0.3.21+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/aca7ef7f854cfe45bedbf1f6b5a97aaf
-OpenBLAS.v0.3.21+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/16821728c7a0e56589b60176a06543240f2a061b437dc1cf38723dc56910c6add114a4a5e65eda051e5e88ff7b82b8613ffaf5dad7864f1208eb381159bacc8c
-OpenBLAS.v0.3.21+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/9f2c39eef42e5c80f56d36aee156e2b0
-OpenBLAS.v0.3.21+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/441a7833e4e2d1334aeda58d87560e613244138005bc54c74af00d81c26e1e508ce874fccdcd3b3114a74f5e2a102eb520a2e4165b461861ba79fbaff81e4ae3
-OpenBLAS.v0.3.21+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/bf1a9f3e553d6855133b4de3ffc841ee
-OpenBLAS.v0.3.21+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/2002b305ef3f3721287ed839d6d08f34127058e6295233f8280cc3ebd06d91cb326ea83f13c0158820c381fa8a2cc74ec1360a65c99bc525f492be561b15cc09
-OpenBLAS.v0.3.21+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6051a0545d272bf19988e2a889531acd
-OpenBLAS.v0.3.21+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/21706d2fd8dde6e1e938b0c7a53c6073d52d830d7672702d828d5606582e2c98bdb39fc7ff1fa67188942713e9d718fdf5f014812115d0d0853066c2df21f297
-OpenBLAS.v0.3.21+0.i686-linux-gnu-libgfortran4.tar.gz/md5/4fc17cff9c7a4512245ffead4d75c678
-OpenBLAS.v0.3.21+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/5a28c5a587be439cd2bdf4880caf967cdec14945d26c978fa5c59ce251d5811d460bebc038808e0e8dd2eb4b6a0fdfcaacca4718e2aeb7855f466bd13d1974a7
-OpenBLAS.v0.3.21+0.i686-linux-gnu-libgfortran5.tar.gz/md5/06fa8dff91cff8ba91e2b4bc896e776c
-OpenBLAS.v0.3.21+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/493110d06c4361df13ba8e0839b9410551b0bba4fe6e3cdcb53c0dff41a03b3e34ec1c2e73cd4671516631492a16b8dd140a59fa3ac71c348e670858654f2d8a
-OpenBLAS.v0.3.21+0.i686-linux-musl-libgfortran3.tar.gz/md5/1b16814a10900c96537b5bfed19e71c2
-OpenBLAS.v0.3.21+0.i686-linux-musl-libgfortran3.tar.gz/sha512/603b2a2fd92286143cb0cb573e3c085db485cf3c4f54987d255efedaea2a8a3d84b83e28f6b2db9dbf05cd31f061845a749b8402d145cc5e8cc2eb369b38e3f5
-OpenBLAS.v0.3.21+0.i686-linux-musl-libgfortran4.tar.gz/md5/20ec87e486f1154d253bc251c1ec0bce
-OpenBLAS.v0.3.21+0.i686-linux-musl-libgfortran4.tar.gz/sha512/a2d1736e0f632bddc5140ea88840113b80fedcad51bf5ea93445053eb07c1ae304a1510a85cf964d3a0e087390b8526a0df2bcd24e356b4693a41e5dfc8a671c
-OpenBLAS.v0.3.21+0.i686-linux-musl-libgfortran5.tar.gz/md5/df9f4898d550130b0c0c5cede6dd4db3
-OpenBLAS.v0.3.21+0.i686-linux-musl-libgfortran5.tar.gz/sha512/c4c3133904e7b401c5d3013d0ef38b13d7a9804fb4ba38a2c0a062f1badb4d9150214bfc2a1bf55df1299e4151d71a6dbfce7063d9d80a19fe321878c0e59309
-OpenBLAS.v0.3.21+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/3d83d839f80abbd17f84631fbb8f6624
-OpenBLAS.v0.3.21+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/e51680b4b97f404f244b4d943148f506f84a21f0f59f4d41a3a0cf81d545658d9cc59592a2538c4c077606fc1e6f87eda23063e49828f066e743205c5e6aee8e
-OpenBLAS.v0.3.21+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/2ca3ebd61038a5e422a946ede3d17535
-OpenBLAS.v0.3.21+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/c692b7c00aa0eda4a3fa989f84c797126b1596e13514117ad898166f941f040df67826ba33d0af93673c7a0b478fe4392f9a53d7859b7371878b6608dcb0556b
-OpenBLAS.v0.3.21+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/55995848e4bc9ef739e8ba17962787d3
-OpenBLAS.v0.3.21+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/e274dcfc6593786ac4c32c3f49ec69ab3a0c7278c67bbd04373d4845bff2dfaf17300d4a71e48ebd508e299fa629190ffe70ce074a68e83bac0eafa51f4db2a0
-OpenBLAS.v0.3.21+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/2a3d5f0240a27cf1617d9d4abba6df78
-OpenBLAS.v0.3.21+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/489128c884a0c2b9eb5145b18c68f9c7de9a9cc4131f4635689dc67765f87bec852b0547ebea4ecfdad4eec38063aabe8f6c3e96e5856e08c0c26130c2f11897
-OpenBLAS.v0.3.21+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/bb3501965c26519ecf30830465b12672
-OpenBLAS.v0.3.21+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/bcc78c10f7aba973f1159443e8d408465833ef43530c1c2514715db5a1bb84c0f48510c12b2ac1211b2777328ec682e01ab668818025651f00db7ca747f5674e
-OpenBLAS.v0.3.21+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/dc6192b5130e114b3cb1183690c7e398
-OpenBLAS.v0.3.21+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/95d69ace7b145c02acbe13c52d1e7835fdb1e8788c0b03f0f967d88575b322988e4f4acef3b6ad3e505c895f8d19566b8eb9add02f0250cf2d4a14c9f1428f3f
-OpenBLAS.v0.3.21+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/27a9117002f96c41c7907be0475a8d86
-OpenBLAS.v0.3.21+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/2247f3691cc552f65a353412c46a76c1ac4b4d994a5725044ba300f0944f15b37144ceff438d77022b441c25eaf530709a4d3ed4523b97d292991b6407a72970
-OpenBLAS.v0.3.21+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/30dea9ca8b658ff6a9db9245d8ad7500
-OpenBLAS.v0.3.21+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/3289e766bfad456b35efae6d341a77698d4d36908ac8d802f47777feed5eef224fde1cb4799b5bd4e8e216c28c78ab4407b92906ddac0bdd1cfb674136c69aaa
-OpenBLAS.v0.3.21+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/ed820c420a67b32df0a9f34760ce605c
-OpenBLAS.v0.3.21+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/63f43eb0b1c037849fd5addda66d818c0b8188f9758391cd9929e78593c69ec61729be0efce6a9f943ebac178c634cdddffe172ad681ad1c4717949b075a1de7
-OpenBLAS.v0.3.21+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/a5393eb8e1799b6c089a82d8dde39fb0
-OpenBLAS.v0.3.21+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/8ce9ad2f55f131592a87789ec6a824cbe1d23c3be32fb2ba59e107045f75c34684b85d3bab2913923f5a19414a072b5e855c86fddd44a4319a9b5e7b28d5c169
-OpenBLAS.v0.3.21+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/652aa333440219a4ec17d94dd4e6d358
-OpenBLAS.v0.3.21+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/89d7740b462d4216e920dcd5c7867520b2f49c3cb74bd8424efd287927c92a08492c9fa413506248207f9532c7bb9ea2af587a4f70d7db8ea42ac1bc144e8a12
-OpenBLAS.v0.3.21+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/9e0831544d02a39565a2d0714b1e121a
-OpenBLAS.v0.3.21+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/9e57a08a09d5fc47d881f9f7ed2e52fbdc7301908cf1be384fe767e6b7771a5980827417049dd37df4d71a861b2cf2a05f25df892b15ed03458910b0bc53101a
-OpenBLAS.v0.3.21+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/dde15d3a2f26601cd7ac0a803efbe503
-OpenBLAS.v0.3.21+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/fa48e51a59b6fb213f88ce8b2778ca5eef73f0721a5c71e27cd8952a34a5003d69710571deb5c0c06526fa8016cfdacabdc2b343342ad0d1e523903fa94a8d42
-OpenBLAS.v0.3.21+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/55b80d13104f4ddc9eefa0424d71330b
-OpenBLAS.v0.3.21+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/fcd816e4dcd767963ae555846cee846c19f0b7d666042d59757eb2eebe115d30af60072c134c3025049712520705dbe9d2862a1f07c955780f9a0de69e6e00b5
-OpenBLAS.v0.3.21+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/e22e46b304962a1edb550e19914cc5e6
-OpenBLAS.v0.3.21+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/38f8ce1baee4082fd2432dbc1905fd03d8efbcc378aefc9312e90b6054341717ea46bc4d33f9f517e67af0fca2da55b5c5b112850e6071ba18753a4936d78da2
-OpenBLAS.v0.3.21+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/b8b7a9f9aff94b154e8479a84d7abe4b
-OpenBLAS.v0.3.21+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/afe4c45d4bf4e38fdbbf00b9e86a65244aadaa2b74e59f9a43f1860c130f721bba2f36186519b2573ff0819bd2b30414cc23800634847db2ecd2107f985495ad
-OpenBLAS.v0.3.21+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/8fceea42a8524fef29a54b88ea0a721b
-OpenBLAS.v0.3.21+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/e8195597057ab6de0aa5978b4d13b3248ac6acde3f86fc55d9e1c76ec39d464fc2eefea1096cfb5dffbd623f47b06be163c4c74981d2eb13387bc8499b9053fe
-OpenBLAS.v0.3.21+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/6f88d96c05663b4aeb81ba8a63882066
-OpenBLAS.v0.3.21+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f1f516d8d16a2f86bfb26129e0530146de3a4bcb62abcd2c7b9bf64cc09c069e5eeb66658b1cc0cdcc665de98246ad8ed20a7d8b132fe0f0e4d0651d3b4516d4
-OpenBLAS.v0.3.21+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/4fb99984ec612a090b294c6b349a7cdb
-OpenBLAS.v0.3.21+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/81bf55c6398a6cf4a61add084332e7cb79e6d550737641f6c0bc77aa61bd8187603a6100b78c2ef80109c3c5b21f7ba618a4780a5b68e5168a461af521f26c52
-OpenBLAS.v0.3.21+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/a1a2c3623d583ab995ac86df07ab73bb
-OpenBLAS.v0.3.21+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/ec5fab349607862c9e0badaa1fd433e057ac7d056008af683bbb37bf43fef5322e598cd71a6d9c3dd55ef857b39ca634e64572e9ae6e263022dc7f89083f9bca
-OpenBLAS.v0.3.21+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/05ef0ede7d565247a936c3110c25c83c
-OpenBLAS.v0.3.21+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/34d2812bc0c6605558cbd6244c41d0805fc9a943cd91a74994bcacdd5ff19612eac8751832e3ee761089a853cf16069e67e13507ca75bbe4b7dc4517e41515e0
-openblas-b89fb708caa5a5a32de8f4306c4ff132e0228e9a.tar.gz/md5/716ebe95d4b491253cdde8308b8adb83
-openblas-b89fb708caa5a5a32de8f4306c4ff132e0228e9a.tar.gz/sha512/00e7bde49525c2c28bf07b47290e00b53bff446be63f09e90c51724c6350e5ddc90f5a071ae6de057b3fbb107060e70bf16683fcefcf48ae37ba1d0758be553b
+OpenBLAS.v0.3.23+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/f4ab1aa718db6ab731179199b48506ad
+OpenBLAS.v0.3.23+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/5cd6326eab751d087b6638acc256a7c5dfc3a8a4be8949f4e2b5b8079aedc05cd8569774da19912fcbcd2dc1eac6a09d72d19bdbeded1198317992a85ccd605b
+OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/57b8903e05998d293d28e70ee6cbc4d8
+OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/03325728191f88dcfc2bea16d818c0325b4f42019ed9c2e0533233e8e2a4da09a2c70503632fef2ab55ed12b7da39fdab470b801d34a9b6f576bda509f8a8a8d
+OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/fe529647382de5693557363f658c71b6
+OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/77ac56f683a481477fa898d208e67c0c04c1ab8ca9dacb1e4e4ea3795fadb2604faffd1f3fd35d53eecb223c7f92de40cc8b2bdeb9c8a6a1b6a9949965cb9380
+OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/5aea8a00a946273a154110ca7b468214
+OpenBLAS.v0.3.23+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/a606933bed17e563d15ac206a4a37d38d75e9bb0bef46ef62485dcd32aa5a0e8501dab01f6887a1e60736c59177c6fbf0ec541fa521a9a8de854f44703f337c3
+OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/d81dc2a42a8c0d87f4ee9bad98579f2a
+OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/f2bda57546f1b9aa1f8dfe9a07b2243cadc002a9ffefbcfdde344ccc96efb07608a55bf8dbb6de34925af03f01ac5487f9fe293befa84edd9a84c01a9b7409e1
+OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/400ba512f73a60420aa0d316bc24db48
+OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/927c711c3950f24e6b4c22c6dd92cd2b212e3df9241c637ff42f5b9135e7bee8f3864868aea594c6e8ba5b40f0563d63a5f8634ea3c3276bec35d480601e76e5
+OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/6a91ea53f3aff17b602b324d025309c5
+OpenBLAS.v0.3.23+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/8ee85883fcc605c16031bafdd0f1a4f4d4a5957a4f85c2022466232f902a4cf64c284537dd2f237221f7d0c154e2b46200501891d3990e94dcf49a74a66c36de
+OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/c653ff340dc25b19ca36309060dd6b1a
+OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/cc77c84538bb0301eaa98ca1a32f024da6242e40e847e71f4a36ab69233590422aea41a32ee67031d8055c929f741617053416e5b9d446affa36e7233e5af48b
+OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/18a914a1df2be07ff6b419617cb6347f
+OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/eafab27655b0c179ad8b9b1dc818e8394d365f19cf75a0d77402951a38e204aa2fbe580037116a28e8e1254b66d15a543ccd0f438f3ae388e8bcad39f5953c64
+OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/4b8d18500b4bdc6f1081da6f0837340f
+OpenBLAS.v0.3.23+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6512bd03d58b7669dba7f9830d3f8654b2747ee66c7bfc05acdbca6c3d2c3750c9d1163768a3f91d56c5a87cb30705ad6f10395652fee4c9cd06cd2920db3027
+OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/27fd022a3b84c3a92da9d6062d8dafaf
+OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/c0e73f2012df2453cc6231a9e7a644609ba1280c9aea63d2cbbf9594539fb26c8f9ab6976de8ec9870cab483b1fe7e3a1fc81246fa99bbd7526051e74a4733e1
+OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/e2b0503bf1144f4b6a65ae9f09b25828
+OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/204678995b9f337e4ddae793762c3a00968faa3da3433ea17578944fd56f33c381150521b6a561d6ff2022693f8d46b9d0f32f330e500036b4bfc08a7dbd8a62
+OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/3e733c1c668a3efaccfde643092595e5
+OpenBLAS.v0.3.23+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/4a37e5de66920f20a648118f62555755b51e6e089e7ee43d2b7b8ec0dc47e68c7705b878158ad83d152cfebf77118f789d1bf7b2ee0702334d4317f0c6a926a1
+OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/c653ff340dc25b19ca36309060dd6b1a
+OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/cc77c84538bb0301eaa98ca1a32f024da6242e40e847e71f4a36ab69233590422aea41a32ee67031d8055c929f741617053416e5b9d446affa36e7233e5af48b
+OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/18a914a1df2be07ff6b419617cb6347f
+OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/eafab27655b0c179ad8b9b1dc818e8394d365f19cf75a0d77402951a38e204aa2fbe580037116a28e8e1254b66d15a543ccd0f438f3ae388e8bcad39f5953c64
+OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/4b8d18500b4bdc6f1081da6f0837340f
+OpenBLAS.v0.3.23+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6512bd03d58b7669dba7f9830d3f8654b2747ee66c7bfc05acdbca6c3d2c3750c9d1163768a3f91d56c5a87cb30705ad6f10395652fee4c9cd06cd2920db3027
+OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/27fd022a3b84c3a92da9d6062d8dafaf
+OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/c0e73f2012df2453cc6231a9e7a644609ba1280c9aea63d2cbbf9594539fb26c8f9ab6976de8ec9870cab483b1fe7e3a1fc81246fa99bbd7526051e74a4733e1
+OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/e2b0503bf1144f4b6a65ae9f09b25828
+OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/204678995b9f337e4ddae793762c3a00968faa3da3433ea17578944fd56f33c381150521b6a561d6ff2022693f8d46b9d0f32f330e500036b4bfc08a7dbd8a62
+OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/3e733c1c668a3efaccfde643092595e5
+OpenBLAS.v0.3.23+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/4a37e5de66920f20a648118f62555755b51e6e089e7ee43d2b7b8ec0dc47e68c7705b878158ad83d152cfebf77118f789d1bf7b2ee0702334d4317f0c6a926a1
+OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran3.tar.gz/md5/639643a12f8018e4be7bb1f9f29e57f6
+OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/0993e1967964874a3f90610745d82369ee70fa4313445391fdcb26c4218c6badb18577c67648d2f77f359b163dafde31a3723998e0b006622effeace506b669f
+OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran4.tar.gz/md5/13ec86d62840258c425b0a5a6824a609
+OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/0bc74dac87b8ab5ea244fa5bcd05baf2968b7041c4eb392ff808d0aae897cec4b3082ef7fecda28aea2662b6cd956a5254212740b1802a947dd3f1e5a3dfe2d2
+OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran5.tar.gz/md5/413d4eae7b9c409204ab5fb7867dc30f
+OpenBLAS.v0.3.23+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/4a484d2aa239d8c1e2733cd9d16bd17549f5048d9958899a4e20039a7efcfd280bba901f3fe63b3b079fd7fae88911f7201a7649a472d47d0148ba8520f350cb
+OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran3.tar.gz/md5/7f342d27a9b193b5d37e2ae4de6e4640
+OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran3.tar.gz/sha512/2927b18e176e07fe8a05d2eba24f6160680131832094bde9634f0890c1bc3b877c3293163fc65067cea402f3e75871c41b47e4a9999f273e667ac400878aa2b2
+OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran4.tar.gz/md5/523c007c319adbdde6e8cd7d3d89a9a1
+OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran4.tar.gz/sha512/ddb7a8d67c9430976ad967e21a6b8717c8a5501e8808fabf6e7b2e7298a0ca56049dcfc12214a5a19dbf7bd52d625b0b2b1bcc6b4c1d921c3ee62fd2766da891
+OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran5.tar.gz/md5/7dd91db180e59da5f866f73eaccc4d1d
+OpenBLAS.v0.3.23+0.i686-linux-musl-libgfortran5.tar.gz/sha512/ff0ee65e536eae5ece7fbc00a0735349d560a142e025084d64f28891bdd3da5914e976640be354d8ad34fd3d89bfb90461eb95f2426d5e292906ed4ead1cfafc
+OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/fef43c3fed5ed7e9fdd9c7757be6b95e
+OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/b580c1da073ed94d1a259183c5b2a6896a746c5e88c83e2df57fea801f259cb49f99b3468bbc5c1d7dc6bb84f597843bc3c383c9cab7608dbfbbb15352fb1012
+OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/88db137baca7ce99e58ff3b13ee73644
+OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/1608f3ee3964df833db9a1277fb9f69e3bb1d328a27482ac419e08520a51b2cb25501cf8986b2ff617bc04881984ce73ecd2b55b0c99afb5cb28f32d24d89052
+OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/32c1ca252dcae7d02bcd54d2b00a4409
+OpenBLAS.v0.3.23+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/401126557d3072d965327aa1440eeaf22fdfb1e5265c28dca779d81b94ababd1d487603d55e384f2bac305125c9ed3826f0bb7be99af20b0d18a674a8069ce5b
+OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/3059083c8293106486a0f28a3564e499
+OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/019bb4bc71d7be14f040b36d1b44f653ee89aac680749a6a3b8b72446dffae185dd3d8172ca7ac9aac45cfe564c0fc6cf3221a6f8496b9ba10d04ab44d897b65
+OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/648167f83536f32921f1208d09cc8f47
+OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/084346b93a99671967433f4ac6548d7b828aa65c402bac7e68aee78bbf75e5cb06b22f42a7d4876fdea3e838162278ee3fcf011fa18530c8d8b0e853a4c6440c
+OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/9796916fb0acbea2e93747dafa96d496
+OpenBLAS.v0.3.23+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/7c3643c3669fea262907bb5c0f27b492adfec910716498a0bd992d705a544b21023d77801f27c967c07be9d5b30bbd936137c8f59f61632fb16cc0e1f2efebd1
+OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/cbf9ad429547ebd1a473f735b6c65442
+OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5e98ec17ee35624bf0a286a2dbe01f5ae4fa879274af70b218080c537a325a92fe76331b746e98b3ce3a0d127df2c03f522f554cb43c169a2b7b1890a9a8a81f
+OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/28792164b6c34bc627966e338221ff34
+OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/433dcec661ff2459740c4d1e72d766549135f6f41a7ffb488502d76751fcb00c3d75aaa0e3db182441ef6b5e3b487a9df3e1b8b979da3681496f4ac6c6ce819b
+OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/7013b806bfcd2c65582df5f224bd7d86
+OpenBLAS.v0.3.23+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/1078cf5583d158af5d38690acf913db378195b79b4743d977e7654c246fecb0ded4ebee96d89f54c5ec5f04af1b9858bcc0700251ccce1bf7c87926ede069b91
+OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/f959117d5c3fd001412c790bd478f7f6
+OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/e6fbe9fe1b7a92e10760d2b945bcc2c1c5e8399d729fbbb771764e7b72856707629123bc2d2fed2549f551776f8f0a737b0f414ffddc820a655172d933c10af9
+OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/af04d6bd91df5c9bcc63fe06c88a4b79
+OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/0cd4972d0a44505f9d8d3958bd20e491c986f55f5f84000ab534020dc8d39d788402355fa51bbd521c8c1bf6884d9d35c1db156bd106a98fbde80c104e8dd5a1
+OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/c5e6138630c5b616df1d045e1c388710
+OpenBLAS.v0.3.23+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/a54db7cb7e28dd792bd2c4f33945e7d99db1ee9a620bbe77a21cd7fa7f4cddc5c7744d27116951582f00223df09e7dc2258754032cebd57f61a723762743d3fb
+OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/7d407633f4f59c305896f9132c098cd2
+OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/8a04d46b6dc2eef87d6c4ac43bcdacf5da2b1669bb829c42f07f7f73bc0dba35a6e48f303d1e9cb951062fa2c3a4cce894406c5551c2bac7f57f02d2f92122a3
+OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1d6c7e0b6f3eeedb41ecfea9881d0bac
+OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/9152b7f584ecc3f06caf0eaf0a496d9e9c16afe41a4750a9bcce0477cd3cabcdcec5c97c24fa3fba03d603148c8a3dcf7199c171abe10121aaee2f8a68b93c91
+OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/fdd5c9e5f746403f7ba4789d8d8c47e1
+OpenBLAS.v0.3.23+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/2bd980e1e2021b32f3455fb3fdbae407fb672074ca798664c77e063ea6a7503b625eac7655c8cf25307afbfd9abaa64af52fbb3ed811ff8eb6515e3edcf26b1d
+OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/8c69d9b7b6fbd0896f839c8979c35a81
+OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/d8859f485fa35b33be167dd45f1fe87696be0b12f27dd041087cfbb9df0da94bb726fb9c5f89162405de473969013e3a6a11b0520236db7f5603b25466ebf0d9
+OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/40724e1d694288f930a15860650f37bd
+OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/b7bd75b57803da93d19016f5fe63bd88357aa4e728fdde026a55ab2382957f5a82254b12e701ffb19085a6d1ecc0c0b0c685efb6fa9654e7537f146087cce00a
+OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/d78352f4e9baf1225aa135b03da9315b
+OpenBLAS.v0.3.23+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/aa4d3b0972429af7376e80eab93375ea0368f2f3a31cdbacdb782ff32f7b1c708c5e2d7f1c30ba5b8a7c604a3a7c27a7601fc7f09c8dad2b6dbc54ff099fc0e2
+OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/dbf8b0592102b01de80df0767f681227
+OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/9bdf9ab9c3ff36281fa501771c4ed932e8a481ffc4cef08725b4877999bd320c99f9c756beba7143050705323bdc0bea150ab3a11e47f3f7c60f206595c37b73
+OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/feba9f9647e82992ba310650e3b8ff71
+OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/b6c98a5a57764eef4940d81461f9706f905d376d165abdbd0fafbdd5802e34523ad15e6ee75a4550555b7c969630c43438d6cce3d6e37ac95e57b58bcc9d542c
+OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/732544eb61201b6dd7c27d5be376d50d
+OpenBLAS.v0.3.23+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7b68cceb0bdb892ae74e2744f2a9139602a03e01d937188ca9c875d606d79f555594a5ff022b64d955613b6eb0026a26003011dc17382f019882d9c4c612e8e2
+openblas-394a9fbafe9010b76a2615c562204277a956eb52.tar.gz/md5/7ccaaaafc8176b87dc59d4e527ca4d9f
+openblas-394a9fbafe9010b76a2615c562204277a956eb52.tar.gz/sha512/12235f0459469b483a393844c228be5ad4bc60575bbe4b3238198f2480b7b457e4b0609730ce6d99530bb82e1d16fdd2338ceed6d28c952e6fff0da7f571f863
diff --git a/deps/clang.version b/deps/clang.version
index 2fa84f679cb19..d291dc8e8f8d8 100644
--- a/deps/clang.version
+++ b/deps/clang.version
@@ -1,4 +1,4 @@
 ## jll artifact
 # Clang (paired with LLVM, only here as a JLL download)
 CLANG_JLL_NAME := Clang
-CLANG_JLL_VER  := 14.0.6+0
+CLANG_JLL_VER  := 15.0.7+5
diff --git a/deps/curl.mk b/deps/curl.mk
index 435ee278e3468..a063dfe07fba0 100644
--- a/deps/curl.mk
+++ b/deps/curl.mk
@@ -36,7 +36,7 @@ checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 ## xref: https://github.com/JuliaPackaging/Yggdrasil/blob/master/L/LibCURL/common.jl
 # Disable....almost everything
 CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \
-	--without-ssl --without-gnutls --without-libidn2 --without-librtmp \
+	--without-gnutls --without-libidn2 --without-librtmp \
 	--without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \
 	--disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \
 	--without-gssapi --without-brotli
diff --git a/deps/curl.version b/deps/curl.version
index 96bc09263156f..f704bc2bebc61 100644
--- a/deps/curl.version
+++ b/deps/curl.version
@@ -3,4 +3,4 @@
 CURL_JLL_NAME := LibCURL
 
 ## source build
-CURL_VER := 7.84.0
+CURL_VER := 8.0.1
diff --git a/deps/dsfmt.mk b/deps/dsfmt.mk
index 203c5f1917c91..da57799053933 100644
--- a/deps/dsfmt.mk
+++ b/deps/dsfmt.mk
@@ -5,10 +5,13 @@ ifneq ($(USE_BINARYBUILDER_DSFMT),1)
 
 DSFMT_CFLAGS := $(CFLAGS) -DNDEBUG -DDSFMT_MEXP=19937 $(fPIC) -DDSFMT_DO_NOT_USE_OLD_NAMES -DDSFMT_SHLIB $(SANITIZE_OPTS)
 DSFMT_CFLAGS += -O3 -finline-functions -fomit-frame-pointer -fno-strict-aliasing \
-		--param max-inline-insns-single=1800 -Wall  -std=c99 -shared
+		-Wall  -std=c99 -shared
 ifeq ($(ARCH), x86_64)
 DSFMT_CFLAGS += -msse2 -DHAVE_SSE2
 endif
+ifneq ($(OS), emscripten)
+DSFMT_CFLAGS += --param max-inline-insns-single=1800
+endif
 
 $(SRCCACHE)/dsfmt-$(DSFMT_VER).tar.gz: | $(SRCCACHE)
 	$(JLDOWNLOAD) $@ https://github.com/MersenneTwister-Lab/dSFMT/archive/v$(DSFMT_VER).tar.gz
diff --git a/deps/gmp.mk b/deps/gmp.mk
index 2354a6ca44a9f..12ba15f8aa0f6 100644
--- a/deps/gmp.mk
+++ b/deps/gmp.mk
@@ -19,6 +19,9 @@ ifeq ($(BUILD_OS),WINNT)
 GMP_CONFIGURE_OPTS += --srcdir="$(subst \,/,$(call mingw_to_dos,$(SRCCACHE)/gmp-$(GMP_VER)))"
 endif
 
+ifeq ($(OS),emscripten)
+GMP_CONFIGURE_OPTS += CFLAGS="-fPIC"
+endif
 
 $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2: | $(SRCCACHE)
 	$(JLDOWNLOAD) $@ https://gmplib.org/download/gmp/$(notdir $@)
diff --git a/deps/ittapi.mk b/deps/ittapi.mk
new file mode 100644
index 0000000000000..1a47c3ae89390
--- /dev/null
+++ b/deps/ittapi.mk
@@ -0,0 +1,43 @@
+## ittapi ##
+include $(SRCDIR)/ittapi.version
+
+ITTAPI_GIT_URL := https://github.com/intel/ittapi.git
+ITTAPI_TAR_URL = https://api.github.com/repos/intel/ittapi/tarball/$1
+$(eval $(call git-external,ittapi,ITTAPI,CMakeLists.txt,,$(SRCCACHE)))
+
+ITTAPI_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DITT_API_IPT_SUPPORT= -DITT_API_FORTRAN_SUPPORT=0
+
+$(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+	$(CMAKE) $(dir $<) $(ITTAPI_OPTS)
+	echo 1 > $@
+
+$(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured
+	$(MAKE) -C $(dir $<)
+	echo 1 > $@
+
+define ITTAPI_INSTALL
+	mkdir -p $2/$$(build_libdir)
+	mkdir -p $2/$$(build_includedir)/ittapi
+	cp -a $1/bin/libittnotify.a $2/$$(build_libdir)
+	cp -a $1/bin/libjitprofiling.a $2/$$(build_libdir)
+	# cp -a $1/bin/libadvisor.a $2/$$(build_libdir)
+	cp -a $(SRCCACHE)/$(ITTAPI_SRC_DIR)/include/ittnotify.h $2/$$(build_includedir)/ittapi/
+	cp -a $(SRCCACHE)/$(ITTAPI_SRC_DIR)/include/ittnotify-zca.h $2/$$(build_includedir)/ittapi/
+	cp -a $(SRCCACHE)/$(ITTAPI_SRC_DIR)/include/jitprofiling.h $2/$$(build_includedir)/ittapi/
+endef
+
+$(eval $(call staged-install, \
+	ittapi,$(ITTAPI_SRC_DIR), \
+	ITTAPI_INSTALL,,,))
+
+get-ittapi: $(ITTAPI_SRC_FILE)
+extract-ittapi: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted
+configure-ittapi: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured
+compile-ittapi: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled
+fastcheck-ittapi: #none
+check-ittapi: #none
+
+clean-ittapi:
+	-rm -f $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled $(build_libdir)/libopenlibm.a
diff --git a/deps/ittapi.version b/deps/ittapi.version
new file mode 100644
index 0000000000000..81afb6de2add2
--- /dev/null
+++ b/deps/ittapi.version
@@ -0,0 +1,3 @@
+## source build
+ITTAPI_BRANCH=v3.24.0
+ITTAPI_SHA1=0014aec56fea2f30c1374f40861e1bccdd53d0cb
diff --git a/deps/libgit2.mk b/deps/libgit2.mk
index 30d94aeca7b7d..9bd7bd555d89d 100644
--- a/deps/libgit2.mk
+++ b/deps/libgit2.mk
@@ -35,23 +35,6 @@ endif
 
 LIBGIT2_SRC_PATH := $(SRCCACHE)/$(LIBGIT2_SRC_DIR)
 
-$(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied: $(LIBGIT2_SRC_PATH)/source-extracted
-	cd $(LIBGIT2_SRC_PATH) && \
-		patch -p1 -f < $(SRCDIR)/patches/libgit2-agent-nonfatal.patch
-	echo 1 > $@
-
-$(LIBGIT2_SRC_PATH)/libgit2-hostkey.patch-applied: $(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied
-	cd $(LIBGIT2_SRC_PATH) && \
-		patch -p1 -f < $(SRCDIR)/patches/libgit2-hostkey.patch
-	echo 1 > $@
-
-$(LIBGIT2_SRC_PATH)/libgit2-lowercase-windows-h.patch-applied: $(LIBGIT2_SRC_PATH)/libgit2-hostkey.patch-applied
-	cd $(LIBGIT2_SRC_PATH) && \
-		patch -p1 -f < $(SRCDIR)/patches/libgit2-lowercase-windows-h.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/libgit2-lowercase-windows-h.patch-applied
-
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
diff --git a/deps/libgit2.version b/deps/libgit2.version
index 62633db62409f..b8cefc3c5c6f3 100644
--- a/deps/libgit2.version
+++ b/deps/libgit2.version
@@ -3,11 +3,11 @@
 LIBGIT2_JLL_NAME := LibGit2
 
 ## source build
-LIBGIT2_BRANCH=v1.5.0
-LIBGIT2_SHA1=fbea439d4b6fc91c6b619d01b85ab3b7746e4c19
+LIBGIT2_BRANCH=v1.6.1
+LIBGIT2_SHA1=8a871d13b7f4e186b8ad943ae5a7fcf30be52e67
 
 ## Other deps
 # Specify the version of the Mozilla CA Certificate Store to obtain.
 # The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes.
 # See https://curl.haxx.se/docs/caextract.html for more details.
-MOZILLA_CACERT_VERSION := 2022-10-11
+MOZILLA_CACERT_VERSION := 2023-01-10
diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk
index 8900390d24c24..7d79e03ee8d0e 100644
--- a/deps/libsuitesparse.mk
+++ b/deps/libsuitesparse.mk
@@ -26,7 +26,7 @@ LIBSUITESPARSE_MFLAGS := CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS
 	  AR="$(AR)" RANLIB="$(RANLIB)" \
 	  BLAS="-L$(build_shlibdir) -lblastrampoline" \
 	  LAPACK="-L$(build_shlibdir) -lblastrampoline" \
-	  LDFLAGS="$(SUITESPARSE_LIB) $(SANITIZE_LDFLAGS) -Wl,--warn-unresolved-symbols" CFOPENMP="" CUDA=no CUDA_PATH="" \
+	  LDFLAGS="$(SUITESPARSE_LIB) $(SANITIZE_LDFLAGS)" CFOPENMP="" CUDA=no CUDA_PATH="" \
 	  UMFPACK_CONFIG="$(UMFPACK_CONFIG)" \
 	  CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" \
 	  SPQR_CONFIG="$(SPQR_CONFIG)"
diff --git a/deps/libtracyclient.mk b/deps/libtracyclient.mk
new file mode 100644
index 0000000000000..92d6bee4caea6
--- /dev/null
+++ b/deps/libtracyclient.mk
@@ -0,0 +1,83 @@
+## LIBTRACYCLIENT ##
+ifneq ($(USE_BINARYBUILDER_LIBTRACYCLIENT),1)
+LIBTRACYCLIENT_GIT_URL:=https://github.com/wolfpld/tracy.git
+LIBTRACYCLIENT_TAR_URL=https://api.github.com/repos/wolfpld/tracy/tarball/$1
+$(eval $(call git-external,libtracyclient,LIBTRACYCLIENT,,,$(BUILDDIR)))
+
+LIBTRACYCLIENT_BUILDDIR := $(BUILDDIR)/$(LIBTRACYCLIENT_SRC_DIR)
+LIBTRACYCLIENT_SRCCACHE := $(SRCCACHE)/$(LIBTRACYCLIENT_SRC_DIR)
+
+LIBTRACYCLIENT_CMAKE :=
+LIBTRACYCLIENT_CMAKE += -DBUILD_SHARED_LIBS=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_FIBERS=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_ONLY_LOCALHOST=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_NO_CODE_TRANSFER=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_NO_FRAME_IMAGE=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_NO_CRASH_HANDLER=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_ON_DEMAND=ON
+LIBTRACYCLIENT_CMAKE += -DTRACY_TIMER_FALLBACK=ON
+
+ifeq ($(WITH_TRACY_CALLSTACKS),1)
+LIBTRACYCLIENT_CMAKE += -DTRACY_CALLSTACK=32
+else
+LIBTRACYCLIENT_CMAKE += -DTRACY_NO_SAMPLING=ON
+endif
+
+$(LIBTRACYCLIENT_BUILDDIR)/cmake-patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/source-extracted
+ifneq ($(OS),WINNT)
+	echo "target_compile_definitions(TracyClient PUBLIC __STDC_FORMAT_MACROS)" >> $(LIBTRACYCLIENT_BUILDDIR)/CMakeLists.txt
+else
+	echo "target_compile_definitions(TracyClient PUBLIC WINVER=0x0602 _WIN32_WINNT=0x0602)" >> $(LIBTRACYCLIENT_BUILDDIR)/CMakeLists.txt
+endif
+	echo 1 > $@
+
+$(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-freebsd-elfw.patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/cmake-patch-applied
+	cd $(LIBTRACYCLIENT_BUILDDIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/libTracyClient-freebsd-elfw.patch
+	echo 1 > $@
+
+$(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-no-sampling.patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-freebsd-elfw.patch-applied
+	cd $(LIBTRACYCLIENT_BUILDDIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/libTracyClient-no-sampling.patch
+	echo 1 > $@
+
+$(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-plot-config.patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-no-sampling.patch-applied
+	cd $(LIBTRACYCLIENT_BUILDDIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/libTracyClient-plot-config.patch
+	echo 1 > $@
+
+$(LIBTRACYCLIENT_BUILDDIR)/build-configured: $(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-plot-config.patch-applied
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+		$(CMAKE) . $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LIBTRACYCLIENT_CMAKE) \
+		|| { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; }
+	echo 1 > $@
+
+$(LIBTRACYCLIENT_BUILDDIR)/build-compiled: $(LIBTRACYCLIENT_BUILDDIR)/build-configured
+	cd $(LIBTRACYCLIENT_BUILDDIR) && \
+		$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build .)
+	echo 1 > $@
+
+$(eval $(call staged-install, \
+	libtracyclient,$$(LIBTRACYCLIENT_SRC_DIR), \
+	MAKE_INSTALL,,, \
+	$$(INSTALL_NAME_CMD)libtracyclient.$$(SHLIB_EXT) $$(build_shlibdir)/libtracyclient.$$(SHLIB_EXT)))
+
+clean-libtracyclient:
+	rm -rf $(LIBTRACYCLIENT_BUILDDIR)/build-configured $(LIBTRACYCLIENT_BUILDDIR)/build-compiled
+	-$(MAKE) -C $(LIBTRACYCLIENT_BUILDDIR) clean
+
+get-libtracyclient: $(LIBTRACYCLIENT_SRC_FILE)
+extract-libtracyclient: $(LIBTRACYCLIENT_BUILDDIR)/source-extracted
+configure-libtracyclient: $(LIBTRACYCLIENT_BUILDDIR)/build-configured
+compile-libtracyclient: $(LIBTRACYCLIENT_BUILDDIR)/build-compiled
+fastcheck-libtracyclient: check-libtracyclient
+check-libtracyclient: compile-libtracyclient
+
+else # USE_BINARYBUILDER_LIBTRACYCLIENT
+
+$(eval $(call bb-install,libtracyclient,LIBTRACYCLIENT,false))
+
+endif
diff --git a/deps/libtracyclient.version b/deps/libtracyclient.version
new file mode 100644
index 0000000000000..0baf8504261f1
--- /dev/null
+++ b/deps/libtracyclient.version
@@ -0,0 +1,8 @@
+## jll artifact
+LIBTRACYCLIENT_JLL_NAME := LibTracyClient
+LIBTRACYCLIENT_JLL_VER := 0.9.1+2
+
+## source build
+LIBTRACYCLIENT_VER := 0.9.1
+LIBTRACYCLIENT_BRANCH=v0.9.1
+LIBTRACYCLIENT_SHA1=897aec5b062664d2485f4f9a213715d2e527e0ca
diff --git a/deps/libuv.mk b/deps/libuv.mk
index cdcd12d8db4fa..eacabac55e34f 100644
--- a/deps/libuv.mk
+++ b/deps/libuv.mk
@@ -18,6 +18,21 @@ LIBUV_BUILDDIR := $(BUILDDIR)/$(LIBUV_SRC_DIR)
 ifneq ($(CLDFLAGS)$(SANITIZE_LDFLAGS),)
 $(LIBUV_BUILDDIR)/build-configured: LDFLAGS:=$(LDFLAGS) $(CLDFLAGS) $(SANITIZE_LDFLAGS)
 endif
+
+ifeq ($(OS), emscripten)
+$(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && cmake -E env \
+		CMAKE_C_FLAGS="-pthread" \
+		CMAKE_SHARED_LINKER_FLAGS="-sTOTAL_MEMORY=65536000 -pthread" \
+		CMAKE_EXE_LINKER_FLAGS="-sTOTAL_MEMORY=65536000 -pthread" \
+		emcmake cmake $(dir $<) $(CMAKE_COMMON) -DBUILD_TESTING=OFF
+	echo 1 > $@
+
+$(LIBUV_BUILDDIR)/build-compiled: $(LIBUV_BUILDDIR)/build-configured
+	emmake $(MAKE) -C $(dir $<) $(UV_MFLAGS)
+	echo 1 > $@
+else
 $(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracted
 	touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/aclocal.m4 # touch a few files to prevent autogen from getting called
 	touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/Makefile.in
@@ -30,6 +45,7 @@ $(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracte
 $(LIBUV_BUILDDIR)/build-compiled: $(LIBUV_BUILDDIR)/build-configured
 	$(MAKE) -C $(dir $<) $(UV_MFLAGS)
 	echo 1 > $@
+endif
 
 $(LIBUV_BUILDDIR)/build-checked: $(LIBUV_BUILDDIR)/build-compiled
 ifeq ($(OS),$(BUILD_OS))
diff --git a/deps/lld.version b/deps/lld.version
index 2b34a5d3012ad..d4b2a664d980c 100644
--- a/deps/lld.version
+++ b/deps/lld.version
@@ -1,3 +1,3 @@
 ## jll artifact
 LLD_JLL_NAME := LLD
-LLD_JLL_VER := 14.0.6+0
+LLD_JLL_VER := 15.0.7+5
diff --git a/deps/llvm-tools.version b/deps/llvm-tools.version
index 5da312d32f0af..f2ecd0b33e989 100644
--- a/deps/llvm-tools.version
+++ b/deps/llvm-tools.version
@@ -1,5 +1,5 @@
 ## jll artifact
 # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
 LLVM_TOOLS_JLL_NAME := LLVM
-LLVM_TOOLS_JLL_VER := 14.0.6+0
-LLVM_TOOLS_ASSERT_JLL_VER := 14.0.6+0
+LLVM_TOOLS_JLL_VER := 15.0.7+5
+LLVM_TOOLS_ASSERT_JLL_VER := 15.0.7+5
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 81dcff1ce4c84..83b9a66ec608e 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -64,7 +64,7 @@ endif
 LLVM_LIB_FILE := libLLVMCodeGen.a
 
 # Figure out which targets to build
-LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF
+LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF;AVR
 LLVM_EXPERIMENTAL_TARGETS :=
 
 LLVM_CFLAGS :=
@@ -120,7 +120,7 @@ ifeq ($(USE_LLVM_SHLIB),1)
 LLVM_CMAKE += -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON -DLLVM_LINK_LLVM_DYLIB:BOOL=ON
 endif
 ifeq ($(USE_INTEL_JITEVENTS), 1)
-LLVM_CMAKE += -DLLVM_USE_INTEL_JITEVENTS:BOOL=ON
+LLVM_CMAKE += -DLLVM_USE_INTEL_JITEVENTS:BOOL=ON -DITTAPI_SOURCE_DIR=$(SRCCACHE)/$(ITTAPI_SRC_DIR)
 endif # USE_INTEL_JITEVENTS
 
 ifeq ($(USE_OPROFILE_JITEVENTS), 1)
@@ -286,6 +286,11 @@ configure-llvm: $(LLVM_BUILDDIR_withtype)/build-configured
 compile-llvm: $(LLVM_BUILDDIR_withtype)/build-compiled
 fastcheck-llvm: #none
 check-llvm: $(LLVM_BUILDDIR_withtype)/build-checked
+
+ifeq ($(USE_INTEL_JITEVENTS),1)
+extract-llvm: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted
+endif
+
 #todo: LLVM make check target is broken on julia.mit.edu (and really slow elsewhere)
 
 else # USE_BINARYBUILDER_LLVM
diff --git a/deps/llvm.version b/deps/llvm.version
index 2dbcd0f510f81..e35db3bd6aed2 100644
--- a/deps/llvm.version
+++ b/deps/llvm.version
@@ -1,8 +1,7 @@
 ## jll artifact
 LLVM_JLL_NAME := libLLVM
-LLVM_ASSERT_JLL_VER := 14.0.5+3
-
+LLVM_ASSERT_JLL_VER := 15.0.7+5
 ## source build
-LLVM_VER := 14.0.5
-LLVM_BRANCH=julia-14.0.6-0
-LLVM_SHA1=julia-14.0.6-0
+LLVM_VER := 15.0.7
+LLVM_BRANCH=julia-15.0.7-5
+LLVM_SHA1=julia-15.0.7-5
diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk
index 0f654dfd04c58..b4147c2c2684e 100644
--- a/deps/mbedtls.mk
+++ b/deps/mbedtls.mk
@@ -3,7 +3,7 @@ include $(SRCDIR)/mbedtls.version
 
 ifneq ($(USE_BINARYBUILDER_MBEDTLS), 1)
 MBEDTLS_SRC = mbedtls-$(MBEDTLS_VER)
-MBEDTLS_URL = https://github.com/ARMmbed/mbedtls/archive/v$(MBEDTLS_VER).tar.gz
+MBEDTLS_URL = https://github.com/Mbed-TLS/mbedtls/archive/v$(MBEDTLS_VER).tar.gz
 
 MBEDTLS_OPTS := $(CMAKE_COMMON) -DUSE_SHARED_MBEDTLS_LIBRARY=ON \
     -DUSE_STATIC_MBEDTLS_LIBRARY=OFF -DENABLE_PROGRAMS=OFF -DCMAKE_BUILD_TYPE=Release
diff --git a/deps/mbedtls.version b/deps/mbedtls.version
index eaf3bca011e1f..f262476af1684 100644
--- a/deps/mbedtls.version
+++ b/deps/mbedtls.version
@@ -2,4 +2,4 @@
 MBEDTLS_JLL_NAME := MbedTLS
 
 ## source build
-MBEDTLS_VER := 2.28.0
+MBEDTLS_VER := 2.28.2
diff --git a/deps/mpfr.mk b/deps/mpfr.mk
index 36a4f77c6a929..5a0605ba6b601 100644
--- a/deps/mpfr.mk
+++ b/deps/mpfr.mk
@@ -20,6 +20,9 @@ ifeq ($(SANITIZE),1)
 MPFR_CONFIGURE_OPTS += --host=none-unknown-linux
 endif
 
+ifeq ($(OS),emscripten)
+MPFR_CONFIGURE_OPTS += CFLAGS="-fPIC"
+endif
 
 $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2: | $(SRCCACHE)
 	$(JLDOWNLOAD) $@ https://www.mpfr.org/mpfr-$(MPFR_VER)/$(notdir $@)
@@ -27,7 +30,6 @@ $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2: | $(SRCCACHE)
 $(SRCCACHE)/mpfr-$(MPFR_VER)/source-extracted: $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2
 	$(JLCHECKSUM) $<
 	cd $(dir $<) && $(TAR) -jxf $<
-	cp $(SRCDIR)/patches/config.sub $(SRCCACHE)/mpfr-$(MPFR_VER)/config.sub
 	touch -c $(SRCCACHE)/mpfr-$(MPFR_VER)/configure # old target
 	echo 1 > $@
 
diff --git a/deps/nghttp2.version b/deps/nghttp2.version
index e87b6781433ad..200e08bf4bfd9 100644
--- a/deps/nghttp2.version
+++ b/deps/nghttp2.version
@@ -3,4 +3,4 @@
 NGHTTP2_JLL_NAME := nghttp2
 
 ## source build
-NGHTTP2_VER := 1.48.0
+NGHTTP2_VER := 1.52.0
diff --git a/deps/openblas.mk b/deps/openblas.mk
index f949143f393b1..e2837bc47232a 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -5,7 +5,7 @@ OPENBLAS_GIT_URL := https://github.com/xianyi/OpenBLAS.git
 OPENBLAS_TAR_URL = https://api.github.com/repos/xianyi/OpenBLAS/tarball/$1
 $(eval $(call git-external,openblas,OPENBLAS,,,$(BUILDDIR)))
 
-OPENBLAS_BUILD_OPTS := CC="$(CC) $(SANITIZE_OPTS)" FC="$(FC) $(SANITIZE_OPTS) -L/home/keno/julia-msan/usr/lib" LD="$(LD) $(SANITIZE_LDFLAGS)" RANLIB="$(RANLIB)" BINARY=$(BINARY)
+OPENBLAS_BUILD_OPTS := CC="$(CC) $(SANITIZE_OPTS)" FC="$(FC) $(SANITIZE_OPTS)" LD="$(LD) $(SANITIZE_LDFLAGS)" RANLIB="$(RANLIB)" BINARY=$(BINARY)
 
 # Thread support
 ifeq ($(OPENBLAS_USE_THREAD), 1)
diff --git a/deps/openblas.version b/deps/openblas.version
index 9e433d2629071..be0506fcd5137 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -3,9 +3,9 @@
 OPENBLAS_JLL_NAME := OpenBLAS
 
 ## source build
-OPENBLAS_VER := 0.3.21
-OPENBLAS_BRANCH=v0.3.21
-OPENBLAS_SHA1=b89fb708caa5a5a32de8f4306c4ff132e0228e9a
+OPENBLAS_VER := 0.3.23
+OPENBLAS_BRANCH=v0.3.23
+OPENBLAS_SHA1=394a9fbafe9010b76a2615c562204277a956eb52
 
 # LAPACK, source-only
 LAPACK_VER := 3.9.0
diff --git a/deps/patches/config.sub b/deps/patches/config.sub
deleted file mode 100755
index 3d9a8dc3d5a76..0000000000000
--- a/deps/patches/config.sub
+++ /dev/null
@@ -1,1851 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-#   Copyright 1992-2020 Free Software Foundation, Inc.
-
-timestamp='2020-07-10'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
-
-
-# Please send patches to <config-patches@gnu.org>.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support.  The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
-
-Canonicalize a configuration name.
-
-Options:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright 1992-2020 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-
-    *local*)
-       # First pass through any local machine types.
-       echo "$1"
-       exit ;;
-
-    * )
-       break ;;
-  esac
-done
-
-case $# in
- 0) echo "$me: missing argument$help" >&2
-    exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
-    exit 1;;
-esac
-
-# Split fields of configuration type
-# shellcheck disable=SC2162
-IFS="-" read field1 field2 field3 field4 <<EOF
-$1
-EOF
-
-# Separate into logical components for further validation
-case $1 in
-	*-*-*-*-*)
-		echo Invalid configuration \`"$1"\': more than four components >&2
-		exit 1
-		;;
-	*-*-*-*)
-		basic_machine=$field1-$field2
-		basic_os=$field3-$field4
-		;;
-	*-*-*)
-		# Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two
-		# parts
-		maybe_os=$field2-$field3
-		case $maybe_os in
-			nto-qnx* | linux-* | uclinux-uclibc* \
-			| uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
-			| netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
-			| storm-chaos* | os2-emx* | rtmk-nova*)
-				basic_machine=$field1
-				basic_os=$maybe_os
-				;;
-			android-linux)
-				basic_machine=$field1-unknown
-				basic_os=linux-android
-				;;
-			*)
-				basic_machine=$field1-$field2
-				basic_os=$field3
-				;;
-		esac
-		;;
-	*-*)
-		# A lone config we happen to match not fitting any pattern
-		case $field1-$field2 in
-			decstation-3100)
-				basic_machine=mips-dec
-				basic_os=
-				;;
-			*-*)
-				# Second component is usually, but not always the OS
-				case $field2 in
-					# Prevent following clause from handling this valid os
-					sun*os*)
-						basic_machine=$field1
-						basic_os=$field2
-						;;
-					# Manufacturers
-					dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \
-					| att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
-					| unicom* | ibm* | next | hp | isi* | apollo | altos* \
-					| convergent* | ncr* | news | 32* | 3600* | 3100* \
-					| hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
-					| ultra | tti* | harris | dolphin | highlevel | gould \
-					| cbm | ns | masscomp | apple | axis | knuth | cray \
-					| microblaze* | sim | cisco \
-					| oki | wec | wrs | winbond)
-						basic_machine=$field1-$field2
-						basic_os=
-						;;
-					*)
-						basic_machine=$field1
-						basic_os=$field2
-						;;
-				esac
-			;;
-		esac
-		;;
-	*)
-		# Convert single-component short-hands not valid as part of
-		# multi-component configurations.
-		case $field1 in
-			386bsd)
-				basic_machine=i386-pc
-				basic_os=bsd
-				;;
-			a29khif)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			adobe68k)
-				basic_machine=m68010-adobe
-				basic_os=scout
-				;;
-			alliant)
-				basic_machine=fx80-alliant
-				basic_os=
-				;;
-			altos | altos3068)
-				basic_machine=m68k-altos
-				basic_os=
-				;;
-			am29k)
-				basic_machine=a29k-none
-				basic_os=bsd
-				;;
-			amdahl)
-				basic_machine=580-amdahl
-				basic_os=sysv
-				;;
-			amiga)
-				basic_machine=m68k-unknown
-				basic_os=
-				;;
-			amigaos | amigados)
-				basic_machine=m68k-unknown
-				basic_os=amigaos
-				;;
-			amigaunix | amix)
-				basic_machine=m68k-unknown
-				basic_os=sysv4
-				;;
-			apollo68)
-				basic_machine=m68k-apollo
-				basic_os=sysv
-				;;
-			apollo68bsd)
-				basic_machine=m68k-apollo
-				basic_os=bsd
-				;;
-			aros)
-				basic_machine=i386-pc
-				basic_os=aros
-				;;
-			aux)
-				basic_machine=m68k-apple
-				basic_os=aux
-				;;
-			balance)
-				basic_machine=ns32k-sequent
-				basic_os=dynix
-				;;
-			blackfin)
-				basic_machine=bfin-unknown
-				basic_os=linux
-				;;
-			cegcc)
-				basic_machine=arm-unknown
-				basic_os=cegcc
-				;;
-			convex-c1)
-				basic_machine=c1-convex
-				basic_os=bsd
-				;;
-			convex-c2)
-				basic_machine=c2-convex
-				basic_os=bsd
-				;;
-			convex-c32)
-				basic_machine=c32-convex
-				basic_os=bsd
-				;;
-			convex-c34)
-				basic_machine=c34-convex
-				basic_os=bsd
-				;;
-			convex-c38)
-				basic_machine=c38-convex
-				basic_os=bsd
-				;;
-			cray)
-				basic_machine=j90-cray
-				basic_os=unicos
-				;;
-			crds | unos)
-				basic_machine=m68k-crds
-				basic_os=
-				;;
-			da30)
-				basic_machine=m68k-da30
-				basic_os=
-				;;
-			decstation | pmax | pmin | dec3100 | decstatn)
-				basic_machine=mips-dec
-				basic_os=
-				;;
-			delta88)
-				basic_machine=m88k-motorola
-				basic_os=sysv3
-				;;
-			dicos)
-				basic_machine=i686-pc
-				basic_os=dicos
-				;;
-			djgpp)
-				basic_machine=i586-pc
-				basic_os=msdosdjgpp
-				;;
-			ebmon29k)
-				basic_machine=a29k-amd
-				basic_os=ebmon
-				;;
-			es1800 | OSE68k | ose68k | ose | OSE)
-				basic_machine=m68k-ericsson
-				basic_os=ose
-				;;
-			gmicro)
-				basic_machine=tron-gmicro
-				basic_os=sysv
-				;;
-			go32)
-				basic_machine=i386-pc
-				basic_os=go32
-				;;
-			h8300hms)
-				basic_machine=h8300-hitachi
-				basic_os=hms
-				;;
-			h8300xray)
-				basic_machine=h8300-hitachi
-				basic_os=xray
-				;;
-			h8500hms)
-				basic_machine=h8500-hitachi
-				basic_os=hms
-				;;
-			harris)
-				basic_machine=m88k-harris
-				basic_os=sysv3
-				;;
-			hp300 | hp300hpux)
-				basic_machine=m68k-hp
-				basic_os=hpux
-				;;
-			hp300bsd)
-				basic_machine=m68k-hp
-				basic_os=bsd
-				;;
-			hppaosf)
-				basic_machine=hppa1.1-hp
-				basic_os=osf
-				;;
-			hppro)
-				basic_machine=hppa1.1-hp
-				basic_os=proelf
-				;;
-			i386mach)
-				basic_machine=i386-mach
-				basic_os=mach
-				;;
-			isi68 | isi)
-				basic_machine=m68k-isi
-				basic_os=sysv
-				;;
-			m68knommu)
-				basic_machine=m68k-unknown
-				basic_os=linux
-				;;
-			magnum | m3230)
-				basic_machine=mips-mips
-				basic_os=sysv
-				;;
-			merlin)
-				basic_machine=ns32k-utek
-				basic_os=sysv
-				;;
-			mingw64)
-				basic_machine=x86_64-pc
-				basic_os=mingw64
-				;;
-			mingw32)
-				basic_machine=i686-pc
-				basic_os=mingw32
-				;;
-			mingw32ce)
-				basic_machine=arm-unknown
-				basic_os=mingw32ce
-				;;
-			monitor)
-				basic_machine=m68k-rom68k
-				basic_os=coff
-				;;
-			morphos)
-				basic_machine=powerpc-unknown
-				basic_os=morphos
-				;;
-			moxiebox)
-				basic_machine=moxie-unknown
-				basic_os=moxiebox
-				;;
-			msdos)
-				basic_machine=i386-pc
-				basic_os=msdos
-				;;
-			msys)
-				basic_machine=i686-pc
-				basic_os=msys
-				;;
-			mvs)
-				basic_machine=i370-ibm
-				basic_os=mvs
-				;;
-			nacl)
-				basic_machine=le32-unknown
-				basic_os=nacl
-				;;
-			ncr3000)
-				basic_machine=i486-ncr
-				basic_os=sysv4
-				;;
-			netbsd386)
-				basic_machine=i386-pc
-				basic_os=netbsd
-				;;
-			netwinder)
-				basic_machine=armv4l-rebel
-				basic_os=linux
-				;;
-			news | news700 | news800 | news900)
-				basic_machine=m68k-sony
-				basic_os=newsos
-				;;
-			news1000)
-				basic_machine=m68030-sony
-				basic_os=newsos
-				;;
-			necv70)
-				basic_machine=v70-nec
-				basic_os=sysv
-				;;
-			nh3000)
-				basic_machine=m68k-harris
-				basic_os=cxux
-				;;
-			nh[45]000)
-				basic_machine=m88k-harris
-				basic_os=cxux
-				;;
-			nindy960)
-				basic_machine=i960-intel
-				basic_os=nindy
-				;;
-			mon960)
-				basic_machine=i960-intel
-				basic_os=mon960
-				;;
-			nonstopux)
-				basic_machine=mips-compaq
-				basic_os=nonstopux
-				;;
-			os400)
-				basic_machine=powerpc-ibm
-				basic_os=os400
-				;;
-			OSE68000 | ose68000)
-				basic_machine=m68000-ericsson
-				basic_os=ose
-				;;
-			os68k)
-				basic_machine=m68k-none
-				basic_os=os68k
-				;;
-			paragon)
-				basic_machine=i860-intel
-				basic_os=osf
-				;;
-			parisc)
-				basic_machine=hppa-unknown
-				basic_os=linux
-				;;
-			psp)
-				basic_machine=mipsallegrexel-sony
-				basic_os=psp
-				;;
-			pw32)
-				basic_machine=i586-unknown
-				basic_os=pw32
-				;;
-			rdos | rdos64)
-				basic_machine=x86_64-pc
-				basic_os=rdos
-				;;
-			rdos32)
-				basic_machine=i386-pc
-				basic_os=rdos
-				;;
-			rom68k)
-				basic_machine=m68k-rom68k
-				basic_os=coff
-				;;
-			sa29200)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			sei)
-				basic_machine=mips-sei
-				basic_os=seiux
-				;;
-			sequent)
-				basic_machine=i386-sequent
-				basic_os=
-				;;
-			sps7)
-				basic_machine=m68k-bull
-				basic_os=sysv2
-				;;
-			st2000)
-				basic_machine=m68k-tandem
-				basic_os=
-				;;
-			stratus)
-				basic_machine=i860-stratus
-				basic_os=sysv4
-				;;
-			sun2)
-				basic_machine=m68000-sun
-				basic_os=
-				;;
-			sun2os3)
-				basic_machine=m68000-sun
-				basic_os=sunos3
-				;;
-			sun2os4)
-				basic_machine=m68000-sun
-				basic_os=sunos4
-				;;
-			sun3)
-				basic_machine=m68k-sun
-				basic_os=
-				;;
-			sun3os3)
-				basic_machine=m68k-sun
-				basic_os=sunos3
-				;;
-			sun3os4)
-				basic_machine=m68k-sun
-				basic_os=sunos4
-				;;
-			sun4)
-				basic_machine=sparc-sun
-				basic_os=
-				;;
-			sun4os3)
-				basic_machine=sparc-sun
-				basic_os=sunos3
-				;;
-			sun4os4)
-				basic_machine=sparc-sun
-				basic_os=sunos4
-				;;
-			sun4sol2)
-				basic_machine=sparc-sun
-				basic_os=solaris2
-				;;
-			sun386 | sun386i | roadrunner)
-				basic_machine=i386-sun
-				basic_os=
-				;;
-			sv1)
-				basic_machine=sv1-cray
-				basic_os=unicos
-				;;
-			symmetry)
-				basic_machine=i386-sequent
-				basic_os=dynix
-				;;
-			t3e)
-				basic_machine=alphaev5-cray
-				basic_os=unicos
-				;;
-			t90)
-				basic_machine=t90-cray
-				basic_os=unicos
-				;;
-			toad1)
-				basic_machine=pdp10-xkl
-				basic_os=tops20
-				;;
-			tpf)
-				basic_machine=s390x-ibm
-				basic_os=tpf
-				;;
-			udi29k)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			ultra3)
-				basic_machine=a29k-nyu
-				basic_os=sym1
-				;;
-			v810 | necv810)
-				basic_machine=v810-nec
-				basic_os=none
-				;;
-			vaxv)
-				basic_machine=vax-dec
-				basic_os=sysv
-				;;
-			vms)
-				basic_machine=vax-dec
-				basic_os=vms
-				;;
-			vsta)
-				basic_machine=i386-pc
-				basic_os=vsta
-				;;
-			vxworks960)
-				basic_machine=i960-wrs
-				basic_os=vxworks
-				;;
-			vxworks68)
-				basic_machine=m68k-wrs
-				basic_os=vxworks
-				;;
-			vxworks29k)
-				basic_machine=a29k-wrs
-				basic_os=vxworks
-				;;
-			xbox)
-				basic_machine=i686-pc
-				basic_os=mingw32
-				;;
-			ymp)
-				basic_machine=ymp-cray
-				basic_os=unicos
-				;;
-			*)
-				basic_machine=$1
-				basic_os=
-				;;
-		esac
-		;;
-esac
-
-# Decode 1-component or ad-hoc basic machines
-case $basic_machine in
-	# Here we handle the default manufacturer of certain CPU types.  It is in
-	# some cases the only manufacturer, in others, it is the most popular.
-	w89k)
-		cpu=hppa1.1
-		vendor=winbond
-		;;
-	op50n)
-		cpu=hppa1.1
-		vendor=oki
-		;;
-	op60c)
-		cpu=hppa1.1
-		vendor=oki
-		;;
-	ibm*)
-		cpu=i370
-		vendor=ibm
-		;;
-	orion105)
-		cpu=clipper
-		vendor=highlevel
-		;;
-	mac | mpw | mac-mpw)
-		cpu=m68k
-		vendor=apple
-		;;
-	pmac | pmac-mpw)
-		cpu=powerpc
-		vendor=apple
-		;;
-
-	# Recognize the various machine names and aliases which stand
-	# for a CPU type and a company and sometimes even an OS.
-	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
-		cpu=m68000
-		vendor=att
-		;;
-	3b*)
-		cpu=we32k
-		vendor=att
-		;;
-	bluegene*)
-		cpu=powerpc
-		vendor=ibm
-		basic_os=cnk
-		;;
-	decsystem10* | dec10*)
-		cpu=pdp10
-		vendor=dec
-		basic_os=tops10
-		;;
-	decsystem20* | dec20*)
-		cpu=pdp10
-		vendor=dec
-		basic_os=tops20
-		;;
-	delta | 3300 | motorola-3300 | motorola-delta \
-	      | 3300-motorola | delta-motorola)
-		cpu=m68k
-		vendor=motorola
-		;;
-	dpx2*)
-		cpu=m68k
-		vendor=bull
-		basic_os=sysv3
-		;;
-	encore | umax | mmax)
-		cpu=ns32k
-		vendor=encore
-		;;
-	elxsi)
-		cpu=elxsi
-		vendor=elxsi
-		basic_os=${basic_os:-bsd}
-		;;
-	fx2800)
-		cpu=i860
-		vendor=alliant
-		;;
-	genix)
-		cpu=ns32k
-		vendor=ns
-		;;
-	h3050r* | hiux*)
-		cpu=hppa1.1
-		vendor=hitachi
-		basic_os=hiuxwe2
-		;;
-	hp3k9[0-9][0-9] | hp9[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	hp9k2[0-9][0-9] | hp9k31[0-9])
-		cpu=m68000
-		vendor=hp
-		;;
-	hp9k3[2-9][0-9])
-		cpu=m68k
-		vendor=hp
-		;;
-	hp9k6[0-9][0-9] | hp6[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	hp9k7[0-79][0-9] | hp7[0-79][0-9])
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k78[0-9] | hp78[0-9])
-		# FIXME: really hppa2.0-hp
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
-		# FIXME: really hppa2.0-hp
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[0-9][13679] | hp8[0-9][13679])
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[0-9][0-9] | hp8[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	i*86v32)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=sysv32
-		;;
-	i*86v4*)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=sysv4
-		;;
-	i*86v)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=sysv
-		;;
-	i*86sol2)
-		cpu=`echo "$1" | sed -e 's/86.*/86/'`
-		vendor=pc
-		basic_os=solaris2
-		;;
-	j90 | j90-cray)
-		cpu=j90
-		vendor=cray
-		basic_os=${basic_os:-unicos}
-		;;
-	iris | iris4d)
-		cpu=mips
-		vendor=sgi
-		case $basic_os in
-		    irix*)
-			;;
-		    *)
-			basic_os=irix4
-			;;
-		esac
-		;;
-	miniframe)
-		cpu=m68000
-		vendor=convergent
-		;;
-	*mint | mint[0-9]* | *MiNT | *MiNT[0-9]*)
-		cpu=m68k
-		vendor=atari
-		basic_os=mint
-		;;
-	news-3600 | risc-news)
-		cpu=mips
-		vendor=sony
-		basic_os=newsos
-		;;
-	next | m*-next)
-		cpu=m68k
-		vendor=next
-		case $basic_os in
-		    openstep*)
-		        ;;
-		    nextstep*)
-			;;
-		    ns2*)
-		      basic_os=nextstep2
-			;;
-		    *)
-		      basic_os=nextstep3
-			;;
-		esac
-		;;
-	np1)
-		cpu=np1
-		vendor=gould
-		;;
-	op50n-* | op60c-*)
-		cpu=hppa1.1
-		vendor=oki
-		basic_os=proelf
-		;;
-	pa-hitachi)
-		cpu=hppa1.1
-		vendor=hitachi
-		basic_os=hiuxwe2
-		;;
-	pbd)
-		cpu=sparc
-		vendor=tti
-		;;
-	pbb)
-		cpu=m68k
-		vendor=tti
-		;;
-	pc532)
-		cpu=ns32k
-		vendor=pc532
-		;;
-	pn)
-		cpu=pn
-		vendor=gould
-		;;
-	power)
-		cpu=power
-		vendor=ibm
-		;;
-	ps2)
-		cpu=i386
-		vendor=ibm
-		;;
-	rm[46]00)
-		cpu=mips
-		vendor=siemens
-		;;
-	rtpc | rtpc-*)
-		cpu=romp
-		vendor=ibm
-		;;
-	sde)
-		cpu=mipsisa32
-		vendor=sde
-		basic_os=${basic_os:-elf}
-		;;
-	simso-wrs)
-		cpu=sparclite
-		vendor=wrs
-		basic_os=vxworks
-		;;
-	tower | tower-32)
-		cpu=m68k
-		vendor=ncr
-		;;
-	vpp*|vx|vx-*)
-		cpu=f301
-		vendor=fujitsu
-		;;
-	w65)
-		cpu=w65
-		vendor=wdc
-		;;
-	w89k-*)
-		cpu=hppa1.1
-		vendor=winbond
-		basic_os=proelf
-		;;
-	none)
-		cpu=none
-		vendor=none
-		;;
-	leon|leon[3-9])
-		cpu=sparc
-		vendor=$basic_machine
-		;;
-	leon-*|leon[3-9]-*)
-		cpu=sparc
-		vendor=`echo "$basic_machine" | sed 's/-.*//'`
-		;;
-
-	*-*)
-		# shellcheck disable=SC2162
-		IFS="-" read cpu vendor <<EOF
-$basic_machine
-EOF
-		;;
-	# We use `pc' rather than `unknown'
-	# because (1) that's what they normally are, and
-	# (2) the word "unknown" tends to confuse beginning users.
-	i*86 | x86_64)
-		cpu=$basic_machine
-		vendor=pc
-		;;
-	# These rules are duplicated from below for sake of the special case above;
-	# i.e. things that normalized to x86 arches should also default to "pc"
-	pc98)
-		cpu=i386
-		vendor=pc
-		;;
-	x64 | amd64)
-		cpu=x86_64
-		vendor=pc
-		;;
-	# Recognize the basic CPU types without company name.
-	*)
-		cpu=$basic_machine
-		vendor=unknown
-		;;
-esac
-
-unset -v basic_machine
-
-# Decode basic machines in the full and proper CPU-Company form.
-case $cpu-$vendor in
-	# Here we handle the default manufacturer of certain CPU types in canonical form. It is in
-	# some cases the only manufacturer, in others, it is the most popular.
-	craynv-unknown)
-		vendor=cray
-		basic_os=${basic_os:-unicosmp}
-		;;
-	c90-unknown | c90-cray)
-		vendor=cray
-		basic_os=${Basic_os:-unicos}
-		;;
-	fx80-unknown)
-		vendor=alliant
-		;;
-	romp-unknown)
-		vendor=ibm
-		;;
-	mmix-unknown)
-		vendor=knuth
-		;;
-	microblaze-unknown | microblazeel-unknown)
-		vendor=xilinx
-		;;
-	rs6000-unknown)
-		vendor=ibm
-		;;
-	vax-unknown)
-		vendor=dec
-		;;
-	pdp11-unknown)
-		vendor=dec
-		;;
-	we32k-unknown)
-		vendor=att
-		;;
-	cydra-unknown)
-		vendor=cydrome
-		;;
-	i370-ibm*)
-		vendor=ibm
-		;;
-	orion-unknown)
-		vendor=highlevel
-		;;
-	xps-unknown | xps100-unknown)
-		cpu=xps100
-		vendor=honeywell
-		;;
-
-	# Here we normalize CPU types with a missing or matching vendor
-	dpx20-unknown | dpx20-bull)
-		cpu=rs6000
-		vendor=bull
-		basic_os=${basic_os:-bosx}
-		;;
-
-	# Here we normalize CPU types irrespective of the vendor
-	amd64-*)
-		cpu=x86_64
-		;;
-	blackfin-*)
-		cpu=bfin
-		basic_os=linux
-		;;
-	c54x-*)
-		cpu=tic54x
-		;;
-	c55x-*)
-		cpu=tic55x
-		;;
-	c6x-*)
-		cpu=tic6x
-		;;
-	e500v[12]-*)
-		cpu=powerpc
-		basic_os=${basic_os}"spe"
-		;;
-	mips3*-*)
-		cpu=mips64
-		;;
-	ms1-*)
-		cpu=mt
-		;;
-	m68knommu-*)
-		cpu=m68k
-		basic_os=linux
-		;;
-	m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
-		cpu=s12z
-		;;
-	openrisc-*)
-		cpu=or32
-		;;
-	parisc-*)
-		cpu=hppa
-		basic_os=linux
-		;;
-	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
-		cpu=i586
-		;;
-	pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
-		cpu=i686
-		;;
-	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
-		cpu=i686
-		;;
-	pentium4-*)
-		cpu=i786
-		;;
-	pc98-*)
-		cpu=i386
-		;;
-	ppc-* | ppcbe-*)
-		cpu=powerpc
-		;;
-	ppcle-* | powerpclittle-*)
-		cpu=powerpcle
-		;;
-	ppc64-*)
-		cpu=powerpc64
-		;;
-	ppc64le-* | powerpc64little-*)
-		cpu=powerpc64le
-		;;
-	sb1-*)
-		cpu=mipsisa64sb1
-		;;
-	sb1el-*)
-		cpu=mipsisa64sb1el
-		;;
-	sh5e[lb]-*)
-		cpu=`echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/'`
-		;;
-	spur-*)
-		cpu=spur
-		;;
-	strongarm-* | thumb-*)
-		cpu=arm
-		;;
-	tx39-*)
-		cpu=mipstx39
-		;;
-	tx39el-*)
-		cpu=mipstx39el
-		;;
-	x64-*)
-		cpu=x86_64
-		;;
-	xscale-* | xscalee[bl]-*)
-		cpu=`echo "$cpu" | sed 's/^xscale/arm/'`
-		;;
-	arm64-*)
-		cpu=aarch64
-		;;
-
-	# Recognize the canonical CPU Types that limit and/or modify the
-	# company names they are paired with.
-	cr16-*)
-		basic_os=${basic_os:-elf}
-		;;
-	crisv32-* | etraxfs*-*)
-		cpu=crisv32
-		vendor=axis
-		;;
-	cris-* | etrax*-*)
-		cpu=cris
-		vendor=axis
-		;;
-	crx-*)
-		basic_os=${basic_os:-elf}
-		;;
-	neo-tandem)
-		cpu=neo
-		vendor=tandem
-		;;
-	nse-tandem)
-		cpu=nse
-		vendor=tandem
-		;;
-	nsr-tandem)
-		cpu=nsr
-		vendor=tandem
-		;;
-	nsv-tandem)
-		cpu=nsv
-		vendor=tandem
-		;;
-	nsx-tandem)
-		cpu=nsx
-		vendor=tandem
-		;;
-	mipsallegrexel-sony)
-		cpu=mipsallegrexel
-		vendor=sony
-		;;
-	tile*-*)
-		basic_os=${basic_os:-linux-gnu}
-		;;
-
-	*)
-		# Recognize the canonical CPU types that are allowed with any
-		# company name.
-		case $cpu in
-			1750a | 580 \
-			| a29k \
-			| aarch64 | aarch64_be \
-			| abacus \
-			| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
-			| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
-			| alphapca5[67] | alpha64pca5[67] \
-			| am33_2.0 \
-			| amdgcn \
-			| arc | arceb \
-			| arm | arm[lb]e | arme[lb] | armv* \
-			| avr | avr32 \
-			| asmjs \
-			| ba \
-			| be32 | be64 \
-			| bfin | bpf | bs2000 \
-			| c[123]* | c30 | [cjt]90 | c4x \
-			| c8051 | clipper | craynv | csky | cydra \
-			| d10v | d30v | dlx | dsp16xx \
-			| e2k | elxsi | epiphany \
-			| f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
-			| h8300 | h8500 \
-			| hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
-			| hexagon \
-			| i370 | i*86 | i860 | i960 | ia16 | ia64 \
-			| ip2k | iq2000 \
-			| k1om \
-			| le32 | le64 \
-			| lm32 \
-			| m32c | m32r | m32rle \
-			| m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
-			| m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
-			| m88110 | m88k | maxq | mb | mcore | mep | metag \
-			| microblaze | microblazeel \
-			| mips | mipsbe | mipseb | mipsel | mipsle \
-			| mips16 \
-			| mips64 | mips64eb | mips64el \
-			| mips64octeon | mips64octeonel \
-			| mips64orion | mips64orionel \
-			| mips64r5900 | mips64r5900el \
-			| mips64vr | mips64vrel \
-			| mips64vr4100 | mips64vr4100el \
-			| mips64vr4300 | mips64vr4300el \
-			| mips64vr5000 | mips64vr5000el \
-			| mips64vr5900 | mips64vr5900el \
-			| mipsisa32 | mipsisa32el \
-			| mipsisa32r2 | mipsisa32r2el \
-			| mipsisa32r6 | mipsisa32r6el \
-			| mipsisa64 | mipsisa64el \
-			| mipsisa64r2 | mipsisa64r2el \
-			| mipsisa64r6 | mipsisa64r6el \
-			| mipsisa64sb1 | mipsisa64sb1el \
-			| mipsisa64sr71k | mipsisa64sr71kel \
-			| mipsr5900 | mipsr5900el \
-			| mipstx39 | mipstx39el \
-			| mmix \
-			| mn10200 | mn10300 \
-			| moxie \
-			| mt \
-			| msp430 \
-			| nds32 | nds32le | nds32be \
-			| nfp \
-			| nios | nios2 | nios2eb | nios2el \
-			| none | np1 | ns16k | ns32k | nvptx \
-			| open8 \
-			| or1k* \
-			| or32 \
-			| orion \
-			| picochip \
-			| pdp10 | pdp11 | pj | pjl | pn | power \
-			| powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
-			| pru \
-			| pyramid \
-			| riscv | riscv32 | riscv64 \
-			| rl78 | romp | rs6000 | rx \
-			| s390 | s390x \
-			| score \
-			| sh | shl \
-			| sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
-			| sh[1234]e[lb] |  sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
-			| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
-			| sparclite \
-			| sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
-			| spu \
-			| tahoe \
-			| tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
-			| tron \
-			| ubicom32 \
-			| v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
-			| vax \
-			| visium \
-			| w65 \
-			| wasm32 | wasm64 \
-			| we32k \
-			| x86 | x86_64 | xc16x | xgate | xps100 \
-			| xstormy16 | xtensa* \
-			| ymp \
-			| z8k | z80)
-				;;
-
-			*)
-				echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
-				exit 1
-				;;
-		esac
-		;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $vendor in
-	digital*)
-		vendor=dec
-		;;
-	commodore*)
-		vendor=cbm
-		;;
-	*)
-		;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if [ x$basic_os != x ]
-then
-
-# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just
-# set os.
-case $basic_os in
-	gnu/linux*)
-		kernel=linux
-		os=`echo $basic_os | sed -e 's|gnu/linux|gnu|'`
-		;;
-	nto-qnx*)
-		kernel=nto
-		os=`echo $basic_os | sed -e 's|nto-qnx|qnx|'`
-		;;
-	*-*)
-		# shellcheck disable=SC2162
-		IFS="-" read kernel os <<EOF
-$basic_os
-EOF
-		;;
-	# Default OS when just kernel was specified
-	nto*)
-		kernel=nto
-		os=`echo $basic_os | sed -e 's|nto|qnx|'`
-		;;
-	linux*)
-		kernel=linux
-		os=`echo $basic_os | sed -e 's|linux|gnu|'`
-		;;
-	*)
-		kernel=
-		os=$basic_os
-		;;
-esac
-
-# Now, normalize the OS (knowing we just have one component, it's not a kernel,
-# etc.)
-case $os in
-	# First match some system type aliases that might get confused
-	# with valid system types.
-	# solaris* is a basic system type, with this one exception.
-	auroraux)
-		os=auroraux
-		;;
-	bluegene*)
-		os=cnk
-		;;
-	solaris1 | solaris1.*)
-		os=`echo $os | sed -e 's|solaris1|sunos4|'`
-		;;
-	solaris)
-		os=solaris2
-		;;
-	unixware*)
-		os=sysv4.2uw
-		;;
-	# es1800 is here to avoid being matched by es* (a different OS)
-	es1800*)
-		os=ose
-		;;
-	# Some version numbers need modification
-	chorusos*)
-		os=chorusos
-		;;
-	isc)
-		os=isc2.2
-		;;
-	sco6)
-		os=sco5v6
-		;;
-	sco5)
-		os=sco3.2v5
-		;;
-	sco4)
-		os=sco3.2v4
-		;;
-	sco3.2.[4-9]*)
-		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
-		;;
-	sco*v* | scout)
-		# Don't match below
-		;;
-	sco*)
-		os=sco3.2v2
-		;;
-	psos*)
-		os=psos
-		;;
-	qnx*)
-		case $cpu in
-		    x86 | i*86)
-			;;
-		    *)
-			os=nto-$os
-			;;
-		esac
-		;;
-	hiux*)
-		os=hiuxwe2
-		;;
-	lynx*178)
-		os=lynxos178
-		;;
-	lynx*5)
-		os=lynxos5
-		;;
-	lynxos*)
-		# don't get caught up in next wildcard
-		;;
-	lynx*)
-		os=lynxos
-		;;
-	mac[0-9]*)
-		os=`echo "$os" | sed -e 's|mac|macos|'`
-		;;
-	opened*)
-		os=openedition
-		;;
-	os400*)
-		os=os400
-		;;
-	sunos5*)
-		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
-		;;
-	sunos6*)
-		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
-		;;
-	wince*)
-		os=wince
-		;;
-	utek*)
-		os=bsd
-		;;
-	dynix*)
-		os=bsd
-		;;
-	acis*)
-		os=aos
-		;;
-	atheos*)
-		os=atheos
-		;;
-	syllable*)
-		os=syllable
-		;;
-	386bsd)
-		os=bsd
-		;;
-	ctix* | uts*)
-		os=sysv
-		;;
-	nova*)
-		os=rtmk-nova
-		;;
-	ns2)
-		os=nextstep2
-		;;
-	# Preserve the version number of sinix5.
-	sinix5.*)
-		os=`echo $os | sed -e 's|sinix|sysv|'`
-		;;
-	sinix*)
-		os=sysv4
-		;;
-	tpf*)
-		os=tpf
-		;;
-	triton*)
-		os=sysv3
-		;;
-	oss*)
-		os=sysv3
-		;;
-	svr4*)
-		os=sysv4
-		;;
-	svr3)
-		os=sysv3
-		;;
-	sysvr4)
-		os=sysv4
-		;;
-	ose*)
-		os=ose
-		;;
-	*mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
-		os=mint
-		;;
-	dicos*)
-		os=dicos
-		;;
-	pikeos*)
-		# Until real need of OS specific support for
-		# particular features comes up, bare metal
-		# configurations are quite functional.
-		case $cpu in
-		    arm*)
-			os=eabi
-			;;
-		    *)
-			os=elf
-			;;
-		esac
-		;;
-	*)
-		# No normalization, but not necessarily accepted, that comes below.
-		;;
-esac
-
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system.  Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-kernel=
-case $cpu-$vendor in
-	score-*)
-		os=elf
-		;;
-	spu-*)
-		os=elf
-		;;
-	*-acorn)
-		os=riscix1.2
-		;;
-	arm*-rebel)
-		kernel=linux
-		os=gnu
-		;;
-	arm*-semi)
-		os=aout
-		;;
-	c4x-* | tic4x-*)
-		os=coff
-		;;
-	c8051-*)
-		os=elf
-		;;
-	clipper-intergraph)
-		os=clix
-		;;
-	hexagon-*)
-		os=elf
-		;;
-	tic54x-*)
-		os=coff
-		;;
-	tic55x-*)
-		os=coff
-		;;
-	tic6x-*)
-		os=coff
-		;;
-	# This must come before the *-dec entry.
-	pdp10-*)
-		os=tops20
-		;;
-	pdp11-*)
-		os=none
-		;;
-	*-dec | vax-*)
-		os=ultrix4.2
-		;;
-	m68*-apollo)
-		os=domain
-		;;
-	i386-sun)
-		os=sunos4.0.2
-		;;
-	m68000-sun)
-		os=sunos3
-		;;
-	m68*-cisco)
-		os=aout
-		;;
-	mep-*)
-		os=elf
-		;;
-	mips*-cisco)
-		os=elf
-		;;
-	mips*-*)
-		os=elf
-		;;
-	or32-*)
-		os=coff
-		;;
-	*-tti)	# must be before sparc entry or we get the wrong os.
-		os=sysv3
-		;;
-	sparc-* | *-sun)
-		os=sunos4.1.1
-		;;
-	pru-*)
-		os=elf
-		;;
-	*-be)
-		os=beos
-		;;
-	*-ibm)
-		os=aix
-		;;
-	*-knuth)
-		os=mmixware
-		;;
-	*-wec)
-		os=proelf
-		;;
-	*-winbond)
-		os=proelf
-		;;
-	*-oki)
-		os=proelf
-		;;
-	*-hp)
-		os=hpux
-		;;
-	*-hitachi)
-		os=hiux
-		;;
-	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
-		os=sysv
-		;;
-	*-cbm)
-		os=amigaos
-		;;
-	*-dg)
-		os=dgux
-		;;
-	*-dolphin)
-		os=sysv3
-		;;
-	m68k-ccur)
-		os=rtu
-		;;
-	m88k-omron*)
-		os=luna
-		;;
-	*-next)
-		os=nextstep
-		;;
-	*-sequent)
-		os=ptx
-		;;
-	*-crds)
-		os=unos
-		;;
-	*-ns)
-		os=genix
-		;;
-	i370-*)
-		os=mvs
-		;;
-	*-gould)
-		os=sysv
-		;;
-	*-highlevel)
-		os=bsd
-		;;
-	*-encore)
-		os=bsd
-		;;
-	*-sgi)
-		os=irix
-		;;
-	*-siemens)
-		os=sysv4
-		;;
-	*-masscomp)
-		os=rtu
-		;;
-	f30[01]-fujitsu | f700-fujitsu)
-		os=uxpv
-		;;
-	*-rom68k)
-		os=coff
-		;;
-	*-*bug)
-		os=coff
-		;;
-	*-apple)
-		os=macos
-		;;
-	*-atari*)
-		os=mint
-		;;
-	*-wrs)
-		os=vxworks
-		;;
-	*)
-		os=none
-		;;
-esac
-
-fi
-
-# Now, validate our (potentially fixed-up) OS.
-case $os in
-	# Sometimes we do "kernel-abi", so those need to count as OSes.
-	musl* | newlib* | uclibc*)
-		;;
-	# Likewise for "kernel-libc"
-	eabi | eabihf | gnueabi | gnueabihf)
-		;;
-	# Now accept the basic system types.
-	# The portable systems comes first.
-	# Each alternative MUST end in a * to match a version number.
-	gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
-	     | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \
-	     | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
-	     | sym* |  plan9* | psp* | sim* | xray* | os68k* | v88r* \
-	     | hiux* | abug | nacl* | netware* | windows* \
-	     | os9* | macos* | osx* | ios* \
-	     | mpw* | magic* | mmixware* | mon960* | lnews* \
-	     | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
-	     | aos* | aros* | cloudabi* | sortix* | twizzler* \
-	     | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
-	     | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
-	     | mirbsd* | netbsd* | dicos* | openedition* | ose* \
-	     | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \
-	     | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
-	     | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
-	     | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
-	     | udi* | lites* | ieee* | go32* | aux* | hcos* \
-	     | chorusrdb* | cegcc* | glidix* \
-	     | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
-	     | midipix* | mingw32* | mingw64* | mint* \
-	     | uxpv* | beos* | mpeix* | udk* | moxiebox* \
-	     | interix* | uwin* | mks* | rhapsody* | darwin* \
-	     | openstep* | oskit* | conix* | pw32* | nonstopux* \
-	     | storm-chaos* | tops10* | tenex* | tops20* | its* \
-	     | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \
-	     | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \
-	     | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
-	     | skyos* | haiku* | rdos* | toppers* | drops* | es* \
-	     | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
-	     | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
-	     | nsk* | powerunix* | genode* | zvmoe* )
-		;;
-	# This one is extra strict with allowed versions
-	sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
-		# Don't forget version if it is 3.2v4 or newer.
-		;;
-	none)
-		;;
-	*)
-		echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
-		exit 1
-		;;
-esac
-
-# As a final step for OS-related things, validate the OS-kernel combination
-# (given a valid OS), if there is a kernel.
-case $kernel-$os in
-	linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* )
-		;;
-	-dietlibc* | -newlib* | -musl* | -uclibc* )
-		# These are just libc implementations, not actual OSes, and thus
-		# require a kernel.
-		echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
-		exit 1
-		;;
-	kfreebsd*-gnu* | kopensolaris*-gnu*)
-		;;
-	nto-qnx*)
-		;;
-	*-eabi* | *-gnueabi*)
-		;;
-	-*)
-		# Blank kernel with real OS is always fine.
-		;;
-	*-*)
-		echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
-		exit 1
-		;;
-esac
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer.  We pick the logical manufacturer.
-case $vendor in
-	unknown)
-		case $cpu-$os in
-			*-riscix*)
-				vendor=acorn
-				;;
-			*-sunos*)
-				vendor=sun
-				;;
-			*-cnk* | *-aix*)
-				vendor=ibm
-				;;
-			*-beos*)
-				vendor=be
-				;;
-			*-hpux*)
-				vendor=hp
-				;;
-			*-mpeix*)
-				vendor=hp
-				;;
-			*-hiux*)
-				vendor=hitachi
-				;;
-			*-unos*)
-				vendor=crds
-				;;
-			*-dgux*)
-				vendor=dg
-				;;
-			*-luna*)
-				vendor=omron
-				;;
-			*-genix*)
-				vendor=ns
-				;;
-			*-clix*)
-				vendor=intergraph
-				;;
-			*-mvs* | *-opened*)
-				vendor=ibm
-				;;
-			*-os400*)
-				vendor=ibm
-				;;
-			s390-* | s390x-*)
-				vendor=ibm
-				;;
-			*-ptx*)
-				vendor=sequent
-				;;
-			*-tpf*)
-				vendor=ibm
-				;;
-			*-vxsim* | *-vxworks* | *-windiss*)
-				vendor=wrs
-				;;
-			*-aux*)
-				vendor=apple
-				;;
-			*-hms*)
-				vendor=hitachi
-				;;
-			*-mpw* | *-macos*)
-				vendor=apple
-				;;
-			*-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*)
-				vendor=atari
-				;;
-			*-vos*)
-				vendor=stratus
-				;;
-		esac
-		;;
-esac
-
-echo "$cpu-$vendor-${kernel:+$kernel-}$os"
-exit
-
-# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/deps/patches/libTracyClient-freebsd-elfw.patch b/deps/patches/libTracyClient-freebsd-elfw.patch
new file mode 100644
index 0000000000000..8feb738714e11
--- /dev/null
+++ b/deps/patches/libTracyClient-freebsd-elfw.patch
@@ -0,0 +1,33 @@
+diff --git a/public/TracyClient.cpp b/public/TracyClient.cpp
+index 77f81a4a..ebeb65c9 100644
+--- a/public/TracyClient.cpp
++++ b/public/TracyClient.cpp
+@@ -19,6 +19,28 @@
+ #  pragma warning(push, 0)
+ #endif
+
++#ifndef ElfW
++#  if defined(FREEBSD)
++#    if __ELF_WORD_SIZE == 32
++#      define ElfW(type) Elf32_##type
++#    else
++#      define ElfW(type) Elf64_##type
++#    endif
++#  elif defined(NETBSD) || defined(OPENBSD)
++#    if ELFSIZE == 32
++#      define ElfW(type) Elf32_##type
++#    else
++#      define ElfW(type) Elf64_##type
++#    endif
++#  else
++#    if !defined(ELF_CLASS) || ELF_CLASS == ELFCLASS32
++#      define ElfW(type) Elf32_##type
++#    else
++#      define ElfW(type) Elf64_##type
++#    endif
++#  endif
++#endif
++
+ #include "common/tracy_lz4.cpp"
+ #include "client/TracyProfiler.cpp"
+ #include "client/TracyCallstack.cpp"
diff --git a/deps/patches/libTracyClient-no-sampling.patch b/deps/patches/libTracyClient-no-sampling.patch
new file mode 100644
index 0000000000000..c4c8576099348
--- /dev/null
+++ b/deps/patches/libTracyClient-no-sampling.patch
@@ -0,0 +1,79 @@
+commit 6249999153a9497b32bc84e9dc95a1537a0af714
+Author: Cody Tapscott <topolarity@tapscott.me>
+Date:   Tue Apr 4 15:20:46 2023 -0400
+
+    linux: respect `TRACY_NO_SAMPLING` for sys-tracing
+
+    This compile-time flag was being ignored on Linux. This change adds
+    gating for software-sampled stack trace sampling following the same
+    pattern as other `TRACY_NO_SAMPLE_*` options.
+
+    If `TRACY_NO_SAMPLING=1` is provided as an environment variable,
+    software stack sampling is also disabled.
+
+diff --git a/public/client/TracySysTrace.cpp b/public/client/TracySysTrace.cpp
+index 4a562eaa..af0641fe 100644
+--- a/public/client/TracySysTrace.cpp
++++ b/public/client/TracySysTrace.cpp
+@@ -770,6 +770,13 @@ bool SysTraceStart( int64_t& samplingPeriod )
+     TracyDebug( "sched_wakeup id: %i\n", wakeupId );
+     TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
+
++#ifdef TRACY_NO_SAMPLING
++    const bool noSoftwareSampling = true;
++#else
++    const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" );
++    const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1';
++#endif
++
+ #ifdef TRACY_NO_SAMPLE_RETIREMENT
+     const bool noRetirement = true;
+ #else
+@@ -839,28 +846,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
+     pe.clockid = CLOCK_MONOTONIC_RAW;
+ #endif
+
+-    TracyDebug( "Setup software sampling\n" );
+-    ProbePreciseIp( pe, currentPid );
+-    for( int i=0; i<s_numCpus; i++ )
++    if( !noSoftwareSampling )
+     {
+-        int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
+-        if( fd == -1 )
++        TracyDebug( "Setup software sampling\n" );
++        ProbePreciseIp( pe, currentPid );
++        for( int i=0; i<s_numCpus; i++ )
+         {
+-            pe.exclude_kernel = 1;
+-            ProbePreciseIp( pe, currentPid );
+-            fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
++            int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
+             if( fd == -1 )
+             {
+-                TracyDebug( "  Failed to setup!\n");
+-                break;
++                pe.exclude_kernel = 1;
++                ProbePreciseIp( pe, currentPid );
++                fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
++                if( fd == -1 )
++                {
++                    TracyDebug( "  Failed to setup!\n");
++                    break;
++                }
++                TracyDebug( "  No access to kernel samples\n" );
++            }
++            new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
++            if( s_ring[s_numBuffers].IsValid() )
++            {
++                s_numBuffers++;
++                TracyDebug( "  Core %i ok\n", i );
+             }
+-            TracyDebug( "  No access to kernel samples\n" );
+-        }
+-        new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
+-        if( s_ring[s_numBuffers].IsValid() )
+-        {
+-            s_numBuffers++;
+-            TracyDebug( "  Core %i ok\n", i );
+         }
+     }
diff --git a/deps/patches/libTracyClient-plot-config.patch b/deps/patches/libTracyClient-plot-config.patch
new file mode 100644
index 0000000000000..7162b39ee901c
--- /dev/null
+++ b/deps/patches/libTracyClient-plot-config.patch
@@ -0,0 +1,57 @@
+commit 7151c6afd9cc40877325c64bd19bcff7211fbd59
+Author: Bartosz Taudul <wolf@nereid.pl>
+Date:   Wed Mar 8 23:18:36 2023 +0100
+
+    Add support for configuring plots to C API.
+
+diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp
+index 6104a7ed..38b5ea13 100644
+--- a/public/client/TracyProfiler.cpp
++++ b/public/client/TracyProfiler.cpp
+@@ -4149,6 +4149,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
+ TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); }
+ TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); }
+ TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); }
++TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color ); }
+ TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); }
+ TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); }
+ TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
+diff --git a/public/tracy/TracyC.h b/public/tracy/TracyC.h
+index bedf5e16..736b51ed 100644
+--- a/public/tracy/TracyC.h
++++ b/public/tracy/TracyC.h
+@@ -11,6 +11,13 @@
+ extern "C" {
+ #endif
+
++enum TracyPlotFormatEnum
++{
++    TracyPlotFormatNumber,
++    TracyPlotFormatMemory,
++    TracyPlotFormatPercentage,
++};
++
+ TRACY_API void ___tracy_set_thread_name( const char* name );
+
+ #define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
+@@ -60,6 +67,8 @@ typedef const void* TracyCZoneCtx;
+ #define TracyCPlot(x,y)
+ #define TracyCPlotF(x,y)
+ #define TracyCPlotI(x,y)
++#define TracyCPlotConfig(x,y,z,w,a)
++
+ #define TracyCMessage(x,y)
+ #define TracyCMessageL(x)
+ #define TracyCMessageC(x,y,z)
+@@ -289,11 +298,13 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
+ TRACY_API void ___tracy_emit_plot( const char* name, double val );
+ TRACY_API void ___tracy_emit_plot_float( const char* name, float val );
+ TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val );
++TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color );
+ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
+
+ #define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
+ #define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val );
+ #define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val );
++#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color );
+ #define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
diff --git a/deps/patches/libgit2-agent-nonfatal.patch b/deps/patches/libgit2-agent-nonfatal.patch
deleted file mode 100644
index 4d46965f27bf1..0000000000000
--- a/deps/patches/libgit2-agent-nonfatal.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-commit 70020247d1903c7a1262d967cf205a44dc6f6ebe
-Author: Keno Fischer <kfischer@college.harvard.edu>
-Date:   Wed Jul 20 19:59:00 2016 -0400
-
-    Make failure to connect to ssh-agent non-fatal
-
-    Julia issue: https://github.com/JuliaLang/julia/pull/17459
-    Upstream: https://github.com/libgit2/libgit2/issues/3866
-
-diff --git a/src/libgit2/transports/ssh.c b/src/libgit2/transports/ssh.c
-index cfd5736..82d2c63 100644
---- a/src/libgit2/transports/ssh.c
-+++ b/src/libgit2/transports/ssh.c
-@@ -296,8 +296,10 @@ static int ssh_agent_auth(LIBSSH2_SESSION *session, git_cred_ssh_key *c) {
- 
- 	rc = libssh2_agent_connect(agent);
- 
--	if (rc != LIBSSH2_ERROR_NONE)
-+	if (rc != LIBSSH2_ERROR_NONE) {
-+		rc = LIBSSH2_ERROR_AUTHENTICATION_FAILED;
- 		goto shutdown;
-+	}
- 
- 	rc = libssh2_agent_list_identities(agent);
- 
diff --git a/deps/patches/libgit2-hostkey.patch b/deps/patches/libgit2-hostkey.patch
deleted file mode 100644
index b53484fc07951..0000000000000
--- a/deps/patches/libgit2-hostkey.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-diff --git a/src/libgit2/transports/ssh.c b/src/libgit2/transports/ssh.c
-index 89f085230..b8bdca61a 100644
---- a/src/libgit2/transports/ssh.c
-+++ b/src/libgit2/transports/ssh.c
-@@ -467,6 +467,7 @@ static int _git_ssh_setup_conn(
- 	git_credential *cred = NULL;
- 	LIBSSH2_SESSION *session=NULL;
- 	LIBSSH2_CHANNEL *channel=NULL;
-+	char *host_and_port;
- 
- 	t->current_stream = NULL;
- 
-@@ -567,10 +568,18 @@ static int _git_ssh_setup_conn(
- 
- 		cert_ptr = &cert;
- 
-+		if (atoi(s->url.port) == SSH_DEFAULT_PORT) {
-+			host_and_port = s->url.host;
-+		} else {
-+			size_t n = strlen(s->url.host) + strlen(s->url.port) + 2;
-+			host_and_port = alloca(n);
-+			sprintf(host_and_port, "%s:%s", s->url.host, s->url.port);
-+		}
-+
- 		error = t->owner->connect_opts.callbacks.certificate_check(
- 			(git_cert *)cert_ptr,
- 			0,
--			s->url.host,
-+			host_and_port,
- 			t->owner->connect_opts.callbacks.payload);
- 
- 		if (error < 0 && error != GIT_PASSTHROUGH) {
diff --git a/deps/patches/libgit2-lowercase-windows-h.patch b/deps/patches/libgit2-lowercase-windows-h.patch
deleted file mode 100644
index f7c79a7e59e11..0000000000000
--- a/deps/patches/libgit2-lowercase-windows-h.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-From d64f3d0992ec278d843c397b4b52e3434962c197 Mon Sep 17 00:00:00 2001
-From: Vinz2008 <68145293+Vinz2008@users.noreply.github.com>
-Date: Thu, 11 Aug 2022 00:25:31 +0200
-Subject: [PATCH] Fix #6365
-
----
- src/cli/opt.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/cli/opt.c b/src/cli/opt.c
-index 72df5877fbf..62a3430d16e 100644
---- a/src/cli/opt.c
-+++ b/src/cli/opt.c
-@@ -23,7 +23,7 @@
- #include "opt.h"
- 
- #ifdef _WIN32
--# include <Windows.h>
-+# include <windows.h>
- #else
- # include <fcntl.h>
- # include <sys/ioctl.h>
diff --git a/deps/pcre.mk b/deps/pcre.mk
index 5ff91b6bc44ac..cd1180d992885 100644
--- a/deps/pcre.mk
+++ b/deps/pcre.mk
@@ -6,6 +6,13 @@ ifneq ($(USE_BINARYBUILDER_PCRE),1)
 PCRE_CFLAGS := -O3
 PCRE_LDFLAGS := $(RPATH_ESCAPED_ORIGIN)
 
+ifeq ($(OS),emscripten)
+PCRE_CFLAGS += -fPIC
+PCRE_JIT = --disable-jit
+else
+PCRE_JIT = --enable-jit
+endif
+
 $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2: | $(SRCCACHE)
 	$(JLDOWNLOAD) $@ https://github.com/PCRE2Project/pcre2/releases/download/pcre2-$(PCRE_VER)/pcre2-$(PCRE_VER).tar.bz2
 
@@ -20,7 +27,7 @@ checksum-pcre: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
 $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured: $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) --enable-jit --includedir=$(build_includedir) CFLAGS="$(CFLAGS) $(PCRE_CFLAGS) -g -O0" LDFLAGS="$(LDFLAGS) $(PCRE_LDFLAGS)"
+	$(dir $<)/configure $(CONFIGURE_COMMON) $(PCRE_JIT) --includedir=$(build_includedir) CFLAGS="$(CFLAGS) $(PCRE_CFLAGS) -g -O0" LDFLAGS="$(LDFLAGS) $(PCRE_LDFLAGS)"
 	echo 1 > $@
 
 $(BUILDDIR)/pcre2-$(PCRE_VER)/build-compiled: $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured
diff --git a/doc/make.jl b/doc/make.jl
index 04b8af595e58f..3c69f4e6c47b5 100644
--- a/doc/make.jl
+++ b/doc/make.jl
@@ -159,6 +159,7 @@ DevDocs = [
         "devdocs/backtraces.md",
         "devdocs/debuggingtips.md",
         "devdocs/valgrind.md",
+        "devdocs/external_profilers.md",
         "devdocs/sanitizers.md",
         "devdocs/probes.md",
     ],
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index 383c588c58dae..fa9f641b1e76f 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -118,6 +118,11 @@ supported (Linux and Windows). If this is not supported (macOS) or
 process affinity is not configured, it uses the number of CPU
 threads.
 
+.TP
+--gcthreads <n>
+Enable n GC threads; If unspecified is set to half of the
+compute worker threads.
+
 .TP
 -p, --procs {N|auto}
 Integer value N launches N additional local worker processes `auto` launches as many workers
diff --git a/doc/src/assets/julialogoheaderimage_dark.svg b/doc/src/assets/julialogoheaderimage_dark.svg
new file mode 100644
index 0000000000000..04e06d2665633
--- /dev/null
+++ b/doc/src/assets/julialogoheaderimage_dark.svg
@@ -0,0 +1,209 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="1280pt" height="640pt" viewBox="0 0 1280 640" version="1.1">
+<defs>
+<linearGradient id="gradient1" x1="0.0" y1="0.0" x2="1" y2="1">
+      <stop offset="0%" stop-color="#2f2c63"/>
+      <stop offset="16.33%" stop-color="#37245c"/>
+      <stop offset="49%" stop-color="#3f134e"/>
+      <stop offset="66%" stop-color="#370d42"/>
+      <stop offset="100%" stop-color="#26042a"/>
+</linearGradient>
+<g>
+<symbol overflow="visible" id="glyph0-0">
+<path style="stroke:none;" d="M 0 -0.210938 L 0 -24.289062 C 0 -24.429688 0.0703125 -24.5 0.210938 -24.5 L 16.378906 -24.5 C 16.519531 -24.5 16.589844 -24.429688 16.589844 -24.289062 L 16.589844 -0.210938 C 16.589844 -0.0703125 16.519531 0 16.378906 0 L 0.210938 0 C 0.0703125 0 0 -0.105469 0 -0.210938 Z M 6.824219 -8.785156 L 9.136719 -8.785156 C 9.519531 -8.785156 9.730469 -8.996094 9.730469 -9.378906 L 9.730469 -9.660156 C 9.730469 -11.933594 14.175781 -11.933594 14.175781 -16.101562 C 14.175781 -19.003906 11.96875 -20.964844 8.503906 -20.964844 C 5.003906 -20.964844 2.589844 -18.898438 2.589844 -15.855469 L 2.589844 -15.609375 C 2.589844 -15.433594 2.835938 -15.296875 3.183594 -15.296875 L 5.53125 -15.191406 C 5.914062 -15.15625 6.125 -15.363281 6.125 -15.75 L 6.125 -15.855469 C 6.125 -16.976562 7 -17.746094 8.433594 -17.746094 C 9.730469 -17.746094 10.605469 -17.046875 10.605469 -15.960938 C 10.605469 -13.511719 6.230469 -13.335938 6.230469 -9.871094 L 6.230469 -9.34375 C 6.230469 -8.996094 6.441406 -8.785156 6.824219 -8.785156 Z M 8.121094 -3.183594 C 9.273438 -3.183594 10.253906 -4.058594 10.253906 -5.179688 C 10.253906 -6.265625 9.273438 -7.175781 8.15625 -7.175781 C 6.964844 -7.175781 5.984375 -6.265625 5.984375 -5.179688 C 5.984375 -4.058594 6.929688 -3.183594 8.121094 -3.183594 Z M 8.121094 -3.183594 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-1">
+<path style="stroke:none;" d="M 11.199219 -18.269531 C 9.34375 -18.269531 7.945312 -17.640625 6.964844 -16.308594 C 6.859375 -16.171875 6.753906 -16.238281 6.753906 -16.414062 L 6.753906 -23.90625 C 6.753906 -24.289062 6.546875 -24.5 6.160156 -24.5 L 2.414062 -24.5 C 2.03125 -24.5 1.820312 -24.289062 1.820312 -23.90625 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -10.886719 C 6.753906 -12.773438 7.945312 -14.035156 9.660156 -14.035156 C 11.375 -14.035156 12.496094 -12.738281 12.496094 -10.886719 L 12.496094 -0.59375 C 12.496094 -0.210938 12.703125 0 13.089844 0 L 16.835938 0 C 17.21875 0 17.429688 -0.210938 17.429688 -0.59375 L 17.429688 -11.898438 C 17.429688 -15.890625 14.875 -18.269531 11.199219 -18.269531 Z M 11.199219 -18.269531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-2">
+<path style="stroke:none;" d="M 4.550781 -19.703125 C 6.371094 -19.703125 7.558594 -20.894531 7.558594 -22.644531 C 7.558594 -24.394531 6.371094 -25.585938 4.550781 -25.585938 C 2.765625 -25.585938 1.539062 -24.394531 1.539062 -22.644531 C 1.539062 -20.894531 2.765625 -19.703125 4.550781 -19.703125 Z M 2.695312 0 L 6.441406 0 C 6.824219 0 7.035156 -0.210938 7.035156 -0.59375 L 7.035156 -17.394531 C 7.035156 -17.78125 6.824219 -17.988281 6.441406 -17.988281 L 2.695312 -17.988281 C 2.308594 -17.988281 2.101562 -17.78125 2.101562 -17.394531 L 2.101562 -0.59375 C 2.101562 -0.210938 2.308594 0 2.695312 0 Z M 2.695312 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-3">
+<path style="stroke:none;" d="M 12.808594 -17.394531 L 12.808594 -16.800781 C 12.808594 -16.625 12.703125 -16.554688 12.566406 -16.695312 C 11.65625 -17.675781 10.324219 -18.269531 8.46875 -18.269531 C 5.320312 -18.269531 2.90625 -16.34375 1.925781 -13.546875 C 1.433594 -12.214844 1.296875 -10.675781 1.296875 -9.101562 C 1.296875 -7.769531 1.398438 -6.230469 1.820312 -4.933594 C 2.976562 -1.363281 6.089844 -0.28125 8.714844 -0.28125 C 10.5 -0.28125 11.761719 -0.804688 12.566406 -1.679688 C 12.703125 -1.820312 12.808594 -1.785156 12.808594 -1.644531 C 12.808594 2.066406 10.113281 2.871094 6.019531 2.308594 C 5.636719 2.238281 5.355469 2.449219 5.355469 2.835938 L 5.214844 6.089844 C 5.214844 6.441406 5.355469 6.683594 5.738281 6.71875 C 11.375 7.421875 17.746094 6.195312 17.746094 -1.609375 L 17.746094 -17.394531 C 17.746094 -17.78125 17.535156 -17.988281 17.148438 -17.988281 L 13.40625 -17.988281 C 13.019531 -17.988281 12.808594 -17.78125 12.808594 -17.394531 Z M 12.53125 -6.265625 C 12.179688 -5.109375 11.234375 -4.199219 9.660156 -4.199219 C 8.15625 -4.199219 7.105469 -5.109375 6.683594 -6.300781 C 6.40625 -6.859375 6.265625 -7.804688 6.265625 -9.101562 C 6.265625 -10.359375 6.441406 -11.269531 6.753906 -11.933594 C 7.210938 -13.125 8.15625 -14.035156 9.625 -14.035156 C 11.128906 -14.035156 12.109375 -13.160156 12.496094 -11.933594 C 12.703125 -11.269531 12.808594 -10.675781 12.808594 -9.136719 C 12.808594 -7.59375 12.703125 -6.929688 12.53125 -6.265625 Z M 12.53125 -6.265625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-4">
+<path style="stroke:none;" d="M 1.609375 -8.226562 L 12.703125 -8.226562 C 13.089844 -8.226562 13.300781 -8.433594 13.300781 -8.820312 L 13.300781 -11.863281 C 13.300781 -12.25 13.089844 -12.460938 12.703125 -12.460938 L 1.609375 -12.460938 C 1.226562 -12.460938 1.015625 -12.25 1.015625 -11.863281 L 1.015625 -8.820312 C 1.015625 -8.433594 1.226562 -8.226562 1.609375 -8.226562 Z M 1.609375 -8.226562 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-5">
+<path style="stroke:none;" d="M 17.921875 -13.125 C 16.976562 -16.203125 14.351562 -18.269531 11.164062 -18.269531 C 9.308594 -18.269531 8.050781 -17.535156 7.175781 -16.378906 C 7.070312 -16.238281 6.929688 -16.273438 6.929688 -16.449219 L 6.929688 -17.394531 C 6.929688 -17.78125 6.71875 -17.988281 6.335938 -17.988281 L 2.589844 -17.988281 C 2.203125 -17.988281 1.996094 -17.78125 1.996094 -17.394531 L 1.996094 5.914062 C 1.996094 6.300781 2.203125 6.511719 2.589844 6.511719 L 6.335938 6.511719 C 6.71875 6.511719 6.929688 6.300781 6.929688 5.914062 L 6.929688 -1.296875 C 6.929688 -1.46875 7.070312 -1.539062 7.175781 -1.398438 C 8.050781 -0.316406 9.34375 0.316406 11.164062 0.316406 C 14.386719 0.316406 16.835938 -1.644531 17.851562 -4.585938 C 18.339844 -5.878906 18.515625 -7.386719 18.515625 -8.925781 C 18.515625 -10.394531 18.375 -11.828125 17.921875 -13.125 Z M 12.496094 -5.355469 C 11.933594 -4.480469 11.128906 -4.023438 10.046875 -4.023438 C 9.03125 -4.023438 8.261719 -4.515625 7.699219 -5.390625 C 7.210938 -6.265625 6.964844 -7.488281 6.964844 -8.996094 C 6.964844 -10.429688 7.210938 -11.621094 7.664062 -12.460938 C 8.191406 -13.40625 8.996094 -13.964844 10.046875 -13.964844 C 11.199219 -13.964844 12.074219 -13.40625 12.601562 -12.460938 C 13.054688 -11.621094 13.335938 -10.429688 13.335938 -9.03125 C 13.335938 -7.488281 13.019531 -6.230469 12.496094 -5.355469 Z M 12.496094 -5.355469 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-6">
+<path style="stroke:none;" d="M 10.5 -3.886719 C 8.539062 -3.886719 7.105469 -4.691406 6.613281 -6.125 C 6.511719 -6.40625 6.441406 -6.753906 6.371094 -7.210938 C 6.371094 -7.351562 6.441406 -7.421875 6.578125 -7.421875 L 17.324219 -7.421875 C 17.710938 -7.421875 17.921875 -7.628906 17.921875 -8.015625 C 17.886719 -8.925781 17.816406 -9.800781 17.710938 -10.570312 C 16.941406 -15.296875 14.386719 -18.304688 9.660156 -18.304688 C 5.738281 -18.304688 2.765625 -15.996094 1.820312 -12.566406 C 1.539062 -11.550781 1.433594 -10.464844 1.433594 -9.136719 C 1.433594 -7.980469 1.574219 -6.894531 1.855469 -5.878906 C 2.800781 -2.171875 5.636719 0.28125 10.046875 0.28125 C 12.914062 0.28125 15.433594 -0.910156 16.941406 -2.800781 C 17.148438 -3.113281 17.148438 -3.359375 16.871094 -3.640625 L 14.875 -5.566406 C 14.59375 -5.84375 14.316406 -5.808594 14.035156 -5.496094 C 13.265625 -4.550781 12.074219 -3.953125 10.5 -3.886719 Z M 9.589844 -14.035156 C 11.199219 -14.035156 12.214844 -13.160156 12.636719 -11.828125 C 12.703125 -11.621094 12.738281 -11.410156 12.773438 -11.09375 C 12.808594 -10.953125 12.738281 -10.886719 12.601562 -10.886719 L 6.648438 -10.886719 C 6.511719 -10.886719 6.441406 -10.953125 6.476562 -11.09375 C 6.511719 -11.445312 6.613281 -11.726562 6.683594 -11.96875 C 7.070312 -13.230469 8.050781 -14.035156 9.589844 -14.035156 Z M 9.589844 -14.035156 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-7">
+<path style="stroke:none;" d="M 10.78125 -18.234375 C 9.066406 -18.234375 7.839844 -17.429688 7 -16.203125 C 6.859375 -16.066406 6.753906 -16.136719 6.753906 -16.308594 L 6.753906 -17.394531 C 6.753906 -17.78125 6.546875 -17.988281 6.160156 -17.988281 L 2.414062 -17.988281 C 2.03125 -17.988281 1.820312 -17.78125 1.820312 -17.394531 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -9.976562 C 6.753906 -11.863281 8.191406 -12.949219 9.765625 -13.089844 C 10.464844 -13.195312 11.128906 -13.160156 11.585938 -13.019531 C 12.003906 -12.949219 12.214844 -12.984375 12.285156 -13.40625 L 12.949219 -17.078125 C 13.019531 -17.394531 12.949219 -17.675781 12.636719 -17.816406 C 12.214844 -18.058594 11.621094 -18.234375 10.78125 -18.234375 Z M 10.78125 -18.234375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-8">
+<path style="stroke:none;" d="M 11.898438 -18.023438 L 8.714844 -18.023438 C 8.574219 -18.023438 8.503906 -18.09375 8.503906 -18.234375 L 8.503906 -18.585938 C 8.503906 -20.265625 9.03125 -20.789062 10.675781 -20.824219 L 11.65625 -20.824219 C 12.039062 -20.824219 12.25 -21.035156 12.25 -21.421875 L 12.25 -23.90625 C 12.25 -24.289062 12.039062 -24.5 11.65625 -24.5 L 10.5 -24.5 C 5.355469 -24.640625 3.570312 -23.238281 3.570312 -18.96875 L 3.570312 -18.234375 C 3.570312 -18.09375 3.464844 -18.023438 3.359375 -18.023438 L 1.503906 -18.023438 C 1.121094 -18.023438 0.910156 -17.816406 0.910156 -17.429688 L 0.910156 -14.453125 C 0.910156 -14.070312 1.121094 -13.859375 1.503906 -13.859375 L 3.359375 -13.859375 C 3.5 -13.859375 3.570312 -13.789062 3.570312 -13.648438 L 3.570312 -0.59375 C 3.570312 -0.210938 3.78125 0 4.164062 0 L 7.910156 0 C 8.296875 0 8.503906 -0.210938 8.503906 -0.59375 L 8.503906 -13.648438 C 8.503906 -13.789062 8.574219 -13.859375 8.714844 -13.859375 L 11.898438 -13.859375 C 12.285156 -13.859375 12.496094 -14.070312 12.496094 -14.453125 L 12.496094 -17.429688 C 12.496094 -17.816406 12.285156 -18.023438 11.898438 -18.023438 Z M 11.898438 -18.023438 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-9">
+<path style="stroke:none;" d="M 9.800781 0.28125 C 13.71875 0.28125 16.625 -1.960938 17.675781 -5.566406 C 17.953125 -6.613281 18.128906 -7.875 18.128906 -9.101562 C 18.128906 -10.394531 17.953125 -11.691406 17.605469 -12.773438 C 16.519531 -16.171875 13.683594 -18.269531 9.835938 -18.269531 C 5.84375 -18.269531 2.941406 -16.171875 1.890625 -12.738281 C 1.539062 -11.691406 1.363281 -10.359375 1.363281 -9.03125 C 1.363281 -7.769531 1.539062 -6.511719 1.855469 -5.460938 C 2.871094 -1.925781 5.808594 0.28125 9.800781 0.28125 Z M 9.800781 -3.953125 C 8.15625 -3.953125 7.070312 -4.96875 6.613281 -6.578125 C 6.441406 -7.210938 6.335938 -8.121094 6.335938 -9.03125 C 6.335938 -9.976562 6.441406 -10.886719 6.613281 -11.515625 C 7.070312 -13.054688 8.15625 -14.035156 9.730469 -14.035156 C 11.339844 -14.035156 12.425781 -13.089844 12.878906 -11.515625 C 13.054688 -10.886719 13.160156 -9.976562 13.160156 -9.03125 C 13.160156 -8.15625 13.089844 -7.28125 12.878906 -6.578125 C 12.425781 -4.96875 11.339844 -3.953125 9.800781 -3.953125 Z M 9.800781 -3.953125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-10">
+<path style="stroke:none;" d="M 21.769531 -18.269531 C 19.671875 -18.269531 17.816406 -17.394531 16.730469 -15.46875 C 16.660156 -15.328125 16.519531 -15.296875 16.449219 -15.46875 C 15.503906 -17.253906 13.753906 -18.269531 11.410156 -18.269531 C 9.519531 -18.269531 7.980469 -17.605469 7 -16.203125 C 6.894531 -16.03125 6.753906 -16.101562 6.753906 -16.273438 L 6.753906 -17.394531 C 6.753906 -17.78125 6.546875 -17.988281 6.160156 -17.988281 L 2.414062 -17.988281 C 2.03125 -17.988281 1.820312 -17.78125 1.820312 -17.394531 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -11.515625 C 6.964844 -13.089844 8.015625 -14.035156 9.519531 -14.035156 C 11.164062 -14.035156 12.285156 -12.808594 12.285156 -10.921875 L 12.285156 -0.59375 C 12.285156 -0.210938 12.496094 0 12.878906 0 L 16.589844 0 C 16.976562 0 17.183594 -0.210938 17.183594 -0.59375 L 17.183594 -10.953125 C 17.183594 -12.84375 18.339844 -14.035156 19.949219 -14.035156 C 21.59375 -14.035156 22.644531 -12.808594 22.644531 -10.921875 L 22.644531 -0.59375 C 22.644531 -0.210938 22.855469 0 23.238281 0 L 26.984375 0 C 27.371094 0 27.578125 -0.210938 27.578125 -0.59375 L 27.578125 -12.003906 C 27.578125 -15.960938 25.410156 -18.269531 21.769531 -18.269531 Z M 21.769531 -18.269531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-11">
+<path style="stroke:none;" d="M 9.03125 -18.269531 C 4.796875 -18.269531 1.679688 -16.136719 1.328125 -13.230469 C 1.296875 -12.949219 1.539062 -12.773438 1.890625 -12.738281 L 5.773438 -12.25 C 6.125 -12.214844 6.335938 -12.355469 6.476562 -12.703125 C 6.789062 -13.546875 7.734375 -14.035156 9.136719 -14.035156 C 10.988281 -14.035156 11.933594 -13.089844 11.933594 -11.585938 L 11.933594 -10.851562 C 11.933594 -10.710938 11.828125 -10.640625 11.726562 -10.640625 L 8.191406 -10.640625 C 3.464844 -10.640625 0.910156 -8.503906 0.910156 -4.96875 C 0.910156 -1.363281 3.464844 0.28125 6.894531 0.28125 C 8.996094 0.28125 10.605469 -0.316406 11.691406 -1.503906 C 11.828125 -1.644531 11.933594 -1.609375 11.933594 -1.433594 L 11.933594 -0.59375 C 11.933594 -0.210938 12.144531 0 12.53125 0 L 16.273438 0 C 16.660156 0 16.871094 -0.210938 16.871094 -0.59375 L 16.871094 -12.285156 C 16.871094 -15.679688 13.648438 -18.269531 9.03125 -18.269531 Z M 8.328125 -3.394531 C 6.859375 -3.394531 5.84375 -4.164062 5.84375 -5.425781 C 5.84375 -6.894531 7.140625 -7.699219 9.238281 -7.699219 L 11.726562 -7.699219 C 11.863281 -7.699219 11.933594 -7.59375 11.933594 -7.488281 L 11.933594 -6.265625 C 11.933594 -4.550781 10.289062 -3.394531 8.328125 -3.394531 Z M 8.328125 -3.394531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-12">
+<path style="stroke:none;" d="M 11.199219 -18.269531 C 9.34375 -18.269531 7.945312 -17.640625 6.964844 -16.308594 C 6.859375 -16.171875 6.753906 -16.238281 6.753906 -16.414062 L 6.753906 -17.394531 C 6.753906 -17.78125 6.546875 -17.988281 6.160156 -17.988281 L 2.414062 -17.988281 C 2.03125 -17.988281 1.820312 -17.78125 1.820312 -17.394531 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -10.886719 C 6.753906 -12.773438 7.945312 -14.035156 9.660156 -14.035156 C 11.375 -14.035156 12.496094 -12.738281 12.496094 -10.886719 L 12.496094 -0.59375 C 12.496094 -0.210938 12.703125 0 13.089844 0 L 16.835938 0 C 17.21875 0 17.429688 -0.210938 17.429688 -0.59375 L 17.429688 -11.898438 C 17.429688 -15.890625 14.875 -18.269531 11.199219 -18.269531 Z M 11.199219 -18.269531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-13">
+<path style="stroke:none;" d="M 9.730469 0.28125 C 13.371094 0.28125 16.449219 -1.574219 17.464844 -4.726562 C 17.570312 -5.003906 17.605469 -5.285156 17.640625 -5.53125 C 17.710938 -5.878906 17.5 -6.125 17.148438 -6.195312 L 13.476562 -6.753906 C 13.089844 -6.824219 12.808594 -6.40625 12.773438 -6.265625 C 12.773438 -6.265625 12.773438 -6.195312 12.738281 -6.089844 C 12.320312 -4.796875 11.164062 -3.953125 9.695312 -3.953125 C 8.15625 -3.953125 7.105469 -4.828125 6.683594 -6.195312 C 6.476562 -6.824219 6.335938 -7.769531 6.335938 -9.066406 C 6.335938 -10.289062 6.476562 -11.269531 6.71875 -11.933594 C 7.140625 -13.230469 8.191406 -14.035156 9.695312 -14.035156 C 11.304688 -14.035156 12.460938 -13.089844 12.773438 -11.933594 L 12.878906 -11.445312 C 12.914062 -11.128906 13.195312 -10.988281 13.546875 -11.058594 L 17.21875 -11.621094 C 17.570312 -11.691406 17.78125 -11.898438 17.746094 -12.25 C 17.710938 -12.53125 17.640625 -12.914062 17.464844 -13.335938 C 16.554688 -16.101562 13.578125 -18.269531 9.730469 -18.269531 C 5.984375 -18.269531 3.078125 -16.308594 1.960938 -13.160156 C 1.609375 -12.144531 1.363281 -10.851562 1.363281 -9.101562 C 1.363281 -7.59375 1.539062 -6.195312 1.960938 -4.96875 C 3.113281 -1.75 5.984375 0.28125 9.730469 0.28125 Z M 9.730469 0.28125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-14">
+<g>
+</g>
+</symbol>
+<symbol overflow="visible" id="glyph0-15">
+<path style="stroke:none;" d="M 8.996094 0.246094 C 13.511719 0.246094 16.589844 -1.714844 16.589844 -5.25 C 16.589844 -8.46875 13.964844 -9.695312 11.199219 -10.5 C 8.609375 -11.304688 6.160156 -11.410156 6.160156 -12.808594 C 6.160156 -13.824219 7.316406 -14.386719 8.820312 -14.386719 C 10.605469 -14.386719 11.65625 -13.578125 11.65625 -12.566406 L 11.65625 -12.355469 C 11.65625 -12.214844 11.863281 -12.144531 12.25 -12.144531 L 15.679688 -12.144531 C 16.066406 -12.144531 16.273438 -12.320312 16.273438 -12.566406 C 16.273438 -15.890625 13.300781 -18.234375 8.785156 -18.234375 C 4.445312 -18.234375 1.433594 -16.101562 1.433594 -12.601562 C 1.433594 -9.484375 3.988281 -8.296875 6.371094 -7.488281 C 8.960938 -6.441406 11.621094 -6.476562 11.621094 -5.074219 C 11.621094 -4.128906 10.570312 -3.429688 8.960938 -3.429688 C 7.210938 -3.429688 5.984375 -4.269531 5.984375 -5.285156 L 5.984375 -5.53125 C 5.984375 -5.671875 5.773438 -5.738281 5.390625 -5.738281 L 1.855469 -5.738281 C 1.46875 -5.738281 1.261719 -5.53125 1.261719 -5.144531 L 1.261719 -4.96875 C 1.261719 -1.925781 4.234375 0.246094 8.996094 0.246094 Z M 8.996094 0.246094 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-16">
+<path style="stroke:none;" d="M 12.320312 -17.394531 L 12.320312 -6.546875 C 12.109375 -5.003906 11.023438 -3.953125 9.414062 -3.953125 C 7.628906 -3.953125 6.578125 -5.25 6.578125 -7.105469 L 6.578125 -17.394531 C 6.578125 -17.78125 6.371094 -17.988281 5.984375 -17.988281 L 2.273438 -17.988281 C 1.890625 -17.988281 1.679688 -17.78125 1.679688 -17.394531 L 1.679688 -5.878906 C 1.679688 -1.855469 4.375 0.246094 7.769531 0.246094 C 9.730469 0.246094 11.164062 -0.386719 12.074219 -1.609375 C 12.179688 -1.75 12.320312 -1.714844 12.320312 -1.539062 L 12.320312 -0.59375 C 12.320312 -0.210938 12.53125 0 12.914062 0 L 16.660156 0 C 17.046875 0 17.253906 -0.210938 17.253906 -0.59375 L 17.253906 -17.394531 C 17.253906 -17.78125 17.046875 -17.988281 16.660156 -17.988281 L 12.914062 -17.988281 C 12.53125 -17.988281 12.320312 -17.78125 12.320312 -17.394531 Z M 12.320312 -17.394531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-17">
+<path style="stroke:none;" d="M 2.484375 0 L 6.230469 0 C 6.613281 0 6.824219 -0.210938 6.824219 -0.59375 L 6.824219 -23.90625 C 6.824219 -24.289062 6.613281 -24.5 6.230469 -24.5 L 2.484375 -24.5 C 2.101562 -24.5 1.890625 -24.289062 1.890625 -23.90625 L 1.890625 -0.59375 C 1.890625 -0.210938 2.101562 0 2.484375 0 Z M 2.484375 0 "/>
+</symbol>
+</g>
+<clipPath id="clip1">
+  <path d="M 5 5 L 1275 5 L 1275 635 L 5 635 Z M 5 5 "/>
+</clipPath>
+<clipPath id="clip2">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip3">
+  <path d="M 414 216 L 864 216 L 864 460 L 414 460 Z M 414 216 "/>
+</clipPath>
+<clipPath id="clip4">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip5">
+  <path d="M 456 216 L 510 216 L 510 269 L 456 269 Z M 456 216 "/>
+</clipPath>
+<clipPath id="clip6">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip7">
+  <path d="M 731 161 L 785 161 L 785 215 L 731 215 Z M 731 161 "/>
+</clipPath>
+<clipPath id="clip8">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip9">
+  <path d="M 763 216 L 816 216 L 816 269 L 763 269 Z M 763 216 "/>
+</clipPath>
+<clipPath id="clip10">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip11">
+  <path d="M 700 216 L 753 216 L 753 269 L 700 269 Z M 700 216 "/>
+</clipPath>
+<clipPath id="clip12">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip13">
+  <path d="M 217 544 L 1064 544 L 1064 578 L 217 578 Z M 217 544 "/>
+</clipPath>
+<clipPath id="clip14">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip15">
+  <path d="M 5 5 L 1275 5 L 1275 635 L 5 635 Z M 5 5 "/>
+</clipPath>
+<clipPath id="clip16">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+</defs>
+<g id="surface861">
+<g clip-path="url(#clip1)" clip-rule="nonzero">
+<g clip-path="url(#clip2)" clip-rule="nonzero">
+<rect x="0" y="0" width="1280" height="640" fill="url(#gradient1)"/>
+</g>
+</g>
+<g clip-path="url(#clip3)" clip-rule="nonzero">
+<g clip-path="url(#clip4)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(94.117647%,100%,100%);fill-opacity:1;" d="M 501.804688 406.21875 C 501.804688 417.523438 500.535156 426.648438 497.992188 433.601562 C 495.449219 440.554688 491.835938 445.953125 487.140625 449.796875 C 482.453125 453.640625 476.828125 456.210938 470.273438 457.511719 C 463.714844 458.804688 456.421875 459.457031 448.398438 459.457031 C 437.539062 459.457031 429.234375 457.765625 423.472656 454.371094 C 417.707031 450.976562 414.824219 446.914062 414.824219 442.160156 C 414.824219 438.203125 416.429688 434.871094 419.652344 432.15625 C 422.875 429.445312 427.199219 428.09375 432.625 428.09375 C 436.695312 428.09375 439.949219 429.195312 442.375 431.398438 C 444.804688 433.601562 446.816406 435.78125 448.398438 437.921875 C 450.203125 440.296875 451.734375 441.878906 452.976562 442.671875 C 454.21875 443.460938 455.347656 443.859375 456.367188 443.859375 C 458.511719 443.859375 460.152344 442.585938 461.28125 440.046875 C 462.414062 437.5 462.976562 432.554688 462.976562 425.210938 L 462.976562 285.828125 L 501.804688 275.144531 Z M 557.109375 278.871094 L 557.109375 368.570312 C 557.109375 371.0625 557.585938 373.40625 558.546875 375.609375 C 559.507812 377.8125 560.839844 379.710938 562.53125 381.292969 C 564.234375 382.875 566.207031 384.148438 568.46875 385.109375 C 570.730469 386.070312 573.164062 386.546875 575.765625 386.546875 C 578.699219 386.546875 582.039062 384.902344 586.105469 382.054688 C 592.546875 377.542969 596.445312 374.445312 596.445312 370.773438 C 596.445312 369.898438 596.445312 278.871094 596.445312 278.871094 L 635.109375 278.871094 L 635.109375 406.21875 L 596.445312 406.21875 L 596.445312 394.347656 C 591.359375 398.640625 585.9375 402.085938 580.171875 404.6875 C 574.40625 407.289062 568.808594 408.59375 563.382812 408.59375 C 557.054688 408.59375 551.171875 407.542969 545.746094 405.453125 C 540.320312 403.359375 535.574219 400.507812 531.507812 396.890625 C 527.4375 393.273438 524.242188 389.03125 521.921875 384.171875 C 519.609375 379.3125 518.445312 374.109375 518.445312 368.570312 L 518.445312 278.871094 Z M 689.324219 406.21875 L 650.835938 406.21875 L 650.835938 227.664062 L 689.324219 216.984375 Z M 705.402344 285.828125 L 744.0625 275.144531 L 744.0625 406.21875 L 705.402344 406.21875 Z M 824.839844 340.421875 C 821.109375 342.011719 817.34375 343.84375 813.5625 345.9375 C 809.773438 348.03125 806.324219 350.320312 803.21875 352.804688 C 800.109375 355.289062 797.59375 357.949219 795.671875 360.773438 C 793.75 363.597656 792.789062 366.539062 792.789062 369.59375 C 792.789062 371.964844 793.101562 374.257812 793.71875 376.460938 C 794.34375 378.664062 795.21875 380.554688 796.351562 382.136719 C 797.484375 383.71875 798.726562 384.992188 800.085938 385.953125 C 801.4375 386.914062 802.90625 387.390625 804.492188 387.390625 C 807.65625 387.390625 810.847656 386.429688 814.070312 384.507812 C 817.292969 382.585938 820.878906 380.164062 824.839844 377.21875 Z M 863.664062 406.21875 L 824.839844 406.21875 L 824.839844 396.039062 C 822.6875 397.851562 820.597656 399.523438 818.5625 401.046875 C 816.53125 402.570312 814.242188 403.898438 811.695312 405.03125 C 809.15625 406.160156 806.300781 407.03125 803.132812 407.660156 C 799.96875 408.28125 796.234375 408.59375 791.945312 408.59375 C 786.0625 408.59375 780.777344 407.742188 776.089844 406.046875 C 771.398438 404.351562 767.414062 402.035156 764.136719 399.09375 C 760.855469 396.15625 758.34375 392.679688 756.589844 388.664062 C 754.835938 384.65625 753.960938 380.273438 753.960938 375.527344 C 753.960938 370.664062 754.890625 366.257812 756.757812 362.296875 C 758.625 358.34375 761.164062 354.785156 764.390625 351.617188 C 767.609375 348.453125 771.367188 345.625 775.664062 343.136719 C 779.957031 340.652344 784.570312 338.359375 789.484375 336.269531 C 794.398438 334.179688 799.515625 332.261719 804.828125 330.507812 C 810.140625 328.757812 815.398438 327.085938 820.597656 325.507812 L 824.839844 324.484375 L 824.839844 311.941406 C 824.839844 303.804688 823.28125 298.039062 820.175781 294.644531 C 817.0625 291.25 812.910156 289.558594 807.710938 289.558594 C 801.609375 289.558594 797.367188 291.03125 794.992188 293.964844 C 792.617188 296.90625 791.429688 300.460938 791.429688 304.648438 C 791.429688 307.019531 791.179688 309.339844 790.667969 311.601562 C 790.164062 313.863281 789.285156 315.835938 788.042969 317.539062 C 786.800781 319.230469 785.019531 320.589844 782.699219 321.601562 C 780.382812 322.625 777.472656 323.132812 773.96875 323.132812 C 768.542969 323.132812 764.136719 321.574219 760.742188 318.46875 C 757.351562 315.359375 755.65625 311.429688 755.65625 306.679688 C 755.65625 302.273438 757.152344 298.179688 760.148438 294.390625 C 763.148438 290.601562 767.183594 287.351562 772.273438 284.640625 C 777.359375 281.921875 783.238281 279.78125 789.90625 278.195312 C 796.574219 276.617188 803.640625 275.824219 811.101562 275.824219 C 820.257812 275.824219 828.144531 276.644531 834.757812 278.28125 C 841.367188 279.921875 846.824219 282.265625 851.117188 285.316406 C 855.414062 288.367188 858.578125 292.042969 860.609375 296.335938 C 862.648438 300.632812 863.664062 305.445312 863.664062 310.75 Z M 863.664062 406.21875 "/>
+</g>
+</g>
+<g clip-path="url(#clip5)" clip-rule="nonzero">
+<g clip-path="url(#clip6)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(25.1%,38.8%,84.7%);fill-opacity:1;" d="M 509.429688 242.382812 C 509.429688 256.886719 497.675781 268.632812 483.179688 268.632812 C 468.683594 268.632812 456.929688 256.886719 456.929688 242.382812 C 456.929688 227.890625 468.683594 216.132812 483.179688 216.132812 C 497.675781 216.132812 509.429688 227.890625 509.429688 242.382812 "/>
+</g>
+</g>
+<g clip-path="url(#clip7)" clip-rule="nonzero">
+<g clip-path="url(#clip8)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(22%,59.6%,14.9%);fill-opacity:1;" d="M 784.453125 187.757812 C 784.453125 202.253906 772.695312 214.007812 758.203125 214.007812 C 743.699219 214.007812 731.953125 202.253906 731.953125 187.757812 C 731.953125 173.261719 743.699219 161.507812 758.203125 161.507812 C 772.695312 161.507812 784.453125 173.261719 784.453125 187.757812 "/>
+</g>
+</g>
+<g clip-path="url(#clip9)" clip-rule="nonzero">
+<g clip-path="url(#clip10)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(58.4%,34.5%,69.8%);fill-opacity:1;" d="M 815.980469 242.382812 C 815.980469 256.886719 804.226562 268.632812 789.730469 268.632812 C 775.234375 268.632812 763.480469 256.886719 763.480469 242.382812 C 763.480469 227.890625 775.234375 216.132812 789.730469 216.132812 C 804.226562 216.132812 815.980469 227.890625 815.980469 242.382812 "/>
+</g>
+</g>
+<g clip-path="url(#clip11)" clip-rule="nonzero">
+<g clip-path="url(#clip12)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(79.6%,23.5%,20%);fill-opacity:1;" d="M 752.910156 242.382812 C 752.910156 256.886719 741.15625 268.632812 726.660156 268.632812 C 712.15625 268.632812 700.410156 256.886719 700.410156 242.382812 C 700.410156 227.890625 712.15625 216.132812 726.660156 216.132812 C 741.15625 216.132812 752.910156 227.890625 752.910156 242.382812 "/>
+</g>
+</g>
+<g clip-path="url(#clip13)" clip-rule="nonzero">
+<g clip-path="url(#clip14)" clip-rule="nonzero">
+<g style="fill:rgb(94.117647%,100%,100%);fill-opacity:1;">
+  <use xlink:href="#glyph0-1" x="215.345" y="570"/>
+  <use xlink:href="#glyph0-2" x="234.49" y="570"/>
+  <use xlink:href="#glyph0-3" x="243.59" y="570"/>
+  <use xlink:href="#glyph0-1" x="263.05" y="570"/>
+  <use xlink:href="#glyph0-4" x="282.195" y="570"/>
+  <use xlink:href="#glyph0-5" x="296.51" y="570"/>
+  <use xlink:href="#glyph0-6" x="316.075" y="570"/>
+  <use xlink:href="#glyph0-7" x="335.22" y="570"/>
+  <use xlink:href="#glyph0-8" x="348.625" y="570"/>
+  <use xlink:href="#glyph0-9" x="362.17" y="570"/>
+  <use xlink:href="#glyph0-7" x="381.665" y="570"/>
+  <use xlink:href="#glyph0-10" x="395.07" y="570"/>
+  <use xlink:href="#glyph0-11" x="424.295" y="570"/>
+  <use xlink:href="#glyph0-12" x="442.775" y="570"/>
+  <use xlink:href="#glyph0-13" x="461.92" y="570"/>
+  <use xlink:href="#glyph0-6" x="480.715" y="570"/>
+  <use xlink:href="#glyph0-14" x="499.86" y="570"/>
+  <use xlink:href="#glyph0-9" x="506.86" y="570"/>
+  <use xlink:href="#glyph0-5" x="526.355" y="570"/>
+  <use xlink:href="#glyph0-6" x="545.92" y="570"/>
+  <use xlink:href="#glyph0-12" x="565.065" y="570"/>
+  <use xlink:href="#glyph0-14" x="584.21" y="570"/>
+  <use xlink:href="#glyph0-15" x="591.21" y="570"/>
+  <use xlink:href="#glyph0-9" x="608.815" y="570"/>
+  <use xlink:href="#glyph0-16" x="628.31" y="570"/>
+  <use xlink:href="#glyph0-7" x="647.385" y="570"/>
+  <use xlink:href="#glyph0-13" x="660.79" y="570"/>
+  <use xlink:href="#glyph0-6" x="679.585" y="570"/>
+  <use xlink:href="#glyph0-14" x="698.73" y="570"/>
+  <use xlink:href="#glyph0-5" x="705.73" y="570"/>
+  <use xlink:href="#glyph0-7" x="725.295" y="570"/>
+  <use xlink:href="#glyph0-9" x="738.7" y="570"/>
+  <use xlink:href="#glyph0-3" x="758.195" y="570"/>
+  <use xlink:href="#glyph0-7" x="777.655" y="570"/>
+  <use xlink:href="#glyph0-11" x="791.06" y="570"/>
+  <use xlink:href="#glyph0-10" x="809.54" y="570"/>
+  <use xlink:href="#glyph0-10" x="838.765" y="570"/>
+  <use xlink:href="#glyph0-2" x="867.99" y="570"/>
+  <use xlink:href="#glyph0-12" x="877.09" y="570"/>
+  <use xlink:href="#glyph0-3" x="896.235" y="570"/>
+  <use xlink:href="#glyph0-14" x="915.695" y="570"/>
+  <use xlink:href="#glyph0-17" x="922.695" y="570"/>
+  <use xlink:href="#glyph0-11" x="931.41" y="570"/>
+  <use xlink:href="#glyph0-12" x="949.89" y="570"/>
+  <use xlink:href="#glyph0-3" x="969.035" y="570"/>
+  <use xlink:href="#glyph0-16" x="988.495" y="570"/>
+  <use xlink:href="#glyph0-11" x="1007.57" y="570"/>
+  <use xlink:href="#glyph0-3" x="1026.05" y="570"/>
+  <use xlink:href="#glyph0-6" x="1045.51" y="570"/>
+</g>
+</g>
+</g>
+<g clip-path="url(#clip15)" clip-rule="nonzero">
+<g clip-path="url(#clip16)" clip-rule="nonzero">
+<path style="fill:none;stroke-width:3;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(100%,100%,100%);stroke-opacity:1;stroke-miterlimit:10;" d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</g>
+</g>
+</g>
+</svg>
diff --git a/doc/src/assets/julialogoheaderimage_light.svg b/doc/src/assets/julialogoheaderimage_light.svg
new file mode 100644
index 0000000000000..892ca1bd08701
--- /dev/null
+++ b/doc/src/assets/julialogoheaderimage_light.svg
@@ -0,0 +1,209 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="1280pt" height="640pt" viewBox="0 0 1280 640" version="1.1">
+<defs>
+<linearGradient id="gradient1" x1="0.0" y1="0.0" x2="1" y2="1">
+      <stop offset="0%" stop-color="#ffb3df"/>
+      <stop offset="16.33%" stop-color="#f5ccc0"/>
+      <stop offset="49%" stop-color="#dff1e5"/>
+      <stop offset="66%" stop-color="#a6d8d4"/>
+      <stop offset="100%" stop-color="#9589e1"/>
+</linearGradient>
+<g>
+<symbol overflow="visible" id="glyph0-0">
+<path style="stroke:none;" d="M 0 -0.210938 L 0 -24.289062 C 0 -24.429688 0.0703125 -24.5 0.210938 -24.5 L 16.378906 -24.5 C 16.519531 -24.5 16.589844 -24.429688 16.589844 -24.289062 L 16.589844 -0.210938 C 16.589844 -0.0703125 16.519531 0 16.378906 0 L 0.210938 0 C 0.0703125 0 0 -0.105469 0 -0.210938 Z M 6.824219 -8.785156 L 9.136719 -8.785156 C 9.519531 -8.785156 9.730469 -8.996094 9.730469 -9.378906 L 9.730469 -9.660156 C 9.730469 -11.933594 14.175781 -11.933594 14.175781 -16.101562 C 14.175781 -19.003906 11.96875 -20.964844 8.503906 -20.964844 C 5.003906 -20.964844 2.589844 -18.898438 2.589844 -15.855469 L 2.589844 -15.609375 C 2.589844 -15.433594 2.835938 -15.296875 3.183594 -15.296875 L 5.53125 -15.191406 C 5.914062 -15.15625 6.125 -15.363281 6.125 -15.75 L 6.125 -15.855469 C 6.125 -16.976562 7 -17.746094 8.433594 -17.746094 C 9.730469 -17.746094 10.605469 -17.046875 10.605469 -15.960938 C 10.605469 -13.511719 6.230469 -13.335938 6.230469 -9.871094 L 6.230469 -9.34375 C 6.230469 -8.996094 6.441406 -8.785156 6.824219 -8.785156 Z M 8.121094 -3.183594 C 9.273438 -3.183594 10.253906 -4.058594 10.253906 -5.179688 C 10.253906 -6.265625 9.273438 -7.175781 8.15625 -7.175781 C 6.964844 -7.175781 5.984375 -6.265625 5.984375 -5.179688 C 5.984375 -4.058594 6.929688 -3.183594 8.121094 -3.183594 Z M 8.121094 -3.183594 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-1">
+<path style="stroke:none;" d="M 11.199219 -18.269531 C 9.34375 -18.269531 7.945312 -17.640625 6.964844 -16.308594 C 6.859375 -16.171875 6.753906 -16.238281 6.753906 -16.414062 L 6.753906 -23.90625 C 6.753906 -24.289062 6.546875 -24.5 6.160156 -24.5 L 2.414062 -24.5 C 2.03125 -24.5 1.820312 -24.289062 1.820312 -23.90625 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -10.886719 C 6.753906 -12.773438 7.945312 -14.035156 9.660156 -14.035156 C 11.375 -14.035156 12.496094 -12.738281 12.496094 -10.886719 L 12.496094 -0.59375 C 12.496094 -0.210938 12.703125 0 13.089844 0 L 16.835938 0 C 17.21875 0 17.429688 -0.210938 17.429688 -0.59375 L 17.429688 -11.898438 C 17.429688 -15.890625 14.875 -18.269531 11.199219 -18.269531 Z M 11.199219 -18.269531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-2">
+<path style="stroke:none;" d="M 4.550781 -19.703125 C 6.371094 -19.703125 7.558594 -20.894531 7.558594 -22.644531 C 7.558594 -24.394531 6.371094 -25.585938 4.550781 -25.585938 C 2.765625 -25.585938 1.539062 -24.394531 1.539062 -22.644531 C 1.539062 -20.894531 2.765625 -19.703125 4.550781 -19.703125 Z M 2.695312 0 L 6.441406 0 C 6.824219 0 7.035156 -0.210938 7.035156 -0.59375 L 7.035156 -17.394531 C 7.035156 -17.78125 6.824219 -17.988281 6.441406 -17.988281 L 2.695312 -17.988281 C 2.308594 -17.988281 2.101562 -17.78125 2.101562 -17.394531 L 2.101562 -0.59375 C 2.101562 -0.210938 2.308594 0 2.695312 0 Z M 2.695312 0 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-3">
+<path style="stroke:none;" d="M 12.808594 -17.394531 L 12.808594 -16.800781 C 12.808594 -16.625 12.703125 -16.554688 12.566406 -16.695312 C 11.65625 -17.675781 10.324219 -18.269531 8.46875 -18.269531 C 5.320312 -18.269531 2.90625 -16.34375 1.925781 -13.546875 C 1.433594 -12.214844 1.296875 -10.675781 1.296875 -9.101562 C 1.296875 -7.769531 1.398438 -6.230469 1.820312 -4.933594 C 2.976562 -1.363281 6.089844 -0.28125 8.714844 -0.28125 C 10.5 -0.28125 11.761719 -0.804688 12.566406 -1.679688 C 12.703125 -1.820312 12.808594 -1.785156 12.808594 -1.644531 C 12.808594 2.066406 10.113281 2.871094 6.019531 2.308594 C 5.636719 2.238281 5.355469 2.449219 5.355469 2.835938 L 5.214844 6.089844 C 5.214844 6.441406 5.355469 6.683594 5.738281 6.71875 C 11.375 7.421875 17.746094 6.195312 17.746094 -1.609375 L 17.746094 -17.394531 C 17.746094 -17.78125 17.535156 -17.988281 17.148438 -17.988281 L 13.40625 -17.988281 C 13.019531 -17.988281 12.808594 -17.78125 12.808594 -17.394531 Z M 12.53125 -6.265625 C 12.179688 -5.109375 11.234375 -4.199219 9.660156 -4.199219 C 8.15625 -4.199219 7.105469 -5.109375 6.683594 -6.300781 C 6.40625 -6.859375 6.265625 -7.804688 6.265625 -9.101562 C 6.265625 -10.359375 6.441406 -11.269531 6.753906 -11.933594 C 7.210938 -13.125 8.15625 -14.035156 9.625 -14.035156 C 11.128906 -14.035156 12.109375 -13.160156 12.496094 -11.933594 C 12.703125 -11.269531 12.808594 -10.675781 12.808594 -9.136719 C 12.808594 -7.59375 12.703125 -6.929688 12.53125 -6.265625 Z M 12.53125 -6.265625 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-4">
+<path style="stroke:none;" d="M 1.609375 -8.226562 L 12.703125 -8.226562 C 13.089844 -8.226562 13.300781 -8.433594 13.300781 -8.820312 L 13.300781 -11.863281 C 13.300781 -12.25 13.089844 -12.460938 12.703125 -12.460938 L 1.609375 -12.460938 C 1.226562 -12.460938 1.015625 -12.25 1.015625 -11.863281 L 1.015625 -8.820312 C 1.015625 -8.433594 1.226562 -8.226562 1.609375 -8.226562 Z M 1.609375 -8.226562 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-5">
+<path style="stroke:none;" d="M 17.921875 -13.125 C 16.976562 -16.203125 14.351562 -18.269531 11.164062 -18.269531 C 9.308594 -18.269531 8.050781 -17.535156 7.175781 -16.378906 C 7.070312 -16.238281 6.929688 -16.273438 6.929688 -16.449219 L 6.929688 -17.394531 C 6.929688 -17.78125 6.71875 -17.988281 6.335938 -17.988281 L 2.589844 -17.988281 C 2.203125 -17.988281 1.996094 -17.78125 1.996094 -17.394531 L 1.996094 5.914062 C 1.996094 6.300781 2.203125 6.511719 2.589844 6.511719 L 6.335938 6.511719 C 6.71875 6.511719 6.929688 6.300781 6.929688 5.914062 L 6.929688 -1.296875 C 6.929688 -1.46875 7.070312 -1.539062 7.175781 -1.398438 C 8.050781 -0.316406 9.34375 0.316406 11.164062 0.316406 C 14.386719 0.316406 16.835938 -1.644531 17.851562 -4.585938 C 18.339844 -5.878906 18.515625 -7.386719 18.515625 -8.925781 C 18.515625 -10.394531 18.375 -11.828125 17.921875 -13.125 Z M 12.496094 -5.355469 C 11.933594 -4.480469 11.128906 -4.023438 10.046875 -4.023438 C 9.03125 -4.023438 8.261719 -4.515625 7.699219 -5.390625 C 7.210938 -6.265625 6.964844 -7.488281 6.964844 -8.996094 C 6.964844 -10.429688 7.210938 -11.621094 7.664062 -12.460938 C 8.191406 -13.40625 8.996094 -13.964844 10.046875 -13.964844 C 11.199219 -13.964844 12.074219 -13.40625 12.601562 -12.460938 C 13.054688 -11.621094 13.335938 -10.429688 13.335938 -9.03125 C 13.335938 -7.488281 13.019531 -6.230469 12.496094 -5.355469 Z M 12.496094 -5.355469 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-6">
+<path style="stroke:none;" d="M 10.5 -3.886719 C 8.539062 -3.886719 7.105469 -4.691406 6.613281 -6.125 C 6.511719 -6.40625 6.441406 -6.753906 6.371094 -7.210938 C 6.371094 -7.351562 6.441406 -7.421875 6.578125 -7.421875 L 17.324219 -7.421875 C 17.710938 -7.421875 17.921875 -7.628906 17.921875 -8.015625 C 17.886719 -8.925781 17.816406 -9.800781 17.710938 -10.570312 C 16.941406 -15.296875 14.386719 -18.304688 9.660156 -18.304688 C 5.738281 -18.304688 2.765625 -15.996094 1.820312 -12.566406 C 1.539062 -11.550781 1.433594 -10.464844 1.433594 -9.136719 C 1.433594 -7.980469 1.574219 -6.894531 1.855469 -5.878906 C 2.800781 -2.171875 5.636719 0.28125 10.046875 0.28125 C 12.914062 0.28125 15.433594 -0.910156 16.941406 -2.800781 C 17.148438 -3.113281 17.148438 -3.359375 16.871094 -3.640625 L 14.875 -5.566406 C 14.59375 -5.84375 14.316406 -5.808594 14.035156 -5.496094 C 13.265625 -4.550781 12.074219 -3.953125 10.5 -3.886719 Z M 9.589844 -14.035156 C 11.199219 -14.035156 12.214844 -13.160156 12.636719 -11.828125 C 12.703125 -11.621094 12.738281 -11.410156 12.773438 -11.09375 C 12.808594 -10.953125 12.738281 -10.886719 12.601562 -10.886719 L 6.648438 -10.886719 C 6.511719 -10.886719 6.441406 -10.953125 6.476562 -11.09375 C 6.511719 -11.445312 6.613281 -11.726562 6.683594 -11.96875 C 7.070312 -13.230469 8.050781 -14.035156 9.589844 -14.035156 Z M 9.589844 -14.035156 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-7">
+<path style="stroke:none;" d="M 10.78125 -18.234375 C 9.066406 -18.234375 7.839844 -17.429688 7 -16.203125 C 6.859375 -16.066406 6.753906 -16.136719 6.753906 -16.308594 L 6.753906 -17.394531 C 6.753906 -17.78125 6.546875 -17.988281 6.160156 -17.988281 L 2.414062 -17.988281 C 2.03125 -17.988281 1.820312 -17.78125 1.820312 -17.394531 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -9.976562 C 6.753906 -11.863281 8.191406 -12.949219 9.765625 -13.089844 C 10.464844 -13.195312 11.128906 -13.160156 11.585938 -13.019531 C 12.003906 -12.949219 12.214844 -12.984375 12.285156 -13.40625 L 12.949219 -17.078125 C 13.019531 -17.394531 12.949219 -17.675781 12.636719 -17.816406 C 12.214844 -18.058594 11.621094 -18.234375 10.78125 -18.234375 Z M 10.78125 -18.234375 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-8">
+<path style="stroke:none;" d="M 11.898438 -18.023438 L 8.714844 -18.023438 C 8.574219 -18.023438 8.503906 -18.09375 8.503906 -18.234375 L 8.503906 -18.585938 C 8.503906 -20.265625 9.03125 -20.789062 10.675781 -20.824219 L 11.65625 -20.824219 C 12.039062 -20.824219 12.25 -21.035156 12.25 -21.421875 L 12.25 -23.90625 C 12.25 -24.289062 12.039062 -24.5 11.65625 -24.5 L 10.5 -24.5 C 5.355469 -24.640625 3.570312 -23.238281 3.570312 -18.96875 L 3.570312 -18.234375 C 3.570312 -18.09375 3.464844 -18.023438 3.359375 -18.023438 L 1.503906 -18.023438 C 1.121094 -18.023438 0.910156 -17.816406 0.910156 -17.429688 L 0.910156 -14.453125 C 0.910156 -14.070312 1.121094 -13.859375 1.503906 -13.859375 L 3.359375 -13.859375 C 3.5 -13.859375 3.570312 -13.789062 3.570312 -13.648438 L 3.570312 -0.59375 C 3.570312 -0.210938 3.78125 0 4.164062 0 L 7.910156 0 C 8.296875 0 8.503906 -0.210938 8.503906 -0.59375 L 8.503906 -13.648438 C 8.503906 -13.789062 8.574219 -13.859375 8.714844 -13.859375 L 11.898438 -13.859375 C 12.285156 -13.859375 12.496094 -14.070312 12.496094 -14.453125 L 12.496094 -17.429688 C 12.496094 -17.816406 12.285156 -18.023438 11.898438 -18.023438 Z M 11.898438 -18.023438 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-9">
+<path style="stroke:none;" d="M 9.800781 0.28125 C 13.71875 0.28125 16.625 -1.960938 17.675781 -5.566406 C 17.953125 -6.613281 18.128906 -7.875 18.128906 -9.101562 C 18.128906 -10.394531 17.953125 -11.691406 17.605469 -12.773438 C 16.519531 -16.171875 13.683594 -18.269531 9.835938 -18.269531 C 5.84375 -18.269531 2.941406 -16.171875 1.890625 -12.738281 C 1.539062 -11.691406 1.363281 -10.359375 1.363281 -9.03125 C 1.363281 -7.769531 1.539062 -6.511719 1.855469 -5.460938 C 2.871094 -1.925781 5.808594 0.28125 9.800781 0.28125 Z M 9.800781 -3.953125 C 8.15625 -3.953125 7.070312 -4.96875 6.613281 -6.578125 C 6.441406 -7.210938 6.335938 -8.121094 6.335938 -9.03125 C 6.335938 -9.976562 6.441406 -10.886719 6.613281 -11.515625 C 7.070312 -13.054688 8.15625 -14.035156 9.730469 -14.035156 C 11.339844 -14.035156 12.425781 -13.089844 12.878906 -11.515625 C 13.054688 -10.886719 13.160156 -9.976562 13.160156 -9.03125 C 13.160156 -8.15625 13.089844 -7.28125 12.878906 -6.578125 C 12.425781 -4.96875 11.339844 -3.953125 9.800781 -3.953125 Z M 9.800781 -3.953125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-10">
+<path style="stroke:none;" d="M 21.769531 -18.269531 C 19.671875 -18.269531 17.816406 -17.394531 16.730469 -15.46875 C 16.660156 -15.328125 16.519531 -15.296875 16.449219 -15.46875 C 15.503906 -17.253906 13.753906 -18.269531 11.410156 -18.269531 C 9.519531 -18.269531 7.980469 -17.605469 7 -16.203125 C 6.894531 -16.03125 6.753906 -16.101562 6.753906 -16.273438 L 6.753906 -17.394531 C 6.753906 -17.78125 6.546875 -17.988281 6.160156 -17.988281 L 2.414062 -17.988281 C 2.03125 -17.988281 1.820312 -17.78125 1.820312 -17.394531 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -11.515625 C 6.964844 -13.089844 8.015625 -14.035156 9.519531 -14.035156 C 11.164062 -14.035156 12.285156 -12.808594 12.285156 -10.921875 L 12.285156 -0.59375 C 12.285156 -0.210938 12.496094 0 12.878906 0 L 16.589844 0 C 16.976562 0 17.183594 -0.210938 17.183594 -0.59375 L 17.183594 -10.953125 C 17.183594 -12.84375 18.339844 -14.035156 19.949219 -14.035156 C 21.59375 -14.035156 22.644531 -12.808594 22.644531 -10.921875 L 22.644531 -0.59375 C 22.644531 -0.210938 22.855469 0 23.238281 0 L 26.984375 0 C 27.371094 0 27.578125 -0.210938 27.578125 -0.59375 L 27.578125 -12.003906 C 27.578125 -15.960938 25.410156 -18.269531 21.769531 -18.269531 Z M 21.769531 -18.269531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-11">
+<path style="stroke:none;" d="M 9.03125 -18.269531 C 4.796875 -18.269531 1.679688 -16.136719 1.328125 -13.230469 C 1.296875 -12.949219 1.539062 -12.773438 1.890625 -12.738281 L 5.773438 -12.25 C 6.125 -12.214844 6.335938 -12.355469 6.476562 -12.703125 C 6.789062 -13.546875 7.734375 -14.035156 9.136719 -14.035156 C 10.988281 -14.035156 11.933594 -13.089844 11.933594 -11.585938 L 11.933594 -10.851562 C 11.933594 -10.710938 11.828125 -10.640625 11.726562 -10.640625 L 8.191406 -10.640625 C 3.464844 -10.640625 0.910156 -8.503906 0.910156 -4.96875 C 0.910156 -1.363281 3.464844 0.28125 6.894531 0.28125 C 8.996094 0.28125 10.605469 -0.316406 11.691406 -1.503906 C 11.828125 -1.644531 11.933594 -1.609375 11.933594 -1.433594 L 11.933594 -0.59375 C 11.933594 -0.210938 12.144531 0 12.53125 0 L 16.273438 0 C 16.660156 0 16.871094 -0.210938 16.871094 -0.59375 L 16.871094 -12.285156 C 16.871094 -15.679688 13.648438 -18.269531 9.03125 -18.269531 Z M 8.328125 -3.394531 C 6.859375 -3.394531 5.84375 -4.164062 5.84375 -5.425781 C 5.84375 -6.894531 7.140625 -7.699219 9.238281 -7.699219 L 11.726562 -7.699219 C 11.863281 -7.699219 11.933594 -7.59375 11.933594 -7.488281 L 11.933594 -6.265625 C 11.933594 -4.550781 10.289062 -3.394531 8.328125 -3.394531 Z M 8.328125 -3.394531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-12">
+<path style="stroke:none;" d="M 11.199219 -18.269531 C 9.34375 -18.269531 7.945312 -17.640625 6.964844 -16.308594 C 6.859375 -16.171875 6.753906 -16.238281 6.753906 -16.414062 L 6.753906 -17.394531 C 6.753906 -17.78125 6.546875 -17.988281 6.160156 -17.988281 L 2.414062 -17.988281 C 2.03125 -17.988281 1.820312 -17.78125 1.820312 -17.394531 L 1.820312 -0.59375 C 1.820312 -0.210938 2.03125 0 2.414062 0 L 6.160156 0 C 6.546875 0 6.753906 -0.210938 6.753906 -0.59375 L 6.753906 -10.886719 C 6.753906 -12.773438 7.945312 -14.035156 9.660156 -14.035156 C 11.375 -14.035156 12.496094 -12.738281 12.496094 -10.886719 L 12.496094 -0.59375 C 12.496094 -0.210938 12.703125 0 13.089844 0 L 16.835938 0 C 17.21875 0 17.429688 -0.210938 17.429688 -0.59375 L 17.429688 -11.898438 C 17.429688 -15.890625 14.875 -18.269531 11.199219 -18.269531 Z M 11.199219 -18.269531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-13">
+<path style="stroke:none;" d="M 9.730469 0.28125 C 13.371094 0.28125 16.449219 -1.574219 17.464844 -4.726562 C 17.570312 -5.003906 17.605469 -5.285156 17.640625 -5.53125 C 17.710938 -5.878906 17.5 -6.125 17.148438 -6.195312 L 13.476562 -6.753906 C 13.089844 -6.824219 12.808594 -6.40625 12.773438 -6.265625 C 12.773438 -6.265625 12.773438 -6.195312 12.738281 -6.089844 C 12.320312 -4.796875 11.164062 -3.953125 9.695312 -3.953125 C 8.15625 -3.953125 7.105469 -4.828125 6.683594 -6.195312 C 6.476562 -6.824219 6.335938 -7.769531 6.335938 -9.066406 C 6.335938 -10.289062 6.476562 -11.269531 6.71875 -11.933594 C 7.140625 -13.230469 8.191406 -14.035156 9.695312 -14.035156 C 11.304688 -14.035156 12.460938 -13.089844 12.773438 -11.933594 L 12.878906 -11.445312 C 12.914062 -11.128906 13.195312 -10.988281 13.546875 -11.058594 L 17.21875 -11.621094 C 17.570312 -11.691406 17.78125 -11.898438 17.746094 -12.25 C 17.710938 -12.53125 17.640625 -12.914062 17.464844 -13.335938 C 16.554688 -16.101562 13.578125 -18.269531 9.730469 -18.269531 C 5.984375 -18.269531 3.078125 -16.308594 1.960938 -13.160156 C 1.609375 -12.144531 1.363281 -10.851562 1.363281 -9.101562 C 1.363281 -7.59375 1.539062 -6.195312 1.960938 -4.96875 C 3.113281 -1.75 5.984375 0.28125 9.730469 0.28125 Z M 9.730469 0.28125 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-14">
+<g>
+</g>
+</symbol>
+<symbol overflow="visible" id="glyph0-15">
+<path style="stroke:none;" d="M 8.996094 0.246094 C 13.511719 0.246094 16.589844 -1.714844 16.589844 -5.25 C 16.589844 -8.46875 13.964844 -9.695312 11.199219 -10.5 C 8.609375 -11.304688 6.160156 -11.410156 6.160156 -12.808594 C 6.160156 -13.824219 7.316406 -14.386719 8.820312 -14.386719 C 10.605469 -14.386719 11.65625 -13.578125 11.65625 -12.566406 L 11.65625 -12.355469 C 11.65625 -12.214844 11.863281 -12.144531 12.25 -12.144531 L 15.679688 -12.144531 C 16.066406 -12.144531 16.273438 -12.320312 16.273438 -12.566406 C 16.273438 -15.890625 13.300781 -18.234375 8.785156 -18.234375 C 4.445312 -18.234375 1.433594 -16.101562 1.433594 -12.601562 C 1.433594 -9.484375 3.988281 -8.296875 6.371094 -7.488281 C 8.960938 -6.441406 11.621094 -6.476562 11.621094 -5.074219 C 11.621094 -4.128906 10.570312 -3.429688 8.960938 -3.429688 C 7.210938 -3.429688 5.984375 -4.269531 5.984375 -5.285156 L 5.984375 -5.53125 C 5.984375 -5.671875 5.773438 -5.738281 5.390625 -5.738281 L 1.855469 -5.738281 C 1.46875 -5.738281 1.261719 -5.53125 1.261719 -5.144531 L 1.261719 -4.96875 C 1.261719 -1.925781 4.234375 0.246094 8.996094 0.246094 Z M 8.996094 0.246094 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-16">
+<path style="stroke:none;" d="M 12.320312 -17.394531 L 12.320312 -6.546875 C 12.109375 -5.003906 11.023438 -3.953125 9.414062 -3.953125 C 7.628906 -3.953125 6.578125 -5.25 6.578125 -7.105469 L 6.578125 -17.394531 C 6.578125 -17.78125 6.371094 -17.988281 5.984375 -17.988281 L 2.273438 -17.988281 C 1.890625 -17.988281 1.679688 -17.78125 1.679688 -17.394531 L 1.679688 -5.878906 C 1.679688 -1.855469 4.375 0.246094 7.769531 0.246094 C 9.730469 0.246094 11.164062 -0.386719 12.074219 -1.609375 C 12.179688 -1.75 12.320312 -1.714844 12.320312 -1.539062 L 12.320312 -0.59375 C 12.320312 -0.210938 12.53125 0 12.914062 0 L 16.660156 0 C 17.046875 0 17.253906 -0.210938 17.253906 -0.59375 L 17.253906 -17.394531 C 17.253906 -17.78125 17.046875 -17.988281 16.660156 -17.988281 L 12.914062 -17.988281 C 12.53125 -17.988281 12.320312 -17.78125 12.320312 -17.394531 Z M 12.320312 -17.394531 "/>
+</symbol>
+<symbol overflow="visible" id="glyph0-17">
+<path style="stroke:none;" d="M 2.484375 0 L 6.230469 0 C 6.613281 0 6.824219 -0.210938 6.824219 -0.59375 L 6.824219 -23.90625 C 6.824219 -24.289062 6.613281 -24.5 6.230469 -24.5 L 2.484375 -24.5 C 2.101562 -24.5 1.890625 -24.289062 1.890625 -23.90625 L 1.890625 -0.59375 C 1.890625 -0.210938 2.101562 0 2.484375 0 Z M 2.484375 0 "/>
+</symbol>
+</g>
+<clipPath id="clip1">
+  <path d="M 5 5 L 1275 5 L 1275 635 L 5 635 Z M 5 5 "/>
+</clipPath>
+<clipPath id="clip2">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip3">
+  <path d="M 414 216 L 864 216 L 864 460 L 414 460 Z M 414 216 "/>
+</clipPath>
+<clipPath id="clip4">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip5">
+  <path d="M 456 216 L 510 216 L 510 269 L 456 269 Z M 456 216 "/>
+</clipPath>
+<clipPath id="clip6">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip7">
+  <path d="M 731 161 L 785 161 L 785 215 L 731 215 Z M 731 161 "/>
+</clipPath>
+<clipPath id="clip8">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip9">
+  <path d="M 763 216 L 816 216 L 816 269 L 763 269 Z M 763 216 "/>
+</clipPath>
+<clipPath id="clip10">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip11">
+  <path d="M 700 216 L 753 216 L 753 269 L 700 269 Z M 700 216 "/>
+</clipPath>
+<clipPath id="clip12">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip13">
+  <path d="M 217 544 L 1064 544 L 1064 578 L 217 578 Z M 217 544 "/>
+</clipPath>
+<clipPath id="clip14">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+<clipPath id="clip15">
+  <path d="M 5 5 L 1275 5 L 1275 635 L 5 635 Z M 5 5 "/>
+</clipPath>
+<clipPath id="clip16">
+  <path d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</clipPath>
+</defs>
+<g id="surface855">
+<g clip-path="url(#clip1)" clip-rule="nonzero">
+<g clip-path="url(#clip2)" clip-rule="nonzero">
+<rect x="0" y="0" width="1280" height="640" fill="url(#gradient1)"/>
+</g>
+</g>
+<g clip-path="url(#clip3)" clip-rule="nonzero">
+<g clip-path="url(#clip4)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 501.804688 406.21875 C 501.804688 417.523438 500.535156 426.648438 497.992188 433.601562 C 495.449219 440.554688 491.835938 445.953125 487.140625 449.796875 C 482.453125 453.640625 476.828125 456.210938 470.273438 457.511719 C 463.714844 458.804688 456.421875 459.457031 448.398438 459.457031 C 437.539062 459.457031 429.234375 457.765625 423.472656 454.371094 C 417.707031 450.976562 414.824219 446.914062 414.824219 442.160156 C 414.824219 438.203125 416.429688 434.871094 419.652344 432.15625 C 422.875 429.445312 427.199219 428.09375 432.625 428.09375 C 436.695312 428.09375 439.949219 429.195312 442.375 431.398438 C 444.804688 433.601562 446.816406 435.78125 448.398438 437.921875 C 450.203125 440.296875 451.734375 441.878906 452.976562 442.671875 C 454.21875 443.460938 455.347656 443.859375 456.367188 443.859375 C 458.511719 443.859375 460.152344 442.585938 461.28125 440.046875 C 462.414062 437.5 462.976562 432.554688 462.976562 425.210938 L 462.976562 285.828125 L 501.804688 275.144531 Z M 557.109375 278.871094 L 557.109375 368.570312 C 557.109375 371.0625 557.585938 373.40625 558.546875 375.609375 C 559.507812 377.8125 560.839844 379.710938 562.53125 381.292969 C 564.234375 382.875 566.207031 384.148438 568.46875 385.109375 C 570.730469 386.070312 573.164062 386.546875 575.765625 386.546875 C 578.699219 386.546875 582.039062 384.902344 586.105469 382.054688 C 592.546875 377.542969 596.445312 374.445312 596.445312 370.773438 C 596.445312 369.898438 596.445312 278.871094 596.445312 278.871094 L 635.109375 278.871094 L 635.109375 406.21875 L 596.445312 406.21875 L 596.445312 394.347656 C 591.359375 398.640625 585.9375 402.085938 580.171875 404.6875 C 574.40625 407.289062 568.808594 408.59375 563.382812 408.59375 C 557.054688 408.59375 551.171875 407.542969 545.746094 405.453125 C 540.320312 403.359375 535.574219 400.507812 531.507812 396.890625 C 527.4375 393.273438 524.242188 389.03125 521.921875 384.171875 C 519.609375 379.3125 518.445312 374.109375 518.445312 368.570312 L 518.445312 278.871094 Z M 689.324219 406.21875 L 650.835938 406.21875 L 650.835938 227.664062 L 689.324219 216.984375 Z M 705.402344 285.828125 L 744.0625 275.144531 L 744.0625 406.21875 L 705.402344 406.21875 Z M 824.839844 340.421875 C 821.109375 342.011719 817.34375 343.84375 813.5625 345.9375 C 809.773438 348.03125 806.324219 350.320312 803.21875 352.804688 C 800.109375 355.289062 797.59375 357.949219 795.671875 360.773438 C 793.75 363.597656 792.789062 366.539062 792.789062 369.59375 C 792.789062 371.964844 793.101562 374.257812 793.71875 376.460938 C 794.34375 378.664062 795.21875 380.554688 796.351562 382.136719 C 797.484375 383.71875 798.726562 384.992188 800.085938 385.953125 C 801.4375 386.914062 802.90625 387.390625 804.492188 387.390625 C 807.65625 387.390625 810.847656 386.429688 814.070312 384.507812 C 817.292969 382.585938 820.878906 380.164062 824.839844 377.21875 Z M 863.664062 406.21875 L 824.839844 406.21875 L 824.839844 396.039062 C 822.6875 397.851562 820.597656 399.523438 818.5625 401.046875 C 816.53125 402.570312 814.242188 403.898438 811.695312 405.03125 C 809.15625 406.160156 806.300781 407.03125 803.132812 407.660156 C 799.96875 408.28125 796.234375 408.59375 791.945312 408.59375 C 786.0625 408.59375 780.777344 407.742188 776.089844 406.046875 C 771.398438 404.351562 767.414062 402.035156 764.136719 399.09375 C 760.855469 396.15625 758.34375 392.679688 756.589844 388.664062 C 754.835938 384.65625 753.960938 380.273438 753.960938 375.527344 C 753.960938 370.664062 754.890625 366.257812 756.757812 362.296875 C 758.625 358.34375 761.164062 354.785156 764.390625 351.617188 C 767.609375 348.453125 771.367188 345.625 775.664062 343.136719 C 779.957031 340.652344 784.570312 338.359375 789.484375 336.269531 C 794.398438 334.179688 799.515625 332.261719 804.828125 330.507812 C 810.140625 328.757812 815.398438 327.085938 820.597656 325.507812 L 824.839844 324.484375 L 824.839844 311.941406 C 824.839844 303.804688 823.28125 298.039062 820.175781 294.644531 C 817.0625 291.25 812.910156 289.558594 807.710938 289.558594 C 801.609375 289.558594 797.367188 291.03125 794.992188 293.964844 C 792.617188 296.90625 791.429688 300.460938 791.429688 304.648438 C 791.429688 307.019531 791.179688 309.339844 790.667969 311.601562 C 790.164062 313.863281 789.285156 315.835938 788.042969 317.539062 C 786.800781 319.230469 785.019531 320.589844 782.699219 321.601562 C 780.382812 322.625 777.472656 323.132812 773.96875 323.132812 C 768.542969 323.132812 764.136719 321.574219 760.742188 318.46875 C 757.351562 315.359375 755.65625 311.429688 755.65625 306.679688 C 755.65625 302.273438 757.152344 298.179688 760.148438 294.390625 C 763.148438 290.601562 767.183594 287.351562 772.273438 284.640625 C 777.359375 281.921875 783.238281 279.78125 789.90625 278.195312 C 796.574219 276.617188 803.640625 275.824219 811.101562 275.824219 C 820.257812 275.824219 828.144531 276.644531 834.757812 278.28125 C 841.367188 279.921875 846.824219 282.265625 851.117188 285.316406 C 855.414062 288.367188 858.578125 292.042969 860.609375 296.335938 C 862.648438 300.632812 863.664062 305.445312 863.664062 310.75 Z M 863.664062 406.21875 "/>
+</g>
+</g>
+<g clip-path="url(#clip5)" clip-rule="nonzero">
+<g clip-path="url(#clip6)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(25.1%,38.8%,84.7%);fill-opacity:1;" d="M 509.429688 242.382812 C 509.429688 256.886719 497.675781 268.632812 483.179688 268.632812 C 468.683594 268.632812 456.929688 256.886719 456.929688 242.382812 C 456.929688 227.890625 468.683594 216.132812 483.179688 216.132812 C 497.675781 216.132812 509.429688 227.890625 509.429688 242.382812 "/>
+</g>
+</g>
+<g clip-path="url(#clip7)" clip-rule="nonzero">
+<g clip-path="url(#clip8)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(22%,59.6%,14.9%);fill-opacity:1;" d="M 784.453125 187.757812 C 784.453125 202.253906 772.695312 214.007812 758.203125 214.007812 C 743.699219 214.007812 731.953125 202.253906 731.953125 187.757812 C 731.953125 173.261719 743.699219 161.507812 758.203125 161.507812 C 772.695312 161.507812 784.453125 173.261719 784.453125 187.757812 "/>
+</g>
+</g>
+<g clip-path="url(#clip9)" clip-rule="nonzero">
+<g clip-path="url(#clip10)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(58.4%,34.5%,69.8%);fill-opacity:1;" d="M 815.980469 242.382812 C 815.980469 256.886719 804.226562 268.632812 789.730469 268.632812 C 775.234375 268.632812 763.480469 256.886719 763.480469 242.382812 C 763.480469 227.890625 775.234375 216.132812 789.730469 216.132812 C 804.226562 216.132812 815.980469 227.890625 815.980469 242.382812 "/>
+</g>
+</g>
+<g clip-path="url(#clip11)" clip-rule="nonzero">
+<g clip-path="url(#clip12)" clip-rule="nonzero">
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(79.6%,23.5%,20%);fill-opacity:1;" d="M 752.910156 242.382812 C 752.910156 256.886719 741.15625 268.632812 726.660156 268.632812 C 712.15625 268.632812 700.410156 256.886719 700.410156 242.382812 C 700.410156 227.890625 712.15625 216.132812 726.660156 216.132812 C 741.15625 216.132812 752.910156 227.890625 752.910156 242.382812 "/>
+</g>
+</g>
+<g clip-path="url(#clip13)" clip-rule="nonzero">
+<g clip-path="url(#clip14)" clip-rule="nonzero">
+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">
+  <use xlink:href="#glyph0-1" x="215.345" y="570"/>
+  <use xlink:href="#glyph0-2" x="234.49" y="570"/>
+  <use xlink:href="#glyph0-3" x="243.59" y="570"/>
+  <use xlink:href="#glyph0-1" x="263.05" y="570"/>
+  <use xlink:href="#glyph0-4" x="282.195" y="570"/>
+  <use xlink:href="#glyph0-5" x="296.51" y="570"/>
+  <use xlink:href="#glyph0-6" x="316.075" y="570"/>
+  <use xlink:href="#glyph0-7" x="335.22" y="570"/>
+  <use xlink:href="#glyph0-8" x="348.625" y="570"/>
+  <use xlink:href="#glyph0-9" x="362.17" y="570"/>
+  <use xlink:href="#glyph0-7" x="381.665" y="570"/>
+  <use xlink:href="#glyph0-10" x="395.07" y="570"/>
+  <use xlink:href="#glyph0-11" x="424.295" y="570"/>
+  <use xlink:href="#glyph0-12" x="442.775" y="570"/>
+  <use xlink:href="#glyph0-13" x="461.92" y="570"/>
+  <use xlink:href="#glyph0-6" x="480.715" y="570"/>
+  <use xlink:href="#glyph0-14" x="499.86" y="570"/>
+  <use xlink:href="#glyph0-9" x="506.86" y="570"/>
+  <use xlink:href="#glyph0-5" x="526.355" y="570"/>
+  <use xlink:href="#glyph0-6" x="545.92" y="570"/>
+  <use xlink:href="#glyph0-12" x="565.065" y="570"/>
+  <use xlink:href="#glyph0-14" x="584.21" y="570"/>
+  <use xlink:href="#glyph0-15" x="591.21" y="570"/>
+  <use xlink:href="#glyph0-9" x="608.815" y="570"/>
+  <use xlink:href="#glyph0-16" x="628.31" y="570"/>
+  <use xlink:href="#glyph0-7" x="647.385" y="570"/>
+  <use xlink:href="#glyph0-13" x="660.79" y="570"/>
+  <use xlink:href="#glyph0-6" x="679.585" y="570"/>
+  <use xlink:href="#glyph0-14" x="698.73" y="570"/>
+  <use xlink:href="#glyph0-5" x="705.73" y="570"/>
+  <use xlink:href="#glyph0-7" x="725.295" y="570"/>
+  <use xlink:href="#glyph0-9" x="738.7" y="570"/>
+  <use xlink:href="#glyph0-3" x="758.195" y="570"/>
+  <use xlink:href="#glyph0-7" x="777.655" y="570"/>
+  <use xlink:href="#glyph0-11" x="791.06" y="570"/>
+  <use xlink:href="#glyph0-10" x="809.54" y="570"/>
+  <use xlink:href="#glyph0-10" x="838.765" y="570"/>
+  <use xlink:href="#glyph0-2" x="867.99" y="570"/>
+  <use xlink:href="#glyph0-12" x="877.09" y="570"/>
+  <use xlink:href="#glyph0-3" x="896.235" y="570"/>
+  <use xlink:href="#glyph0-14" x="915.695" y="570"/>
+  <use xlink:href="#glyph0-17" x="922.695" y="570"/>
+  <use xlink:href="#glyph0-11" x="931.41" y="570"/>
+  <use xlink:href="#glyph0-12" x="949.89" y="570"/>
+  <use xlink:href="#glyph0-3" x="969.035" y="570"/>
+  <use xlink:href="#glyph0-16" x="988.495" y="570"/>
+  <use xlink:href="#glyph0-11" x="1007.57" y="570"/>
+  <use xlink:href="#glyph0-3" x="1026.05" y="570"/>
+  <use xlink:href="#glyph0-6" x="1045.51" y="570"/>
+</g>
+</g>
+</g>
+<g clip-path="url(#clip15)" clip-rule="nonzero">
+<g clip-path="url(#clip16)" clip-rule="nonzero">
+<path style="fill:none;stroke-width:3;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 640 635 L 25 635 C 13.953125 635 5 626.046875 5 615 L 5 25 C 5 13.953125 13.953125 5 25 5 L 1255 5 C 1266.046875 5 1275 13.953125 1275 25 L 1275 615 C 1275 626.046875 1266.046875 635 1255 635 Z M 640 635 "/>
+</g>
+</g>
+</g>
+</svg>
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index 7922dd7d67861..7e45e2176478d 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -59,14 +59,16 @@ However, you can create variables with names:
 Finally:
 `where` is parsed as an infix operator for writing parametric method and type definitions;
 `in` and `isa` are parsed as infix operators;
-and `outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop.
-Creation of variables named `where`, `in`, `isa` or `outer` is allowed though.
+`outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop;
+and `as` is used as a keyword to rename an identifier brought into scope by `import` or `using`.
+Creation of variables named `where`, `in`, `isa`, `outer` and `as` is allowed, though.
 
 ```@docs
 module
 export
 import
 using
+as
 baremodule
 function
 macro
@@ -152,6 +154,7 @@ Base.promote
 Base.oftype
 Base.widen
 Base.identity
+Base.WeakRef
 ```
 
 ## Properties of Types
@@ -290,7 +293,6 @@ Base.@label
 Base.@simd
 Base.@polly
 Base.@generated
-Base.@pure
 Base.@assume_effects
 Base.@deprecate
 ```
diff --git a/doc/src/base/constants.md b/doc/src/base/constants.md
index 4ba0e627b0c54..14ddbc02698d0 100644
--- a/doc/src/base/constants.md
+++ b/doc/src/base/constants.md
@@ -23,6 +23,3 @@ See also:
   * [`stderr`](@ref)
   * [`ENV`](@ref)
   * [`ENDIAN_BOM`](@ref)
-  * `Libc.MS_ASYNC`
-  * `Libc.MS_INVALIDATE`
-  * `Libc.MS_SYNC`
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index bdf91c991183f..62368424629c6 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -208,5 +208,5 @@ The complete list is in the parser code:
 Those that are parsed like `*` (in terms of precedence) include
 `* / ÷ % & ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗`
 and those that are parsed like `+` include
-`+ - |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣`
+`+ - |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⟇ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣`
 There are many others that are related to arrows, comparisons, and powers.
diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md
index 4932aef4cc938..fb75b21479707 100644
--- a/doc/src/base/multi-threading.md
+++ b/doc/src/base/multi-threading.md
@@ -10,6 +10,7 @@ Base.Threads.nthreads
 Base.Threads.threadpool
 Base.Threads.nthreadpools
 Base.Threads.threadpoolsize
+Base.Threads.ngcthreads
 ```
 
 See also [Multi-Threading](@ref man-multithreading).
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index ee84f4b8b445d..c9f24429fd0e5 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -26,8 +26,6 @@ Base.schedule
 
 ## [Synchronization](@id lib-task-sync)
 
-## Synchronization
-
 ```@docs
 Base.errormonitor
 Base.@sync
diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md
index 41b7096391a04..16e1839cf64a2 100644
--- a/doc/src/base/sort.md
+++ b/doc/src/base/sort.md
@@ -168,9 +168,11 @@ Base.Sort.defalg(::AbstractArray{<:Union{SmallInlineStrings, Missing}}) = Inline
 By default, `sort` and related functions use [`isless`](@ref) to compare two
 elements in order to determine which should come first. The
 [`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining
-alternate orderings on the same set of elements. Instances of `Ordering` define
-a [total order](https://en.wikipedia.org/wiki/Total_order) on a set of elements,
-so that for any elements `a`, `b`, `c` the following hold:
+alternate orderings on the same set of elements: when calling a sorting function like
+`sort`, an instance of `Ordering` can be provided with the keyword argument `order`.
+
+Instances of `Ordering` define a [total order](https://en.wikipedia.org/wiki/Total_order)
+on a set of elements, so that for any elements `a`, `b`, `c` the following hold:
 
 * Exactly one of the following is true: `a` is less than `b`, `b` is less than
   `a`, or `a` and `b` are equal (according to [`isequal`](@ref)).
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index 9ada683b1ddb0..9fd03ad9a667a 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -249,14 +249,16 @@ types exist in lowered form:
     While almost every part of a surface AST is represented by an `Expr`, the IR uses only a
     limited number of `Expr`s, mostly for calls and some top-level-only forms.
 
-  * `Slot`
+  * `SlotNumber`
 
-    Identifies arguments and local variables by consecutive numbering. `Slot` is an abstract type
-    with subtypes `SlotNumber` and `TypedSlot`. Both types have an integer-valued `id` field giving
-    the slot index. Most slots have the same type at all uses, and so are represented with `SlotNumber`.
-    The types of these slots are found in the `slottypes` field of their `CodeInfo` object.
-    Slots that require per-use type annotations are represented with `TypedSlot`, which has a `typ`
-    field.
+    Identifies arguments and local variables by consecutive numbering. It has an
+    integer-valued `id` field giving the slot index.
+    The types of these slots can be found in the `slottypes` field of their `CodeInfo` object.
+    When a slot has different types at different uses and thus requires per-use type annotations,
+    they are converted to temporary `Core.Compiler.TypedSlot` object. This object has an
+    additional `typ` field as well as the `id` field. Note that `Core.Compiler.TypedSlot`
+    only appears in an unoptimized lowered form that is scheduled for optimization,
+    and it never appears elsewhere.
 
   * `Argument`
 
@@ -322,7 +324,7 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
   * `=`
 
-    Assignment. In the IR, the first argument is always a Slot or a GlobalRef.
+    Assignment. In the IR, the first argument is always a `SlotNumber` or a `GlobalRef`.
 
   * `method`
 
@@ -581,7 +583,7 @@ A unique'd container describing the shared metadata for a single method.
     Pointers to non-AST things that have been interpolated into the AST, required by
     compression of the AST, type-inference, or the generation of native code.
 
-  * `nargs`, `isva`, `called`, `isstaged`, `pure`
+  * `nargs`, `isva`, `called`, `is_for_opaque_closure`,
 
     Descriptive bit-fields for the source code of this Method.
 
@@ -655,7 +657,7 @@ for important details on how to modify these fields safely.
     The ABI to use when calling `fptr`. Some significant ones include:
 
       * 0 - Not compiled yet
-      * 1 - JL_CALLABLE `jl_value_t *(*)(jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
+      * 1 - `JL_CALLABLE` `jl_value_t *(*)(jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
       * 2 - Constant (value stored in `rettype_const`)
       * 3 - With Static-parameters forwarded `jl_value_t *(*)(jl_svec_t *sparams, jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
       * 4 - Run in interpreter `jl_value_t *(*)(jl_method_instance_t *meth, jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)`
@@ -683,10 +685,10 @@ A (usually temporary) container for holding lowered source code.
 
     A `UInt8` array of slot properties, represented as bit flags:
 
-      * 2  - assigned (only false if there are *no* assignment statements with this var on the left)
-      * 8  - const (currently unused for local variables)
-      * 16 - statically assigned once
-      * 32 - might be used before assigned. This flag is only valid after type inference.
+      * 0x02 - assigned (only false if there are *no* assignment statements with this var on the left)
+      * 0x08 - used (if there is any read or write of the slot)
+      * 0x10 - statically assigned once
+      * 0x20 - might be used before assigned. This flag is only valid after type inference.
 
   * `ssavaluetypes`
 
@@ -759,11 +761,6 @@ Boolean properties:
     Whether this should propagate `@inbounds` when inlined for the purpose of eliding
     `@boundscheck` blocks.
 
-  * `pure`
-
-    Whether this is known to be a pure function of its arguments, without respect to the
-    state of the method caches or other mutable global state.
-
 
 `UInt8` settings:
 
diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md
index e812e383c0592..ad3871c2e70f0 100644
--- a/doc/src/devdocs/build/build.md
+++ b/doc/src/devdocs/build/build.md
@@ -144,7 +144,7 @@ Notes for various architectures:
 Building Julia requires that the following software be installed:
 
 - **[GNU make]**                — building dependencies.
-- **[gcc & g++][gcc]** (>= 5.1) or **[Clang][clang]** (>= 3.5, >= 6.0 for Apple Clang) — compiling and linking C, C++.
+- **[gcc & g++][gcc]** (>= 7.1) or **[Clang][clang]** (>= 5.0, >= 9.3 for Apple Clang) — compiling and linking C, C++.
 - **[libatomic][gcc]**          — provided by **[gcc]** and needed to support atomic operations.
 - **[python]** (>=2.7)          — needed to build LLVM.
 - **[gfortran]**                — compiling and linking Fortran libraries.
@@ -169,7 +169,7 @@ repository) and then compiled from source the first time you run
 `make`. The specific version numbers of these libraries that Julia
 uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/julia/blob/master/deps/):
 
-- **[LLVM]** (14.0 + [patches](https://github.com/JuliaLang/llvm-project)) — compiler infrastructure (see [note below](#llvm)).
+- **[LLVM]** (15.0 + [patches](https://github.com/JuliaLang/llvm-project/tree/julia-release/15.x)) — compiler infrastructure (see [note below](#llvm)).
 - **[FemtoLisp]**            — packaged with Julia source, and used to implement the compiler front-end.
 - **[libuv]**  (custom fork) — portable, high-performance event-based I/O library.
 - **[OpenLibm]**             — portable libm library containing elementary math functions.
@@ -187,6 +187,7 @@ uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/juli
 - **[mbedtls]**              — library used for cryptography and transport layer security, used by libssh2
 - **[utf8proc]**             — a library for processing UTF-8 encoded Unicode strings.
 - **[LLVM libunwind]**       — LLVM's fork of [libunwind], a library that determines the call-chain of a program.
+- **[ITTAPI]**               — Intel's Instrumentation and Tracing Technology and Just-In-Time API.
 
 [GNU make]:     https://www.gnu.org/software/make
 [patch]:        https://www.gnu.org/software/patch
@@ -222,6 +223,7 @@ uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/juli
 [pkg-config]:   https://www.freedesktop.org/wiki/Software/pkg-config/
 [powershell]:   https://docs.microsoft.com/en-us/powershell/scripting/wmf/overview
 [which]:        https://carlowood.github.io/which/
+[ITTAPI]:       https://github.com/intel/ittapi
 
 ## Build dependencies
 
@@ -236,7 +238,7 @@ The most complicated dependency is LLVM, for which we require additional patches
 For packaging Julia with LLVM, we recommend either:
  - bundling a Julia-only LLVM library inside the Julia package, or
  - adding the patches to the LLVM package of the distribution.
-   * A complete list of patches is available in `deps/llvm.mk`, and the patches themselves are in `deps/patches/`.
+   * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/15.x` branch.
    * The only Julia-specific patch is the lib renaming (`llvm-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM.
    * The remaining patches are all upstream bug fixes, and have been contributed into upstream LLVM.
 
diff --git a/doc/src/devdocs/build/windows.md b/doc/src/devdocs/build/windows.md
index 4f7f40a030488..7192bb8a7a544 100644
--- a/doc/src/devdocs/build/windows.md
+++ b/doc/src/devdocs/build/windows.md
@@ -90,7 +90,8 @@ MinGW-w64 compilers available through Cygwin's package manager.
 
     3. Start the build
        ```sh
-       make -j 4   # Adjust the number of threads (4) to match your build environment.
+       make -j 4       # Adjust the number of threads (4) to match your build environment.
+       make -j 4 debug # This builds julia-debug.exe
        ```
 
 
diff --git a/doc/src/devdocs/external_profilers.md b/doc/src/devdocs/external_profilers.md
new file mode 100644
index 0000000000000..956d66508fc89
--- /dev/null
+++ b/doc/src/devdocs/external_profilers.md
@@ -0,0 +1,98 @@
+# External Profiler Support
+
+Julia provides explicit support for some external tracing profilers, enabling you to obtain a high-level overview of the runtime's execution behavior.
+
+The currently supported profilers are:
+- [Tracy](https://github.com/wolfpld/tracy)
+- [Intel VTune (ITTAPI)](https://github.com/intel/ittapi)
+
+### Adding New Zones
+
+To add new zones, use the `JL_TIMING` macro. You can find numerous examples throughout the codebase by searching for `JL_TIMING`. To add a new type of zone
+you add it to `JL_TIMING_OWNERS` (and possibly `JL_TIMING_EVENTS`).
+
+### Dynamically Enabling and Disabling Zones
+
+The `JULIA_TIMING_SUBSYSTEMS` environment variable allows you to enable or disable zones for a specific Julia run. For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable the `INFERENCE`
+zones.
+
+## Tracy Profiler
+
+[Tracy](https://github.com/wolfpld/tracy)  is a flexible profiler that can be optionally integrated with Julia.
+
+A typical Tracy session might look like this:
+
+![Typical Tracy usage](tracy.png)
+
+### Building Julia with Tracy
+
+To enable Tracy integration, build Julia with the extra option `WITH_TRACY=1` in the `Make.user` file.
+
+### Installing the Tracy Profile Viewer
+
+The easiest way to obtain the profile viewer is by adding the `TracyProfiler_jll` package and launching the profiler with:
+
+```julia
+run(TracyProfiler_jll.tracy())
+```
+
+!!! note
+    On macOS, you may want to set the `TRACY_DPI_SCALE` environment variable to `1.0` if the UI elements in the profiler appear excessively large.
+
+To run a "headless" instance that saves the trace to disk, use `TracyProfiler_jll.capture() -o mytracefile.tracy` instead.
+
+For information on using the Tracy UI, refer to the Tracy manual.
+
+### Profiling Julia with Tracy
+
+A typical workflow for profiling Julia with Tracy involves starting Julia using:
+
+```julia
+JULIA_WAIT_FOR_TRACY=1 ./julia -e '...'
+```
+
+The environment variable ensures that Julia waits until it has successfully connected to the Tracy profiler before continuing execution. Afterward, use the Tracy profiler UI, click `Connect`, and Julia execution should resume and profiling should start.
+
+### Profiling package precompilation with Tracy
+
+To profile a package precompilation process it is easiest to explicitly call into `Base.compilecache` with the package you want to precompile:
+
+```julia
+pkg = Base.identify_package("SparseArrays")
+withenv("JULIA_WAIT_FOR_TRACY" => 1, "TRACY_PORT" => 9001) do
+    Base.compilecache(pkg)
+end
+```
+
+Here, we use a custom port for tracy which makes it easier to find the correct client in the Tracy UI to connect to.
+
+### Adding metadata to zones
+
+The various `jl_timing_show_*` and `jl_timing_printf` functions can be used to attach a string (or strings) to a zone. For example, the trace zone for inference shows the method instance that is being inferred.
+
+The `TracyCZoneColor` function can be used to set the color of a certain zone. Search through the codebase to see how it is used.
+
+### Viewing Tracy files in your browser
+
+Visit https://topolarity.github.io/trace-viewer/ for an (experimental) web viewer for Tracy traces.
+
+You can open a local `.tracy` file or provide a URL from the web (e.g. a file in a Github repo). If you load a trace file from the web, you can also share the page URL directly with others, enabling them to view the same trace.
+
+### Enabling stack trace samples
+
+To enable call stack sampling in Tracy, build Julia with these options in your `Make.user` file:
+```
+WITH_TRACY := 1
+WITH_TRACY_CALLSTACKS := 1
+USE_BINARYBUILDER_LIBTRACYCLIENT := 0
+```
+
+You may also need to run `make -C deps clean-libtracyclient` to force a re-build of Tracy.
+
+This feature has a significant impact on trace size and profiling overhead, so it is recommended to leave call stack sampling off when possible, especially if you intend to share your trace files online.
+
+Note that the Julia JIT runtime does not yet have integration for Tracy's symbolification, so Julia functions will typically be unknown in these stack traces.
+
+## Intel VTune (ITTAPI) Profiler
+
+*This section is yet to be written.*
diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md
index 5c54ae3c7121d..b6614d060a0c8 100644
--- a/doc/src/devdocs/inference.md
+++ b/doc/src/devdocs/inference.md
@@ -6,8 +6,8 @@ In Julia compiler, "type inference" refers to the process of deducing the types
 values from the types of input values. Julia's approach to inference has been described in
 the blog posts below:
 1. [Shows a simplified implementation of the data-flow analysis algorithm, that Julia's type inference routine is based on.](https://aviatesk.github.io/posts/data-flow-problem/)
-2. [Gives a high level view of inference with a focus on its inter-procedural convergence guarantee.](https://juliacomputing.com/blog/2016/04/inference-convergence/)
-3. [Explains a refinement on the algorithm introduced in 2.](https://juliacomputing.com/blog/2017/05/inference-converage2/)
+2. [Gives a high level view of inference with a focus on its inter-procedural convergence guarantee.](https://info.juliahub.com/inference-convergence-algorithm-in-julia)
+3. [Explains a refinement on the algorithm introduced in 2.](https://info.juliahub.com/inference-convergence-algorithm-in-julia-revisited)
 
 ## Debugging compiler.jl
 
diff --git a/doc/src/devdocs/init.md b/doc/src/devdocs/init.md
index 981a19b13fcf3..1e0e1173f8695 100644
--- a/doc/src/devdocs/init.md
+++ b/doc/src/devdocs/init.md
@@ -118,7 +118,7 @@ Other signals (`SIGINFO, SIGBUS, SIGILL, SIGTERM, SIGABRT, SIGQUIT, SIGSYS` and
 hooked up to [`sigdie_handler()`](https://github.com/JuliaLang/julia/blob/master/src/signals-unix.c)
 which prints a backtrace.
 
-[`jl_init_restored_modules()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) calls
+[`jl_init_restored_module()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) calls
 [`jl_module_run_initializer()`](https://github.com/JuliaLang/julia/blob/master/src/module.c) for
 each deserialized module to run the `__init__()` function.
 
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 6c6b33c2281dc..93b241d703714 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -82,6 +82,8 @@ Here are example settings using `bash` syntax:
   * `export JULIA_LLVM_ARGS=-debug-only=loop-vectorize` dumps LLVM `DEBUG(...)` diagnostics for
     loop vectorizer. If you get warnings about "Unknown command line argument", rebuild LLVM with
     `LLVM_ASSERTIONS = 1`.
+  * `export JULIA_LLVM_ARGS=-help` shows a list of available options.
+  * `export JULIA_LLVM_ARGS="-fatal-warnings -print-options"` is an example how to use multiple options.
 
 ## Debugging LLVM transformations in isolation
 
diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md
index cf377c052bf15..caba6c3f12190 100644
--- a/doc/src/devdocs/object.md
+++ b/doc/src/devdocs/object.md
@@ -189,6 +189,8 @@ then tagged with its type:
 jl_value_t *jl_gc_allocobj(size_t nbytes);
 void jl_set_typeof(jl_value_t *v, jl_datatype_t *type);
 ```
+!!! note "Out of date Warning"
+    The documentation and usage for the function `jl_gc_allocobj` may be out of date
 
 Note that all objects are allocated in multiples of 4 bytes and aligned to the platform pointer
 size. Memory is allocated from a pool for smaller objects, or directly with `malloc()` for large
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
index 5cfd9f6a762f8..d15723e945462 100644
--- a/doc/src/devdocs/probes.md
+++ b/doc/src/devdocs/probes.md
@@ -27,28 +27,28 @@ to enable USDT probes.
 > readelf -n usr/lib/libjulia-internal.so.1
 
 Displaying notes found in: .note.gnu.build-id
-  Owner                Data size 	Description
-  GNU                  0x00000014	NT_GNU_BUILD_ID (unique build ID bitstring)
+  Owner                Data size  Description
+  GNU                  0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring)
     Build ID: 57161002f35548772a87418d2385c284ceb3ead8
 
 Displaying notes found in: .note.stapsdt
-  Owner                Data size 	Description
-  stapsdt              0x00000029	NT_STAPSDT (SystemTap probe descriptors)
+  Owner                Data size  Description
+  stapsdt              0x00000029 NT_STAPSDT (SystemTap probe descriptors)
     Provider: julia
     Name: gc__begin
     Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac
     Arguments:
-  stapsdt              0x00000032	NT_STAPSDT (SystemTap probe descriptors)
+  stapsdt              0x00000032 NT_STAPSDT (SystemTap probe descriptors)
     Provider: julia
     Name: gc__stop_the_world
     Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae
     Arguments:
-  stapsdt              0x00000027	NT_STAPSDT (SystemTap probe descriptors)
+  stapsdt              0x00000027 NT_STAPSDT (SystemTap probe descriptors)
     Provider: julia
     Name: gc__end
     Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0
     Arguments:
-  stapsdt              0x0000002d	NT_STAPSDT (SystemTap probe descriptors)
+  stapsdt              0x0000002d NT_STAPSDT (SystemTap probe descriptors)
     Provider: julia
     Name: gc__finalizer
     Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2
@@ -308,7 +308,7 @@ An example probe in the bpftrace format looks like:
 ```
 usdt:usr/lib/libjulia-internal.so:julia:gc__begin
 {
-	@start[pid] = nsecs;
+  @start[pid] = nsecs;
 }
 ```
 
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index a21e3ba265f9b..6706e30ce97b1 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -8,6 +8,9 @@ as many platforms as possible, so as to give vastly improved startup times.  On
 not ship with a precompiled system image file, one can be generated from the source files shipped
 in Julia's `DATAROOTDIR/julia/base` folder.
 
+Julia will by default generate its system image on half of the available system threads. This
+may be controlled by the [`JULIA_IMAGE_THREADS`](@ref env-image-threads) environment variable.
+
 This operation is useful for multiple reasons.  A user may:
 
   * Build a precompiled shared library system image on a platform that did not ship with one, thereby
@@ -101,7 +104,7 @@ See code comments for each components for more implementation details.
     (see comments in `MultiVersioning::runOnModule` for how this is done),
     the pass also generates metadata so that the runtime can load and initialize the
     system image correctly.
-    A detail description of the metadata is available in `src/processor.h`.
+    A detailed description of the metadata is available in `src/processor.h`.
 
 2. System image loading
 
diff --git a/doc/src/devdocs/tracy.png b/doc/src/devdocs/tracy.png
new file mode 100644
index 0000000000000..a0371be9db63e
Binary files /dev/null and b/doc/src/devdocs/tracy.png differ
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index d8ec8110b6d18..eab901adc2043 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -1118,9 +1118,7 @@ For more details on how to pass callbacks to C libraries, see this [blog post](h
 
 ## C++
 
-For direct C++ interfacing, see the [Cxx](https://github.com/Keno/Cxx.jl) package. For tools to create C++
-bindings, see the [CxxWrap](https://github.com/JuliaInterop/CxxWrap.jl) package.
-
+For tools to create C++ bindings, see the [CxxWrap](https://github.com/JuliaInterop/CxxWrap.jl) package.
 
 
 [^1]: Non-library function calls in both C and Julia can be inlined and thus may have
diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md
index e35cbf5e313e7..8164299f01250 100644
--- a/doc/src/manual/command-line-interface.md
+++ b/doc/src/manual/command-line-interface.md
@@ -69,7 +69,7 @@ Note that although you should have a `~/.julia` directory once you've run Julia
 first time, you may need to create the `~/.julia/config` folder and the
 `~/.julia/config/startup.jl` file if you use it.
 
-To have startup code run only in [The Julia REPL] (and not when `julia` is *e.g.* run
+To have startup code run only in [The Julia REPL](@ref) (and not when `julia` is *e.g.* run
 on a script), use [`atreplinit`](@ref) in `startup.jl`:
 
 ```julia
@@ -106,8 +106,9 @@ The following is a complete list of command-line switches available when launchi
 |`-e`, `--eval <expr>`                  |Evaluate `<expr>`|
 |`-E`, `--print <expr>`                 |Evaluate `<expr>` and display the result|
 |`-L`, `--load <file>`                  |Load `<file>` immediately on all processors|
-|`-t`, `--threads {N\|auto`}            |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future.  Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.|
-|`-p`, `--procs {N\|auto`}              |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
+|`-t`, `--threads {N\|auto}`            |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future.  Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.|
+| `--gcthreads {N}`                     |Enable N GC threads; If unspecified is set to half of the compute worker threads.|
+|`-p`, `--procs {N\|auto}`              |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
 |`--machine-file <file>`                |Run processes on hosts listed in `<file>`|
 |`-i`                                   |Interactive mode; REPL runs and `isinteractive()` is true|
 |`-q`, `--quiet`                        |Quiet startup: no banner, suppress REPL warnings|
@@ -125,8 +126,10 @@ The following is a complete list of command-line switches available when launchi
 |`--check-bounds={yes\|no\|auto*}`      |Emit bounds checks always, never, or respect `@inbounds` declarations ($)|
 |`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)|
 |`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)|
+|`--code-coverage=@<path>`              |Count executions but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
 |`--code-coverage=tracefile.info`       |Append coverage information to the LCOV tracefile (filename supports format tokens).|
 |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
+|`--track-allocation=@<path>`           |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
 |`--bug-report=KIND`                    |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
 |`--compile={yes*\|no\|all\|min}`       |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
 |`--output-o <name>`                    |Generate an object file (including system image data)|
diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
index ca6c241ca0583..9cab2ed1e4f24 100644
--- a/doc/src/manual/complex-and-rational-numbers.md
+++ b/doc/src/manual/complex-and-rational-numbers.md
@@ -140,7 +140,7 @@ when applied to `-1` versus `-1 + 0im` even though `-1 == -1 + 0im`:
 ```jldoctest
 julia> sqrt(-1)
 ERROR: DomainError with -1.0:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
 
diff --git a/doc/src/manual/constructors.md b/doc/src/manual/constructors.md
index dad96e374742e..6ec206dade335 100644
--- a/doc/src/manual/constructors.md
+++ b/doc/src/manual/constructors.md
@@ -244,8 +244,8 @@ ERROR: UndefRefError: access to undefined reference
 This avoids the need to continually check for `null` values. However, not all object fields are
 references. Julia considers some types to be "plain data", meaning all of their data is self-contained
 and does not reference other objects. The plain data types consist of primitive types (e.g. `Int`)
-and immutable structs of other plain data types. The initial contents of a plain data type is
-undefined:
+and immutable structs of other plain data types (see also: [`isbits`](@ref), [`isbitstype`](@ref)).
+The initial contents of a plain data type is undefined:
 
 ```julia-repl
 julia> struct HasPlain
diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md
index ba78a8c5b1670..5d12530892b1e 100644
--- a/doc/src/manual/control-flow.md
+++ b/doc/src/manual/control-flow.md
@@ -623,7 +623,7 @@ real value:
 ```jldoctest
 julia> sqrt(-1)
 ERROR: DomainError with -1.0:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
 ```
@@ -797,7 +797,7 @@ julia> sqrt_second(9)
 
 julia> sqrt_second(-9)
 ERROR: DomainError with -9.0:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
 ```
diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md
index 82073c1446bf8..f0c156f21ea62 100644
--- a/doc/src/manual/conversion-and-promotion.md
+++ b/doc/src/manual/conversion-and-promotion.md
@@ -233,11 +233,11 @@ julia> promote(1 + 2im, 3//4)
 ```
 
 Floating-point values are promoted to the largest of the floating-point argument types. Integer
-values are promoted to the larger of either the native machine word size or the largest integer
-argument type. Mixtures of integers and floating-point values are promoted to a floating-point
-type big enough to hold all the values. Integers mixed with rationals are promoted to rationals.
-Rationals mixed with floats are promoted to floats. Complex values mixed with real values are
-promoted to the appropriate kind of complex value.
+values are promoted to the largest of the integer argument types. If the types are the same size
+but differ in signedness, the unsigned type is chosen. Mixtures of integers and floating-point
+values are promoted to a floating-point type big enough to hold all the values. Integers mixed
+with rationals are promoted to rationals. Rationals mixed with floats are promoted to floats.
+Complex values mixed with real values are promoted to the appropriate kind of complex value.
 
 That is really all there is to using promotions. The rest is just a matter of clever application,
 the most typical "clever" application being the definition of catch-all methods for numeric operations
diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md
index 9b8a67bb8c4c2..2b6e48c533849 100644
--- a/doc/src/manual/embedding.md
+++ b/doc/src/manual/embedding.md
@@ -6,7 +6,8 @@ calling Julia functions from C code. This can be used to integrate Julia code in
 C/C++ project, without the need to rewrite everything in C/C++. Julia has a C API to make
 this possible. As almost all programming languages have some way to call C functions, the
 Julia C API can also be used to build further language bridges (e.g. calling Julia from
-Python or C#).
+Python, Rust or C#). Even though Rust and C++ can use the C embedding API directly, both
+have packages helping with it, for C++ [Jluna](https://github.com/Clemapfel/jluna) is useful.
 
 ## High-Level Embedding
 
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index f29e5b7aaf8f7..ac5a6fad6cc08 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -78,6 +78,7 @@ and a global configuration search path of
 A directory path that indicates which project should be the initial active project.
 Setting this environment variable has the same effect as specifying the `--project`
 start-up option, but `--project` has higher precedence. If the variable is set to `@.`
+(note the trailing dot)
 then Julia tries to find a project directory that contains `Project.toml` or
 `JuliaProject.toml` file from the current directory and its parents. See also
 the chapter on [Code Loading](@ref code-loading).
@@ -276,7 +277,7 @@ To use Visual Studio Code on Windows, set `$JULIA_EDITOR` to `code.cmd`.
 
 ## Parallelization
 
-### `JULIA_CPU_THREADS`
+### [`JULIA_CPU_THREADS`](@id env-cpu-threads)
 
 Overrides the global variable [`Base.Sys.CPU_THREADS`](@ref), the number of
 logical CPU cores available.
@@ -315,6 +316,27 @@ then spinning threads never sleep. Otherwise, `$JULIA_THREAD_SLEEP_THRESHOLD` is
 interpreted as an unsigned 64-bit integer (`uint64_t`) and gives, in
 nanoseconds, the amount of time after which spinning threads should sleep.
 
+### [`JULIA_NUM_GC_THREADS`](@id env-gc-threads)
+
+Sets the number of threads used by Garbage Collection. If unspecified is set to
+half of the number of worker threads.
+
+!!! compat "Julia 1.10"
+    The environment variable was added in 1.10
+
+### [`JULIA_IMAGE_THREADS`](@id env-image-threads)
+
+An unsigned 32-bit integer that sets the number of threads used by image
+compilation in this Julia process. The value of this variable may be
+ignored if the module is a small module. If left unspecified, the smaller
+of the value of [`JULIA_CPU_THREADS`](@ref env-cpu-threads) or half the
+number of logical CPU cores is used in its place.
+
+### `JULIA_IMAGE_TIMINGS`
+
+A boolean value that determines if detailed timing information is printed during
+during image compilation. Defaults to 0.
+
 ### `JULIA_EXCLUSIVE`
 
 If set to anything besides `0`, then Julia's thread policy is consistent with
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index 741843bca33e5..e3960ee1a4690 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -402,7 +402,7 @@ Certain operations make mathematical sense but result in errors:
 ```jldoctest
 julia> sqrt(-2.0)
 ERROR: DomainError with -2.0:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 Stacktrace:
 [...]
 ```
@@ -818,10 +818,13 @@ to strings); similarly, `repeat` can be used instead of `^` to repeat strings. T
 
 ### What is the difference between "using" and "import"?
 
-There is only one difference, and on the surface (syntax-wise) it may seem very minor. The difference
-between `using` and `import` is that with `using` you need to say `function Foo.bar(..` to
-extend module Foo's function bar with a new method, but with `import Foo.bar`,
-you only need to say `function bar(...` and it automatically extends module Foo's function bar.
+There are several differences between `using` and `import`
+(see the [Modules section](https://docs.julialang.org/en/v1/manual/modules/#modules)),
+but there is an important difference that may not seem intuitive at first glance,
+and on the surface (i.e. syntax-wise) it may seem very minor. When loading modules with `using`,
+you need to say `function Foo.bar(...` to extend module `Foo`'s function `bar` with a new method,
+but with `import Foo.bar`, you only need to say `function bar(...` and it automatically extends
+module `Foo`'s function `bar`.
 
 The reason this is important enough to have been given separate syntax is that you don't want
 to accidentally extend a function that you didn't know existed, because that could easily cause
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index a504f8e3511b2..8ca00aa1cfe76 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -322,10 +322,10 @@ julia> g(2.0, 3.0)
 ERROR: MethodError: g(::Float64, ::Float64) is ambiguous.
 
 Candidates:
-  g(x::Float64, y)
-    @ Main none:1
   g(x, y::Float64)
     @ Main none:1
+  g(x::Float64, y)
+    @ Main none:1
 
 Possible fix, define
   g(::Float64, ::Float64)
@@ -1247,5 +1247,6 @@ function f2(inc)
         x -> x - 1
     end
 end
+```
 
 [^Clarke61]: Arthur C. Clarke, *Profiles of the Future* (1961): Clarke's Third Law.
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index b012de27ac81f..be64390e473f2 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -72,6 +72,15 @@ julia> Threads.threadid()
     three processes have 2 threads enabled. For more fine grained control over worker
     threads use [`addprocs`](@ref) and pass `-t`/`--threads` as `exeflags`.
 
+### Multiple GC Threads
+
+The Garbage Collector (GC) can use multiple threads. The amount used is either half the number
+of compute worker threads or configured by either the `--gcthreads` command line argument or by using the
+[`JULIA_NUM_GC_THREADS`](@ref env-gc-threads) environment variable.
+
+!!! compat "Julia 1.10"
+    The `--gcthreads` command line argument requires at least Julia 1.10.
+
 ## [Threadpools](@id man-threadpools)
 
 When a program's threads are busy with many tasks to run, tasks may experience
@@ -104,19 +113,26 @@ the `:interactive` threadpool:
 ```julia-repl
 julia> using Base.Threads
 
-julia> nthreads()
-4
-
 julia> nthreadpools()
 2
 
 julia> threadpool()
 :default
 
+julia> nthreads(:default)
+3
+
 julia> nthreads(:interactive)
 1
+
+julia> nthreads()
+3
 ```
 
+!!! note
+    The zero-argument version of `nthreads` returns the number of threads
+    in the default pool.
+
 Either or both numbers can be replaced with the word `auto`, which causes
 Julia to choose a reasonable default.
 
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index 1eee23e163a77..ffb84333e8e78 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -584,7 +584,7 @@ h_vararg(x::Vararg{Any, N}) where {N} = tuple(x...)
 Note that [`@code_typed`](@ref) and friends will always show you specialized code, even if Julia
 would not normally specialize that method call. You need to check the
 [method internals](@ref ast-lowered-method) if you want to see whether specializations are generated
-when argument types are changed, i.e., if `(@which f(...)).specializations` contains specializations
+when argument types are changed, i.e., if `Base.specializations(@which f(...))` contains specializations
 for the argument in question.
 
 ## Break functions into multiple definitions
diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md
index b2c9d722f57e6..e5f1d6c417fa6 100644
--- a/doc/src/manual/profile.md
+++ b/doc/src/manual/profile.md
@@ -59,12 +59,13 @@ julia> @profile myfunc()
 
 To see the profiling results, there are several graphical browsers.
 One "family" of visualizers is based on [FlameGraphs.jl](https://github.com/timholy/FlameGraphs.jl), with each family member providing a different user interface:
-- [Juno](https://junolab.org/) is a full IDE with built-in support for profile visualization
+- [VS Code](https://www.julia-vscode.org/) is a full IDE with built-in support for profile visualization
 - [ProfileView.jl](https://github.com/timholy/ProfileView.jl) is a stand-alone visualizer based on GTK
 - [ProfileVega.jl](https://github.com/davidanthoff/ProfileVega.jl) uses VegaLight and integrates well with Jupyter notebooks
 - [StatProfilerHTML.jl](https://github.com/tkluck/StatProfilerHTML.jl) produces HTML and presents some additional summaries, and also integrates well with Jupyter notebooks
 - [ProfileSVG.jl](https://github.com/timholy/ProfileSVG.jl) renders SVG
 - [PProf.jl](https://github.com/JuliaPerf/PProf.jl) serves a local website for inspecting graphs, flamegraphs and more
+- [ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) is a HTML canvas based profile viewer UI, used by the [Julia VS Code extension](https://www.julia-vscode.org/), but can also generate interactive HTML files.
 
 An entirely independent approach to profile visualization is [PProf.jl](https://github.com/vchuravy/PProf.jl), which uses the external `pprof` tool.
 
@@ -308,26 +309,6 @@ and specific lines triggering allocation can often be inferred from profiling vi
 collection that these lines incur. However, sometimes it is more efficient to directly measure
 the amount of memory allocated by each line of code.
 
-### Line-by-Line Allocation Tracking
-
-To measure allocation line-by-line, start Julia with the `--track-allocation=<setting>` command-line
-option, for which you can choose `none` (the default, do not measure allocation), `user` (measure
-memory allocation everywhere except Julia's core code), or `all` (measure memory allocation at
-each line of Julia code). Allocation gets measured for each line of compiled code. When you quit
-Julia, the cumulative results are written to text files with `.mem` appended after the file name,
-residing in the same directory as the source file. Each line lists the total number of bytes
-allocated. The [`Coverage` package](https://github.com/JuliaCI/Coverage.jl) contains some elementary
-analysis tools, for example to sort the lines in order of number of bytes allocated.
-
-In interpreting the results, there are a few important details. Under the `user` setting, the
-first line of any function directly called from the REPL will exhibit allocation due to events
-that happen in the REPL code itself. More significantly, JIT-compilation also adds to allocation
-counts, because much of Julia's compiler is written in Julia (and compilation usually requires
-memory allocation). The recommended procedure is to force compilation by executing all the commands
-you want to analyze, then call [`Profile.clear_malloc_data()`](@ref) to reset all allocation counters.
- Finally, execute the desired commands and quit Julia to trigger the generation of the `.mem`
-files.
-
 ### GC Logging
 
 While [`@time`](@ref) logs high-level stats about memory usage and garbage collection over the course
@@ -337,17 +318,20 @@ and how much garbage it collects each time. This can be enabled with
 [`GC.enable_logging(true)`](@ref), which causes Julia to log to stderr every time
 a garbage collection happens.
 
-### Allocation Profiler
+### [Allocation Profiler](@id allocation-profiler)
+
+!!! compat "Julia 1.8"
+    This functionality requires at least Julia 1.8.
 
 The allocation profiler records the stack trace, type, and size of each
 allocation while it is running. It can be invoked with
 [`Profile.Allocs.@profile`](@ref).
 
 This information about the allocations is returned as an array of `Alloc`
-objects, wrapped in an `AllocResults` object. The best way to visualize
-these is currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl)
-package, which can visualize the call stacks which are making the most
-allocations.
+objects, wrapped in an `AllocResults` object. The best way to visualize these is
+currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl) and
+[ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) packages, which
+can visualize the call stacks which are making the most allocations.
 
 The allocation profiler does have significant overhead, so a `sample_rate`
 argument can be passed to speed it up by making it skip some allocations.
@@ -364,6 +348,32 @@ Passing `sample_rate=1.0` will make it record everything (which is slow);
     You can read more about the missing types and the plan to improve this, here:
     [issue #43688](https://github.com/JuliaLang/julia/issues/43688).
 
+#### Line-by-Line Allocation Tracking
+
+An alternative way to measure allocations is to start Julia with the `--track-allocation=<setting>` command-line
+option, for which you can choose `none` (the default, do not measure allocation), `user` (measure
+memory allocation everywhere except Julia's core code), or `all` (measure memory allocation at
+each line of Julia code). Allocation gets measured for each line of compiled code. When you quit
+Julia, the cumulative results are written to text files with `.mem` appended after the file name,
+residing in the same directory as the source file. Each line lists the total number of bytes
+allocated. The [`Coverage` package](https://github.com/JuliaCI/Coverage.jl) contains some elementary
+analysis tools, for example to sort the lines in order of number of bytes allocated.
+
+In interpreting the results, there are a few important details. Under the `user` setting, the
+first line of any function directly called from the REPL will exhibit allocation due to events
+that happen in the REPL code itself. More significantly, JIT-compilation also adds to allocation
+counts, because much of Julia's compiler is written in Julia (and compilation usually requires
+memory allocation). The recommended procedure is to force compilation by executing all the commands
+you want to analyze, then call [`Profile.clear_malloc_data()`](@ref) to reset all allocation counters.
+ Finally, execute the desired commands and quit Julia to trigger the generation of the `.mem`
+files.
+
+!!! note
+
+    `--track-allocation` changes code generation to log the allocations, and so the allocations may
+    be different than what happens without the option. We recommend using the
+    [allocation profiler](@ref allocation-profiler) instead.
+
 ## External Profiling
 
 Currently Julia supports `Intel VTune`, `OProfile` and `perf` as external profiling tools.
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index f73093e9c0b91..fca4fc75d9e0f 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -48,7 +48,7 @@ to a numeric value representing a
 [Unicode code point](https://en.wikipedia.org/wiki/Code_point).  (Julia packages may define
 other subtypes of `AbstractChar`, e.g. to optimize operations for other
 [text encodings](https://en.wikipedia.org/wiki/Character_encoding).) Here is how `Char` values are
-input and shown:
+input and shown (note that character literals are delimited with single quotes, not double quotes):
 
 ```jldoctest
 julia> c = 'x'
@@ -156,7 +156,7 @@ julia> 'A' + 1
 
 ## String Basics
 
-String literals are delimited by double quotes or triple double quotes:
+String literals are delimited by double quotes or triple double quotes (not single quotes):
 
 ```jldoctest helloworldstring
 julia> str = "Hello, world.\n"
@@ -535,7 +535,9 @@ Constructing strings using concatenation can become a bit cumbersome, however. T
 verbose calls to [`string`](@ref) or repeated multiplications, Julia allows interpolation into string literals
 using `$`, as in Perl:
 
-```jldoctest stringconcat
+```jldoctest
+julia> greet = "Hello"; whom = "world";
+
 julia> "$greet, $whom.\n"
 "Hello, world.\n"
 ```
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md
index cbe7e9b94eefc..d250fdd811387 100644
--- a/doc/src/manual/style-guide.md
+++ b/doc/src/manual/style-guide.md
@@ -378,7 +378,7 @@ You generally want to use [`isa`](@ref) and [`<:`](@ref) for testing types,
 not `==`. Checking types for exact equality typically only makes sense when comparing to a known
 concrete type (e.g. `T == Float64`), or if you *really, really* know what you're doing.
 
-## Do not write `x->f(x)`
+## Don't write a trivial anonymous function `x->f(x)` for a named function `f`
 
 Since higher-order functions are often called with anonymous functions, it is easy to conclude
 that this is desirable or even necessary. But any function can be passed directly, without being
diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md
index ce61b1a25a0dc..430a006c67788 100644
--- a/doc/src/manual/types.md
+++ b/doc/src/manual/types.md
@@ -978,24 +978,29 @@ alias for `Tuple{Vararg{T,N}}`, i.e. a tuple type containing exactly `N` element
 
 Named tuples are instances of the [`NamedTuple`](@ref) type, which has two parameters: a tuple of
 symbols giving the field names, and a tuple type giving the field types.
+For convenience, `NamedTuple` types are printed using the [`@NamedTuple`](@ref) macro which provides a
+convenient `struct`-like syntax for declaring these types via `key::Type` declarations,
+where an omitted `::Type` corresponds to `::Any`.
+
 
 ```jldoctest
-julia> typeof((a=1,b="hello"))
-NamedTuple{(:a, :b), Tuple{Int64, String}}
+julia> typeof((a=1,b="hello")) # prints in macro form
+@NamedTuple{a::Int64, b::String}
+
+julia> NamedTuple{(:a, :b), Tuple{Int64, String}} # long form of the type
+@NamedTuple{a::Int64, b::String}
 ```
 
-The [`@NamedTuple`](@ref) macro provides a more convenient `struct`-like syntax for declaring
-`NamedTuple` types via `key::Type` declarations, where an omitted `::Type` corresponds to `::Any`.
+The `begin ... end` form of the `@NamedTuple` macro allows the declarations to be
+split across multiple lines (similar to a struct declaration), but is otherwise equivalent:
 
-```jldoctest
-julia> @NamedTuple{a::Int, b::String}
-NamedTuple{(:a, :b), Tuple{Int64, String}}
 
+```jldoctest
 julia> @NamedTuple begin
            a::Int
            b::String
        end
-NamedTuple{(:a, :b), Tuple{Int64, String}}
+@NamedTuple{a::Int64, b::String}
 ```
 
 A `NamedTuple` type can be used as a constructor, accepting a single tuple argument.
@@ -1003,10 +1008,10 @@ The constructed `NamedTuple` type can be either a concrete type, with both param
 or a type that specifies only field names:
 
 ```jldoctest
-julia> @NamedTuple{a::Float32,b::String}((1,""))
+julia> @NamedTuple{a::Float32,b::String}((1, ""))
 (a = 1.0f0, b = "")
 
-julia> NamedTuple{(:a, :b)}((1,""))
+julia> NamedTuple{(:a, :b)}((1, ""))
 (a = 1, b = "")
 ```
 
diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md
index 4d057a1ab4883..6c22719c1ce86 100644
--- a/doc/src/manual/variables.md
+++ b/doc/src/manual/variables.md
@@ -216,7 +216,7 @@ a [`mutable struct`](@ref).
 
 When you call a [function](@ref man-functions) in Julia, it behaves as if you *assigned*
 the argument values to new variable names corresponding to the function arguments, as discussed
-in [Argument-Passing Behavior](@ref man-functions).  (By [convention](@ref man-punctuation),
+in [Argument-Passing Behavior](@ref man-argument-passing).  (By [convention](@ref man-punctuation),
 functions that mutate one or more of their arguments have names ending with `!`.)
 
 
diff --git a/pkgimage.mk b/pkgimage.mk
new file mode 100644
index 0000000000000..dcf9dd1303d47
--- /dev/null
+++ b/pkgimage.mk
@@ -0,0 +1,124 @@
+SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+BUILDDIR := .
+JULIAHOME := $(SRCDIR)
+include $(JULIAHOME)/Make.inc
+
+VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
+
+export JULIA_DEPOT_PATH := $(build_prefix)/share/julia
+
+$(JULIA_DEPOT_PATH):
+	mkdir -p $@
+
+print-depot-path:
+	@$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH')
+
+STDLIBS := ArgTools Artifacts Base64 CRC32c FileWatching Libdl NetworkOptions SHA Serialization \
+		   GMP_jll LLVMLibUnwind_jll LibUV_jll LibUnwind_jll MbedTLS_jll OpenLibm_jll PCRE2_jll \
+		   Zlib_jll dSFMT_jll libLLVM_jll libblastrampoline_jll OpenBLAS_jll Printf Random Tar \
+		   LibSSH2_jll MPFR_jll LinearAlgebra Dates Distributed Future LibGit2 Profile SparseArrays UUIDs \
+		   SharedArrays TOML Test LibCURL Downloads Pkg Dates LazyArtifacts Sockets Unicode Markdown \
+		   InteractiveUtils REPL DelimitedFiles
+
+all-release: $(addprefix cache-release-, $(STDLIBS))
+all-debug:   $(addprefix cache-debug-, $(STDLIBS))
+
+define pkgimg_builder
+$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \
+    $$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1)
+$$(BUILDDIR)/stdlib/$1.release.image: $$($1_SRCS) $$(addsuffix .release.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys.$(SHLIB_EXT)
+	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))')
+	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))')
+	touch $$@
+cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image
+$$(BUILDDIR)/stdlib/$1.debug.image: $$($1_SRCS) $$(addsuffix .debug.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
+	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))')
+	@$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))')
+cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image
+.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image
+endef
+
+# Used to just define them in the dependency graph
+# reside in the system image
+define sysimg_builder
+$$(BUILDDIR)/stdlib/$1.release.image:
+	touch $$@
+cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image
+$$(BUILDDIR)/stdlib/$1.debug.image:
+	touch $$@
+cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image
+.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image
+endef
+
+# no dependencies
+$(eval $(call pkgimg_builder,MozillaCACerts_jll,))
+$(eval $(call sysimg_builder,ArgTools,))
+$(eval $(call sysimg_builder,Artifacts,))
+$(eval $(call sysimg_builder,Base64,))
+$(eval $(call sysimg_builder,CRC32c,))
+$(eval $(call sysimg_builder,FileWatching,))
+$(eval $(call sysimg_builder,Libdl,))
+$(eval $(call sysimg_builder,Logging,))
+$(eval $(call sysimg_builder,Mmap,))
+$(eval $(call sysimg_builder,NetworkOptions,))
+$(eval $(call sysimg_builder,SHA,))
+$(eval $(call sysimg_builder,Serialization,))
+$(eval $(call sysimg_builder,Sockets,))
+$(eval $(call sysimg_builder,Unicode,))
+$(eval $(call pkgimg_builder,Profile,))
+
+# 1-depth packages
+$(eval $(call pkgimg_builder,GMP_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,LLVMLibUnwind_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,LibUV_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,LibUnwind_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,MbedTLS_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,nghttp2_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,OpenLibm_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,PCRE2_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,Zlib_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,dSFMT_jll,Artifacts Libdl))
+$(eval $(call pkgimg_builder,libLLVM_jll,Artifacts Libdl))
+$(eval $(call sysimg_builder,libblastrampoline_jll,Artifacts Libdl))
+$(eval $(call sysimg_builder,OpenBLAS_jll,Artifacts Libdl))
+$(eval $(call sysimg_builder,Markdown,Base64))
+$(eval $(call sysimg_builder,Printf,Unicode))
+$(eval $(call sysimg_builder,Random,SHA))
+$(eval $(call sysimg_builder,Tar,ArgTools,SHA))
+$(eval $(call pkgimg_builder,DelimitedFiles,Mmap))
+
+# 2-depth packages
+$(eval $(call pkgimg_builder,LLD_jll,Zlib_jll libLLVM_jll Artifacts Libdl))
+$(eval $(call pkgimg_builder,LibSSH2_jll,Artifacts Libdl MbedTLS_jll))
+$(eval $(call pkgimg_builder,MPFR_jll,Artifacts Libdl GMP_jll))
+$(eval $(call sysimg_builder,LinearAlgebra,Libdl libblastrampoline_jll OpenBLAS_jll))
+$(eval $(call sysimg_builder,Dates,Printf))
+$(eval $(call pkgimg_builder,Distributed,Random Serialization Sockets))
+$(eval $(call sysimg_builder,Future,Random))
+$(eval $(call sysimg_builder,InteractiveUtils,Markdown))
+$(eval $(call sysimg_builder,LibGit2,NetworkOptions Printf SHA Base64))
+$(eval $(call sysimg_builder,UUIDs,Random SHA))
+
+ # 3-depth packages
+ # LibGit2_jll
+$(eval $(call pkgimg_builder,LibCURL_jll,LibSSH2_jll nghttp2_jll MbedTLS_jll Zlib_jll Artifacts Libdl))
+$(eval $(call sysimg_builder,REPL,InteractiveUtils Markdown Sockets Unicode))
+$(eval $(call pkgimg_builder,SharedArrays,Distributed Mmap Random Serialization))
+$(eval $(call sysimg_builder,TOML,Dates))
+$(eval $(call pkgimg_builder,Test,Logging Random Serialization InteractiveUtils))
+
+# 4-depth packages
+$(eval $(call sysimg_builder,LibCURL,LibCURL_jll MozillaCACerts_jll))
+
+# 5-depth packages
+$(eval $(call sysimg_builder,Downloads,ArgTools FileWatching LibCURL NetworkOptions))
+
+# 6-depth packages
+$(eval $(call sysimg_builder,Pkg,Dates LibGit2 Libdl Logging Printf Random SHA UUIDs)) # Markdown REPL
+
+# 7-depth packages
+$(eval $(call pkgimg_builder,LazyArtifacts,Artifacts Pkg))
+
+$(eval $(call pkgimg_builder,SparseArrays,Libdl LinearAlgebra Random Serialization))
+# SuiteSparse_jll
+# Statistics
diff --git a/src/Makefile b/src/Makefile
index 2e976282015d6..66a3f3ac1c24b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -105,7 +105,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
 ifeq ($(OS),WINNT)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
 endif
@@ -163,9 +163,8 @@ LIBJULIA_PATH_REL := libjulia
 endif
 
 COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
-RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(MMTK_LIB)
-CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(MMTK_LIB)
-
+RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB)
+CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB)
 RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
 CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
 RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS)
@@ -186,8 +185,8 @@ endif
 CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
 CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
-DEBUGFLAGS += $(FLAGS) -DLIBRARY_EXPORTS
-SHIPFLAGS += $(FLAGS) -DLIBRARY_EXPORTS
+SHIPFLAGS  += $(FLAGS)
+DEBUGFLAGS += $(FLAGS)
 
 # if not absolute, then relative to the directory of the julia executable
 SHIPFLAGS  += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\""
@@ -310,7 +309,7 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
 
 # additional dependency links
 $(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h
-$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
 $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
@@ -378,7 +377,7 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 	@echo "#ifndef JL_VERSION_H" >> $@.$(JULIA_BUILD_MODE).tmp
 	@echo "#define JL_VERSION_H" >> $@.$(JULIA_BUILD_MODE).tmp
 	@echo "#define JULIA_VERSION_STRING" \"$(JULIA_VERSION)\" >> $@.$(JULIA_BUILD_MODE).tmp
-	@echo $(JULIA_VERSION) | awk 'BEGIN {FS="[.,-]"} \
+	@echo $(JULIA_VERSION) | awk 'BEGIN {FS="[.,+-]"} \
 	{print "#define JULIA_VERSION_MAJOR " $$1 "\n" \
 	"#define JULIA_VERSION_MINOR " $$2 "\n" \
 	"#define JULIA_VERSION_PATCH " $$3 ; \
@@ -407,6 +406,8 @@ $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libj
 $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(SHLIB_EXT): \
 		$(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_MINOR_SHLIB_EXT)
 	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT)
+$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT)
 libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT)
 libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT)
 endif
@@ -440,6 +441,12 @@ libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SH
 libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
 libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS)
 
+# set the exports for the source files based on where they are getting linked
+$(OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_INTERNAL
+$(DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_INTERNAL
+$(CODEGEN_OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN
+$(CODEGEN_DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN
+
 clean:
 	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
 	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc
@@ -526,6 +533,8 @@ clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB
 		-load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
 		-- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++)
 
+# set the exports for the source files based on where they are getting linked
+clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS += -DJL_LIBRARY_EXPORTS
 
 # Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc`
 tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS)))
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 3f986cbbc489d..cf6378b4f926b 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -9,11 +9,7 @@
 #include <llvm/Analysis/TargetLibraryInfo.h>
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/IR/DataLayout.h>
-#if JL_LLVM_VERSION >= 140000
 #include <llvm/MC/TargetRegistry.h>
-#else
-#include <llvm/Support/TargetRegistry.h>
-#endif
 #include <llvm/Target/TargetMachine.h>
 
 // analysis passes
@@ -51,11 +47,13 @@
 // for outputting code
 #include <llvm/Bitcode/BitcodeWriter.h>
 #include <llvm/Bitcode/BitcodeWriterPass.h>
+#include <llvm/Bitcode/BitcodeReader.h>
 #include "llvm/Object/ArchiveWriter.h"
 #include <llvm/IR/IRPrintingPasses.h>
 
 #include <llvm/IR/LegacyPassManagers.h>
 #include <llvm/Transforms/Utils/Cloning.h>
+#include <llvm/Support/FormatAdapters.h>
 #include <llvm/Linker/Linker.h>
 
 
@@ -65,6 +63,7 @@ using namespace llvm;
 #include "serialize.h"
 #include "julia_assert.h"
 #include "llvm-codegen-shared.h"
+#include "processor.h"
 
 #define DEBUG_TYPE "julia_aotcompile"
 
@@ -74,19 +73,13 @@ STATISTIC(CreateNativeMethods, "Number of methods compiled for jl_create_native"
 STATISTIC(CreateNativeMax, "Max number of methods compiled at once for jl_create_native");
 STATISTIC(CreateNativeGlobals, "Number of globals compiled for jl_create_native");
 
-template<class T> // for GlobalObject's
-static T *addComdat(T *G)
+static void addComdat(GlobalValue *G, Triple &T)
 {
-#if defined(_OS_WINDOWS_)
-    if (!G->isDeclaration()) {
+    if (T.isOSBinFormatCOFF() && !G->isDeclaration()) {
         // add __declspec(dllexport) to everything marked for export
-        if (G->getLinkage() == GlobalValue::ExternalLinkage)
-            G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
-        else
-            G->setDLLStorageClass(GlobalValue::DefaultStorageClass);
+        assert(G->hasExternalLinkage() && "Cannot set DLLExport on non-external linkage!");
+        G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
     }
-#endif
-    return G;
 }
 
 
@@ -99,7 +92,7 @@ typedef struct {
     std::vector<jl_code_instance_t*> jl_external_to_llvm;
 } jl_native_code_desc_t;
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
         int32_t *func_idx, int32_t *specfunc_idx)
 {
@@ -113,7 +106,7 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
     }
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs)
 {
     // map a memory location (jl_value_t or jl_binding_t) to a GlobalVariable
@@ -122,7 +115,7 @@ void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs)
     memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*));
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_get_llvm_external_fns_impl(void *native_code, arraylist_t *external_fns)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -131,7 +124,7 @@ void jl_get_llvm_external_fns_impl(void *native_code, arraylist_t *external_fns)
         external_fns->len * sizeof(jl_code_instance_t*));
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -141,7 +134,7 @@ LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code)
         return NULL;
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -163,10 +156,12 @@ static void emit_offset_table(Module &mod, const std::vector<GlobalValue*> &vars
         addrs[i] = ConstantExpr::getBitCast(var, T_psize);
     }
     ArrayType *vars_type = ArrayType::get(T_psize, nvars);
-    new GlobalVariable(mod, vars_type, true,
+    auto GV = new GlobalVariable(mod, vars_type, true,
                        GlobalVariable::ExternalLinkage,
                        ConstantArray::get(vars_type, addrs),
                        name);
+    GV->setVisibility(GlobalValue::HiddenVisibility);
+    GV->setDSOLocal(true);
 }
 
 static bool is_safe_char(unsigned char c)
@@ -241,10 +236,10 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
         if ((jl_value_t*)*src_out == jl_nothing)
             *src_out = NULL;
         if (*src_out && jl_is_method(def))
-            *src_out = jl_uncompress_ir(def, codeinst, (jl_array_t*)*src_out);
+            *src_out = jl_uncompress_ir(def, codeinst, (jl_value_t*)*src_out);
     }
     if (*src_out == NULL || !jl_is_code_info(*src_out)) {
-        if (cgparams.lookup != jl_rettype_inferred) {
+        if (cgparams.lookup != jl_rettype_inferred_addr) {
             jl_error("Refusing to automatically run type inference with custom cache lookup.");
         }
         else {
@@ -261,8 +256,6 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
     *ci_out = codeinst;
 }
 
-void replaceUsesWithLoad(Function &F, function_ref<GlobalVariable *(Instruction &I)> should_replace, MDNode *tbaa_const);
-
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup, and can
 // also be used be extern consumers like GPUCompiler.jl to obtain a module containing
@@ -270,9 +263,10 @@ void replaceUsesWithLoad(Function &F, function_ref<GlobalVariable *(Instruction
 // The `policy` flag switches between the default mode `0` and the extern mode `1` used by GPUCompiler.
 // `_imaging_mode` controls if raw pointers can be embedded (e.g. the code will be loaded into the same session).
 // `_external_linkage` create linkages between pkgimages.
-extern "C" JL_DLLEXPORT
-void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage)
+extern "C" JL_DLLEXPORT_CODEGEN
+void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world)
 {
+    JL_TIMING(NATIVE_AOT, NATIVE_Create);
     ++CreateNativeCalls;
     CreateNativeMax.updateMax(jl_array_len(methods));
     if (cgparams == NULL)
@@ -285,7 +279,9 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     jl_code_info_t *src = NULL;
     JL_GC_PUSH1(&src);
     auto ct = jl_current_task;
-    ct->reentrant_timing++;
+    bool timed = (ct->reentrant_timing & 1) == 0;
+    if (timed)
+        ct->reentrant_timing |= 1;
     orc::ThreadSafeContext ctx;
     orc::ThreadSafeModule backing;
     if (!llvmmod) {
@@ -303,11 +299,14 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     // compile all methods for the current world and type-inference world
 
     JL_LOCK(&jl_codegen_lock);
-    jl_codegen_params_t params(ctxt);
+    auto target_info = clone.withModuleDo([&](Module &M) {
+        return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
+    });
+    jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second));
     params.params = cgparams;
     params.imaging = imaging;
     params.external_linkage = _external_linkage;
-    size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) };
+    size_t compile_for[] = { jl_typeinf_world, _world };
     for (int worlds = 0; worlds < 2; worlds++) {
         params.world = compile_for[worlds];
         if (!params.world)
@@ -357,10 +356,14 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     // process the globals array, before jl_merge_module destroys them
     std::vector<std::string> gvars(params.globals.size());
     data->jl_value_to_llvm.resize(params.globals.size());
+    StringSet<> gvars_names;
+    DenseSet<GlobalValue *> gvars_set;
 
     size_t idx = 0;
     for (auto &global : params.globals) {
         gvars[idx] = global.second->getName().str();
+        assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!");
+        assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!");
         data->jl_value_to_llvm[idx] = global.first;
         idx++;
     }
@@ -369,34 +372,19 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     size_t offset = gvars.size();
     data->jl_external_to_llvm.resize(params.external_fns.size());
 
-    auto tbaa_const = tbaa_make_child_with_context(*ctxt.getContext(), "jtbaa_const", nullptr, true).first;
     for (auto &extern_fn : params.external_fns) {
         jl_code_instance_t *this_code = std::get<0>(extern_fn.first);
         bool specsig = std::get<1>(extern_fn.first);
         assert(specsig && "Error external_fns doesn't handle non-specsig yet");
-        (void)specsig;
-        Function *F = extern_fn.second;
-        Module *M = F->getParent();
-
-        Type *T_funcp = F->getFunctionType()->getPointerTo();
-        // Can't create a GC with type FunctionType. Alias also doesn't work
-        GlobalVariable *GV = new GlobalVariable(*M, T_funcp, false,
-                                                GlobalVariable::ExternalLinkage,
-                                                Constant::getNullValue(T_funcp),
-                                                F->getName());
-
-
-        // Need to insert load instruction, thus we can't use replace all uses with
-        replaceUsesWithLoad(*F, [GV](Instruction &) { return GV; }, tbaa_const);
-
-        assert(F->getNumUses() == 0); // declaration counts as use
-        GV->takeName(F);
-        F->eraseFromParent();
-
+        (void) specsig;
+        GlobalVariable *F = extern_fn.second;
         size_t idx = gvars.size() - offset;
         assert(idx >= 0);
-        data->jl_external_to_llvm.at(idx) = this_code;
-        gvars.push_back(std::string(GV->getName()));
+        assert(idx < data->jl_external_to_llvm.size());
+        data->jl_external_to_llvm[idx] = this_code;
+        assert(gvars_set.insert(F).second && "Duplicate gvar in params!");
+        assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!");
+        gvars.push_back(std::string(F->getName()));
     }
 
     // clones the contents of the module `m` to the shadow_output collector
@@ -440,21 +428,24 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
         //Safe b/c context is locked by params
         GlobalVariable *G = cast<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
         G->setInitializer(ConstantPointerNull::get(cast<PointerType>(G->getValueType())));
-        G->setLinkage(GlobalVariable::InternalLinkage);
+        G->setLinkage(GlobalValue::ExternalLinkage);
+        G->setVisibility(GlobalValue::HiddenVisibility);
+        G->setDSOLocal(true);
         data->jl_sysimg_gvars.push_back(G);
     }
     CreateNativeGlobals += gvars.size();
 
     //Safe b/c context is locked by params
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-    // setting the function personality enables stack unwinding and catching exceptions
-    // so make sure everything has something set
-    Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
-    Function *juliapersonality_func =
-       Function::Create(FunctionType::get(T_int32, true),
-           Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
-    juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
-#endif
+    auto TT = Triple(clone.getModuleUnlocked()->getTargetTriple());
+    Function *juliapersonality_func = nullptr;
+    if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
+        // setting the function personality enables stack unwinding and catching exceptions
+        // so make sure everything has something set
+        Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
+        juliapersonality_func = Function::Create(FunctionType::get(T_int32, true),
+            Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
+        juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
+    }
 
     // move everything inside, now that we've merged everything
     // (before adding the exported headers)
@@ -462,22 +453,26 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
         //Safe b/c context is locked by params
         for (GlobalObject &G : clone.getModuleUnlocked()->global_objects()) {
             if (!G.isDeclaration()) {
-                G.setLinkage(Function::InternalLinkage);
+                G.setLinkage(GlobalValue::ExternalLinkage);
+                G.setVisibility(GlobalValue::HiddenVisibility);
+                G.setDSOLocal(true);
                 makeSafeName(G);
-                addComdat(&G);
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-                // Add unwind exception personalities to functions to handle async exceptions
-                if (Function *F = dyn_cast<Function>(&G))
-                    F->setPersonalityFn(juliapersonality_func);
-#endif
+                if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
+                    // Add unwind exception personalities to functions to handle async exceptions
+                    if (Function *F = dyn_cast<Function>(&G))
+                        F->setPersonalityFn(juliapersonality_func);
+                }
             }
         }
     }
 
     data->M = std::move(clone);
-    if (!ct->reentrant_timing-- && measure_compile_time_enabled) {
-        auto end = jl_hrtime();
-        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+    if (timed) {
+        if (measure_compile_time_enabled) {
+            auto end = jl_hrtime();
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+        }
+        ct->reentrant_timing &= ~1ull;
     }
     if (ctx.getContext()) {
         jl_ExecutionEngine->releaseContext(std::move(ctx));
@@ -485,15 +480,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     return (void*)data;
 }
 
-
-static void emit_result(std::vector<NewArchiveMember> &Archive, SmallVectorImpl<char> &OS,
-        StringRef Name, std::vector<std::string> &outputs)
-{
-    outputs.push_back({ OS.data(), OS.size() });
-    Archive.push_back(NewArchiveMember(MemoryBufferRef(outputs.back(), Name)));
-    OS.clear();
-}
-
 static object::Archive::Kind getDefaultForHost(Triple &triple)
 {
       if (triple.isOSDarwin())
@@ -508,6 +494,7 @@ static void reportWriterError(const ErrorInfoBase &E)
     jl_safe_printf("ERROR: failed to emit output file %s\n", err.c_str());
 }
 
+#if JULIA_FLOAT16_ABI == 1
 static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionType *FT)
 {
     Function *target = M.getFunction(alias);
@@ -524,113 +511,1070 @@ static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionT
     auto val = builder.CreateCall(target, CallArgs);
     builder.CreateRet(val);
 }
+#endif
+void multiversioning_preannotate(Module &M);
+
+// See src/processor.h for documentation about this table. Corresponds to jl_image_shard_t.
+static GlobalVariable *emit_shard_table(Module &M, Type *T_size, Type *T_psize, unsigned threads) {
+    SmallVector<Constant *, 0> tables(sizeof(jl_image_shard_t) / sizeof(void *) * threads);
+    for (unsigned i = 0; i < threads; i++) {
+        auto suffix = "_" + std::to_string(i);
+        auto create_gv = [&](StringRef name, bool constant) {
+            auto gv = new GlobalVariable(M, T_size, constant,
+                                         GlobalValue::ExternalLinkage, nullptr, name + suffix);
+            gv->setVisibility(GlobalValue::HiddenVisibility);
+            gv->setDSOLocal(true);
+            return gv;
+        };
+        auto table = tables.data() + i * sizeof(jl_image_shard_t) / sizeof(void *);
+        table[offsetof(jl_image_shard_t, fvar_base) / sizeof(void*)] = create_gv("jl_fvar_base", false);
+        table[offsetof(jl_image_shard_t, fvar_offsets) / sizeof(void*)] = create_gv("jl_fvar_offsets", true);
+        table[offsetof(jl_image_shard_t, fvar_idxs) / sizeof(void*)] = create_gv("jl_fvar_idxs", true);
+        table[offsetof(jl_image_shard_t, gvar_base) / sizeof(void*)] = create_gv("jl_gvar_base", false);
+        table[offsetof(jl_image_shard_t, gvar_offsets) / sizeof(void*)] = create_gv("jl_gvar_offsets", true);
+        table[offsetof(jl_image_shard_t, gvar_idxs) / sizeof(void*)] = create_gv("jl_gvar_idxs", true);
+        table[offsetof(jl_image_shard_t, clone_slots) / sizeof(void*)] = create_gv("jl_clone_slots", true);
+        table[offsetof(jl_image_shard_t, clone_offsets) / sizeof(void*)] = create_gv("jl_clone_offsets", true);
+        table[offsetof(jl_image_shard_t, clone_idxs) / sizeof(void*)] = create_gv("jl_clone_idxs", true);
+    }
+    auto tables_arr = ConstantArray::get(ArrayType::get(T_psize, tables.size()), tables);
+    auto tables_gv = new GlobalVariable(M, tables_arr->getType(), false,
+                                        GlobalValue::ExternalLinkage, tables_arr, "jl_shard_tables");
+    tables_gv->setVisibility(GlobalValue::HiddenVisibility);
+    tables_gv->setDSOLocal(true);
+    return tables_gv;
+}
+
+// See src/processor.h for documentation about this table. Corresponds to jl_image_ptls_t.
+static GlobalVariable *emit_ptls_table(Module &M, Type *T_size, Type *T_psize) {
+    std::array<Constant *, 3> ptls_table{
+        new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_pgcstack_func_slot"),
+        new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_pgcstack_key_slot"),
+        new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_tls_offset"),
+    };
+    for (auto &gv : ptls_table) {
+        cast<GlobalVariable>(gv)->setVisibility(GlobalValue::HiddenVisibility);
+        cast<GlobalVariable>(gv)->setDSOLocal(true);
+    }
+    auto ptls_table_arr = ConstantArray::get(ArrayType::get(T_psize, ptls_table.size()), ptls_table);
+    auto ptls_table_gv = new GlobalVariable(M, ptls_table_arr->getType(), false,
+                                            GlobalValue::ExternalLinkage, ptls_table_arr, "jl_ptls_table");
+    ptls_table_gv->setVisibility(GlobalValue::HiddenVisibility);
+    ptls_table_gv->setDSOLocal(true);
+    return ptls_table_gv;
+}
+
+// See src/processor.h for documentation about this table. Corresponds to jl_image_header_t.
+static GlobalVariable *emit_image_header(Module &M, unsigned threads, unsigned nfvars, unsigned ngvars) {
+    constexpr uint32_t version = 1;
+    std::array<uint32_t, 4> header{
+        version,
+        threads,
+        nfvars,
+        ngvars,
+    };
+    auto header_arr = ConstantDataArray::get(M.getContext(), header);
+    auto header_gv = new GlobalVariable(M, header_arr->getType(), false,
+                                        GlobalValue::InternalLinkage, header_arr, "jl_image_header");
+    return header_gv;
+}
+
+// Grab fvars and gvars data from the module
+static void get_fvars_gvars(Module &M, DenseMap<GlobalValue *, unsigned> &fvars, DenseMap<GlobalValue *, unsigned> &gvars) {
+    auto fvars_gv = M.getGlobalVariable("jl_fvars");
+    auto gvars_gv = M.getGlobalVariable("jl_gvars");
+    auto fvars_idxs = M.getGlobalVariable("jl_fvar_idxs");
+    auto gvars_idxs = M.getGlobalVariable("jl_gvar_idxs");
+    assert(fvars_gv);
+    assert(gvars_gv);
+    assert(fvars_idxs);
+    assert(gvars_idxs);
+    auto fvars_init = cast<ConstantArray>(fvars_gv->getInitializer());
+    auto gvars_init = cast<ConstantArray>(gvars_gv->getInitializer());
+    for (unsigned i = 0; i < fvars_init->getNumOperands(); ++i) {
+        auto gv = cast<GlobalValue>(fvars_init->getOperand(i)->stripPointerCasts());
+        assert(gv && gv->hasName() && "fvar must be a named global");
+        assert(!fvars.count(gv) && "Duplicate fvar");
+        fvars[gv] = i;
+    }
+    assert(fvars.size() == fvars_init->getNumOperands());
+    for (unsigned i = 0; i < gvars_init->getNumOperands(); ++i) {
+        auto gv = cast<GlobalValue>(gvars_init->getOperand(i)->stripPointerCasts());
+        assert(gv && gv->hasName() && "gvar must be a named global");
+        assert(!gvars.count(gv) && "Duplicate gvar");
+        gvars[gv] = i;
+    }
+    assert(gvars.size() == gvars_init->getNumOperands());
+    fvars_gv->eraseFromParent();
+    gvars_gv->eraseFromParent();
+    fvars_idxs->eraseFromParent();
+    gvars_idxs->eraseFromParent();
+}
+
+// Weight computation
+// It is important for multithreaded image building to be able to split work up
+// among the threads equally. The weight calculated here is an estimation of
+// how expensive a particular function is going to be to compile.
+
+struct FunctionInfo {
+    size_t weight;
+    size_t bbs;
+    size_t insts;
+    size_t clones;
+};
+
+static FunctionInfo getFunctionWeight(const Function &F)
+{
+    FunctionInfo info;
+    info.weight = 1;
+    info.bbs = F.size();
+    info.insts = 0;
+    info.clones = 1;
+    for (const BasicBlock &BB : F) {
+        info.insts += BB.size();
+    }
+    if (F.hasFnAttribute("julia.mv.clones")) {
+        auto val = F.getFnAttribute("julia.mv.clones").getValueAsString();
+        // base16, so must be at most 4 * length bits long
+        // popcount gives number of clones
+        info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1;
+    }
+    info.weight += info.insts;
+    // more basic blocks = more complex than just sum of insts,
+    // add some weight to it
+    info.weight += info.bbs;
+    info.weight *= info.clones;
+    return info;
+}
+
+struct ModuleInfo {
+    size_t globals;
+    size_t funcs;
+    size_t bbs;
+    size_t insts;
+    size_t clones;
+    size_t weight;
+};
+
+ModuleInfo compute_module_info(Module &M) {
+    ModuleInfo info;
+    info.globals = 0;
+    info.funcs = 0;
+    info.bbs = 0;
+    info.insts = 0;
+    info.clones = 0;
+    info.weight = 0;
+    for (auto &G : M.global_values()) {
+        if (G.isDeclaration()) {
+            continue;
+        }
+        info.globals++;
+        if (auto F = dyn_cast<Function>(&G)) {
+            info.funcs++;
+            auto func_info = getFunctionWeight(*F);
+            info.bbs += func_info.bbs;
+            info.insts += func_info.insts;
+            info.clones += func_info.clones;
+            info.weight += func_info.weight;
+        } else {
+            info.weight += 1;
+        }
+    }
+    return info;
+}
+
+struct Partition {
+    StringSet<> globals;
+    StringMap<unsigned> fvars;
+    StringMap<unsigned> gvars;
+    size_t weight;
+};
+
+static inline bool verify_partitioning(const SmallVectorImpl<Partition> &partitions, const Module &M, size_t fvars_size, size_t gvars_size) {
+    bool bad = false;
+#ifndef JL_NDEBUG
+    SmallVector<uint32_t> fvars(fvars_size);
+    SmallVector<uint32_t> gvars(gvars_size);
+    StringMap<uint32_t> GVNames;
+    for (uint32_t i = 0; i < partitions.size(); i++) {
+        for (auto &name : partitions[i].globals) {
+            if (GVNames.count(name.getKey())) {
+                bad = true;
+                dbgs() << "Duplicate global name " << name.getKey() << " in partitions " << i << " and " << GVNames[name.getKey()] << "\n";
+            }
+            GVNames[name.getKey()] = i;
+        }
+        for (auto &fvar : partitions[i].fvars) {
+            if (fvars[fvar.second] != 0) {
+                bad = true;
+                dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars[fvar.second] - 1 << "\n";
+            }
+            fvars[fvar.second] = i+1;
+        }
+        for (auto &gvar : partitions[i].gvars) {
+            if (gvars[gvar.second] != 0) {
+                bad = true;
+                dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars[gvar.second] - 1 << "\n";
+            }
+            gvars[gvar.second] = i+1;
+        }
+    }
+    for (auto &GV : M.globals()) {
+        if (GV.isDeclaration()) {
+            if (GVNames.count(GV.getName())) {
+                bad = true;
+                dbgs() << "Global " << GV.getName() << " is a declaration but is in partition " << GVNames[GV.getName()] << "\n";
+            }
+        } else {
+            if (!GVNames.count(GV.getName())) {
+                bad = true;
+                dbgs() << "Global " << GV << " not in any partition\n";
+            }
+            if (!GV.hasExternalLinkage()) {
+                bad = true;
+                dbgs() << "Global " << GV << " has non-external linkage " << GV.getLinkage() << " but is in partition " << GVNames[GV.getName()] << "\n";
+            }
+        }
+    }
+    for (uint32_t i = 0; i < fvars_size; i++) {
+        if (fvars[i] == 0) {
+            bad = true;
+            dbgs() << "fvar " << i << " not in any partition\n";
+        }
+    }
+    for (uint32_t i = 0; i < gvars_size; i++) {
+        if (gvars[i] == 0) {
+            bad = true;
+            dbgs() << "gvar " << i << " not in any partition\n";
+        }
+    }
+#endif
+    return !bad;
+}
+
+// Chop a module up as equally as possible by weight into threads partitions
+static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
+    //Start by stripping fvars and gvars, which helpfully removes their uses as well
+    DenseMap<GlobalValue *, unsigned> fvars, gvars;
+    get_fvars_gvars(M, fvars, gvars);
+
+    // Partition by union-find, since we only have def->use traversal right now
+    struct Partitioner {
+        struct Node {
+            GlobalValue *GV;
+            unsigned parent;
+            unsigned size;
+            size_t weight;
+        };
+        std::vector<Node> nodes;
+        DenseMap<GlobalValue *, unsigned> node_map;
+        unsigned merged;
+
+        unsigned make(GlobalValue *GV, size_t weight) {
+            unsigned idx = nodes.size();
+            nodes.push_back({GV, idx, 1, weight});
+            node_map[GV] = idx;
+            return idx;
+        }
+
+        unsigned find(unsigned idx) {
+            while (nodes[idx].parent != idx) {
+                nodes[idx].parent = nodes[nodes[idx].parent].parent;
+                idx = nodes[idx].parent;
+            }
+            return idx;
+        }
+
+        unsigned merge(unsigned x, unsigned y) {
+            x = find(x);
+            y = find(y);
+            if (x == y)
+                return x;
+            if (nodes[x].size < nodes[y].size)
+                std::swap(x, y);
+            nodes[y].parent = x;
+            nodes[x].size += nodes[y].size;
+            nodes[x].weight += nodes[y].weight;
+            merged++;
+            return x;
+        }
+    };
+
+    Partitioner partitioner;
+
+    for (auto &G : M.global_values()) {
+        if (G.isDeclaration())
+            continue;
+        if (isa<Function>(G)) {
+            partitioner.make(&G, getFunctionWeight(cast<Function>(G)).weight);
+        } else {
+            partitioner.make(&G, 1);
+        }
+    }
+
+    // Merge all uses to go together into the same partition
+    for (unsigned i = 0; i < partitioner.nodes.size(); ++i) {
+        for (ConstantUses<GlobalValue> uses(partitioner.nodes[i].GV, M); !uses.done(); uses.next()) {
+            auto val = uses.get_info().val;
+            auto idx = partitioner.node_map.find(val);
+            assert(idx != partitioner.node_map.end());
+            partitioner.merge(i, idx->second);
+        }
+    }
+
+    SmallVector<Partition, 32> partitions(threads);
+    // always get the smallest partition first
+    auto pcomp = [](const Partition *p1, const Partition *p2) {
+        return p1->weight > p2->weight;
+    };
+    std::priority_queue<Partition *, std::vector<Partition *>, decltype(pcomp)> pq(pcomp);
+    for (unsigned i = 0; i < threads; ++i) {
+        pq.push(&partitions[i]);
+    }
+
+    std::vector<unsigned> idxs(partitioner.nodes.size());
+    std::iota(idxs.begin(), idxs.end(), 0);
+    std::sort(idxs.begin(), idxs.end(), [&](unsigned a, unsigned b) {
+        //because roots have more weight than their children,
+        //we can sort by weight and get the roots first
+        return partitioner.nodes[a].weight > partitioner.nodes[b].weight;
+    });
+
+    // Assign the root of each partition to a partition, then assign its children to the same one
+    for (unsigned idx = 0; idx < idxs.size(); ++idx) {
+        auto i = idxs[idx];
+        auto root = partitioner.find(i);
+        assert(root == i || partitioner.nodes[root].GV == nullptr);
+        if (partitioner.nodes[root].GV) {
+            auto &node = partitioner.nodes[root];
+            auto &P = *pq.top();
+            pq.pop();
+            auto name = node.GV->getName();
+            P.globals.insert(name);
+            if (fvars.count(node.GV))
+                P.fvars[name] = fvars[node.GV];
+            if (gvars.count(node.GV))
+                P.gvars[name] = gvars[node.GV];
+            P.weight += node.weight;
+            node.GV = nullptr;
+            node.size = &P - partitions.data();
+            pq.push(&P);
+        }
+        if (root != i) {
+            auto &node = partitioner.nodes[i];
+            assert(node.GV != nullptr);
+            // we assigned its root already, so just add it to the root's partition
+            // don't touch the priority queue, since we're not changing the weight
+            auto &P = partitions[partitioner.nodes[root].size];
+            auto name = node.GV->getName();
+            P.globals.insert(name);
+            if (fvars.count(node.GV))
+                P.fvars[name] = fvars[node.GV];
+            if (gvars.count(node.GV))
+                P.gvars[name] = gvars[node.GV];
+            node.GV = nullptr;
+            node.size = partitioner.nodes[root].size;
+        }
+    }
+
+    bool verified = verify_partitioning(partitions, M, fvars.size(), gvars.size());
+    assert(verified && "Partitioning failed to partition globals correctly");
+    (void) verified;
+
+    return partitions;
+}
+
+struct ImageTimer {
+    uint64_t elapsed = 0;
+    std::string name;
+    std::string desc;
 
+    void startTimer() {
+        elapsed = jl_hrtime();
+    }
+
+    void stopTimer() {
+        elapsed = jl_hrtime() - elapsed;
+    }
+
+    void init(const Twine &name, const Twine &desc) {
+        this->name = name.str();
+        this->desc = desc.str();
+    }
+
+    operator bool() const {
+        return elapsed != 0;
+    }
+
+    void print(raw_ostream &out, bool clear=false) {
+        if (!*this)
+            return;
+        out << llvm::formatv("{0:F3}  ", elapsed / 1e9) << name << "  " << desc << "\n";
+        if (clear)
+            elapsed = 0;
+    }
+};
+
+struct ShardTimers {
+    ImageTimer deserialize;
+    ImageTimer materialize;
+    ImageTimer construct;
+    ImageTimer deletion;
+    // impl timers
+    ImageTimer unopt;
+    ImageTimer optimize;
+    ImageTimer opt;
+    ImageTimer obj;
+    ImageTimer asm_;
+
+    std::string name;
+    std::string desc;
+
+    void print(raw_ostream &out, bool clear=false) {
+        StringRef sep = "===-------------------------------------------------------------------------===";
+        out << formatv("{0}\n{1}\n{0}\n", sep, fmt_align(name + " : " + desc, AlignStyle::Center, sep.size()));
+        auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed + deletion.elapsed +
+            unopt.elapsed + optimize.elapsed + opt.elapsed + obj.elapsed + asm_.elapsed;
+        out << "Time (s)  Name  Description\n";
+        deserialize.print(out, clear);
+        materialize.print(out, clear);
+        construct.print(out, clear);
+        deletion.print(out, clear);
+        unopt.print(out, clear);
+        optimize.print(out, clear);
+        opt.print(out, clear);
+        obj.print(out, clear);
+        asm_.print(out, clear);
+        out << llvm::formatv("{0:F3}  total  Total time taken\n", total / 1e9);
+    }
+};
+
+void emitFloat16Wrappers(Module &M, bool external);
+
+// Perform the actual optimization and emission of the output files
+static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *outputs, const std::string *names,
+                    NewArchiveMember *unopt, NewArchiveMember *opt, NewArchiveMember *obj, NewArchiveMember *asm_,
+                    ShardTimers &timers, unsigned shardidx) {
+    auto TM = std::unique_ptr<TargetMachine>(
+        SourceTM.getTarget().createTargetMachine(
+            SourceTM.getTargetTriple().str(),
+            SourceTM.getTargetCPU(),
+            SourceTM.getTargetFeatureString(),
+            SourceTM.Options,
+            SourceTM.getRelocationModel(),
+            SourceTM.getCodeModel(),
+            SourceTM.getOptLevel()));
+
+    if (unopt) {
+        timers.unopt.startTimer();
+        raw_string_ostream OS(*outputs);
+        PassBuilder PB;
+        AnalysisManagers AM{*TM, PB, OptimizationLevel::O0};
+        ModulePassManager MPM;
+        MPM.addPass(BitcodeWriterPass(OS));
+        MPM.run(M, AM.MAM);
+        *unopt = NewArchiveMember(MemoryBufferRef(*outputs++, *names++));
+        timers.unopt.stopTimer();
+    }
+    if (!opt && !obj && !asm_) {
+        return;
+    }
+    assert(!verifyModule(M, &errs()));
+
+    timers.optimize.startTimer();
+
+#ifndef JL_USE_NEW_PM
+    legacy::PassManager optimizer;
+    addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+    addOptimizationPasses(&optimizer, jl_options.opt_level, true, true);
+    addMachinePasses(&optimizer, jl_options.opt_level);
+#else
+
+    auto PMTM = std::unique_ptr<TargetMachine>(
+        SourceTM.getTarget().createTargetMachine(
+            SourceTM.getTargetTriple().str(),
+            SourceTM.getTargetCPU(),
+            SourceTM.getTargetFeatureString(),
+            SourceTM.Options,
+            SourceTM.getRelocationModel(),
+            SourceTM.getCodeModel(),
+            SourceTM.getOptLevel()));
+    NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), OptimizationOptions::defaults(true, true)};
+#endif
+    optimizer.run(M);
+    assert(!verifyModule(M, &errs()));
+    bool inject_aliases = false;
+    for (auto &F : M.functions()) {
+        if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") {
+            inject_aliases = true;
+            break;
+        }
+    }
+    // no need to inject aliases if we have no functions
+
+    if (inject_aliases) {
+#if JULIA_FLOAT16_ABI == 1
+        // We would like to emit an alias or an weakref alias to redirect these symbols
+        // but LLVM doesn't let us emit a GlobalAlias to a declaration...
+        // So for now we inject a definition of these functions that calls our runtime
+        // functions. We do so after optimization to avoid cloning these functions.
+        injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee",
+                FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
+        injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee",
+                FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
+        injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee",
+                FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+        injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee",
+                FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
+        injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2",
+                FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
+#else
+        emitFloat16Wrappers(M, false);
+#endif
+    }
+    timers.optimize.stopTimer();
+
+    if (opt) {
+        timers.opt.startTimer();
+        raw_string_ostream OS(*outputs);
+        PassBuilder PB;
+        AnalysisManagers AM{*TM, PB, OptimizationLevel::O0};
+        ModulePassManager MPM;
+        MPM.addPass(BitcodeWriterPass(OS));
+        MPM.run(M, AM.MAM);
+        *opt = NewArchiveMember(MemoryBufferRef(*outputs++, *names++));
+        timers.opt.stopTimer();
+    }
+
+    if (obj) {
+        timers.obj.startTimer();
+        SmallVector<char, 0> Buffer;
+        raw_svector_ostream OS(Buffer);
+        legacy::PassManager emitter;
+        addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false))
+            jl_safe_printf("ERROR: target does not support generation of object files\n");
+        emitter.run(M);
+        *outputs = { Buffer.data(), Buffer.size() };
+        *obj = NewArchiveMember(MemoryBufferRef(*outputs++, *names++));
+        timers.obj.stopTimer();
+    }
+
+    if (asm_) {
+        timers.asm_.startTimer();
+        SmallVector<char, 0> Buffer;
+        raw_svector_ostream OS(Buffer);
+        legacy::PassManager emitter;
+        addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false))
+            jl_safe_printf("ERROR: target does not support generation of assembly files\n");
+        emitter.run(M);
+        *outputs = { Buffer.data(), Buffer.size() };
+        *asm_ = NewArchiveMember(MemoryBufferRef(*outputs++, *names++));
+        timers.asm_.stopTimer();
+    }
+}
+
+// serialize module to bitcode
+static auto serializeModule(const Module &M) {
+    assert(!verifyModule(M, &errs()) && "Serializing invalid module!");
+    SmallVector<char, 0> ClonedModuleBuffer;
+    BitcodeWriter BCWriter(ClonedModuleBuffer);
+    BCWriter.writeModule(M);
+    BCWriter.writeSymtab();
+    BCWriter.writeStrtab();
+    return ClonedModuleBuffer;
+}
+
+// Modules are deserialized lazily by LLVM, to avoid deserializing
+// unnecessary functions. We take advantage of this by serializing
+// the entire module once, then deleting the bodies of functions
+// that are not in this partition. Once unnecesary functions are
+// deleted, we then materialize the entire module to make use-lists
+// consistent.
+static void materializePreserved(Module &M, Partition &partition) {
+    DenseSet<GlobalValue *> Preserve;
+    for (auto &GV : M.global_values()) {
+        if (!GV.isDeclaration()) {
+            if (partition.globals.count(GV.getName())) {
+                Preserve.insert(&GV);
+            }
+        }
+    }
+    for (auto &F : M.functions()) {
+        if (!F.isDeclaration()) {
+            if (!Preserve.contains(&F)) {
+                F.deleteBody();
+                F.setLinkage(GlobalValue::ExternalLinkage);
+                F.setVisibility(GlobalValue::HiddenVisibility);
+                F.setDSOLocal(true);
+            }
+        }
+    }
+    for (auto &GV : M.globals()) {
+        if (!GV.isDeclaration()) {
+            if (!Preserve.contains(&GV)) {
+                GV.setInitializer(nullptr);
+                GV.setLinkage(GlobalValue::ExternalLinkage);
+                GV.setVisibility(GlobalValue::HiddenVisibility);
+                GV.setDSOLocal(true);
+            }
+        }
+    }
+    // Global aliases are a pain to deal with. It is illegal to have an alias to a declaration,
+    // so we need to replace them with either a function or a global variable declaration. However,
+    // we can't just delete the alias, because that would break the users of the alias. Therefore,
+    // we do a dance where we point each global alias to a dummy function or global variable,
+    // then materialize the module to access use-lists, then replace all the uses, and finally commit
+    // to deleting the old alias.
+    SmallVector<std::pair<GlobalAlias *, GlobalValue *>> DeletedAliases;
+    for (auto &GA : M.aliases()) {
+        if (!GA.isDeclaration()) {
+            if (!Preserve.contains(&GA)) {
+                if (GA.getValueType()->isFunctionTy()) {
+                    auto F = Function::Create(cast<FunctionType>(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M);
+                    // This is an extremely sad hack to make sure the global alias never points to an extern function
+                    auto BB = BasicBlock::Create(M.getContext(), "", F);
+                    new UnreachableInst(M.getContext(), BB);
+                    GA.setAliasee(F);
+
+                    DeletedAliases.push_back({ &GA, F });
+                }
+                else {
+                    auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType()));
+                    DeletedAliases.push_back({ &GA, GV });
+                }
+            }
+        }
+    }
+    cantFail(M.materializeAll());
+    for (auto &Deleted : DeletedAliases) {
+        Deleted.second->takeName(Deleted.first);
+        Deleted.first->replaceAllUsesWith(Deleted.second);
+        Deleted.first->eraseFromParent();
+        // undo our previous sad hack
+        if (auto F = dyn_cast<Function>(Deleted.second)) {
+            F->deleteBody();
+        } else {
+            cast<GlobalVariable>(Deleted.second)->setInitializer(nullptr);
+        }
+    }
+}
+
+// Reconstruct jl_fvars, jl_gvars, jl_fvars_idxs, and jl_gvars_idxs from the partition
+static void construct_vars(Module &M, Partition &partition) {
+    std::vector<std::pair<uint32_t, GlobalValue *>> fvar_pairs;
+    fvar_pairs.reserve(partition.fvars.size());
+    for (auto &fvar : partition.fvars) {
+        auto F = M.getFunction(fvar.first());
+        assert(F);
+        assert(!F->isDeclaration());
+        fvar_pairs.push_back({ fvar.second, F });
+    }
+    std::vector<GlobalValue *> fvars;
+    std::vector<uint32_t> fvar_idxs;
+    fvars.reserve(fvar_pairs.size());
+    fvar_idxs.reserve(fvar_pairs.size());
+    std::sort(fvar_pairs.begin(), fvar_pairs.end());
+    for (auto &fvar : fvar_pairs) {
+        fvars.push_back(fvar.second);
+        fvar_idxs.push_back(fvar.first);
+    }
+    std::vector<std::pair<uint32_t, GlobalValue *>> gvar_pairs;
+    gvar_pairs.reserve(partition.gvars.size());
+    for (auto &gvar : partition.gvars) {
+        auto GV = M.getGlobalVariable(gvar.first());
+        assert(GV);
+        assert(!GV->isDeclaration());
+        gvar_pairs.push_back({ gvar.second, GV });
+    }
+    std::vector<GlobalValue *> gvars;
+    std::vector<uint32_t> gvar_idxs;
+    gvars.reserve(gvar_pairs.size());
+    gvar_idxs.reserve(gvar_pairs.size());
+    std::sort(gvar_pairs.begin(), gvar_pairs.end());
+    for (auto &gvar : gvar_pairs) {
+        gvars.push_back(gvar.second);
+        gvar_idxs.push_back(gvar.first);
+    }
+
+    // Now commit the fvars, gvars, and idxs
+    auto T_psize = M.getDataLayout().getIntPtrType(M.getContext())->getPointerTo();
+    emit_offset_table(M, fvars, "jl_fvars", T_psize);
+    emit_offset_table(M, gvars, "jl_gvars", T_psize);
+    auto fidxs = ConstantDataArray::get(M.getContext(), fvar_idxs);
+    auto fidxs_var = new GlobalVariable(M, fidxs->getType(), true,
+                                        GlobalVariable::ExternalLinkage,
+                                        fidxs, "jl_fvar_idxs");
+    fidxs_var->setVisibility(GlobalValue::HiddenVisibility);
+    fidxs_var->setDSOLocal(true);
+    auto gidxs = ConstantDataArray::get(M.getContext(), gvar_idxs);
+    auto gidxs_var = new GlobalVariable(M, gidxs->getType(), true,
+                                        GlobalVariable::ExternalLinkage,
+                                        gidxs, "jl_gvar_idxs");
+    gidxs_var->setVisibility(GlobalValue::HiddenVisibility);
+    gidxs_var->setDSOLocal(true);
+}
+
+// Materialization will leave many unused declarations, which multiversioning would otherwise clone.
+// This function removes them to avoid unnecessary cloning of declarations.
+// The GlobalDCEPass is much better at this, but we only care about removing unused
+// declarations, not actually about seeing if code is dead (codegen knows it is live, by construction).
+static void dropUnusedGlobals(Module &M) {
+    std::vector<GlobalValue *> unused;
+    for (auto &G : M.global_values()) {
+        if (G.isDeclaration() && G.use_empty())
+            unused.push_back(&G);
+    }
+    for (auto &G : unused)
+        G->eraseFromParent();
+}
+
+// Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading,
+// as well as partitioning, serialization, and deserialization.
+static void add_output(Module &M, TargetMachine &TM, std::vector<std::string> &outputs, StringRef name,
+                std::vector<NewArchiveMember> &unopt, std::vector<NewArchiveMember> &opt,
+                std::vector<NewArchiveMember> &obj, std::vector<NewArchiveMember> &asm_,
+                bool unopt_out, bool opt_out, bool obj_out, bool asm_out,
+                unsigned threads, ModuleInfo module_info) {
+    unsigned outcount = unopt_out + opt_out + obj_out + asm_out;
+    assert(outcount);
+    outputs.resize(outputs.size() + outcount * threads * 2);
+    auto names_start = outputs.data() + outputs.size() - outcount * threads * 2;
+    auto outputs_start = names_start + outcount * threads;
+    unopt.resize(unopt.size() + unopt_out * threads);
+    opt.resize(opt.size() + opt_out * threads);
+    obj.resize(obj.size() + obj_out * threads);
+    asm_.resize(asm_.size() + asm_out * threads);
+    // Timers for timing purposes
+    TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str());
+    SmallVector<ShardTimers, 1> timers(threads);
+    for (unsigned i = 0; i < threads; ++i) {
+        auto idx = std::to_string(i);
+        timers[i].name = "shard_" + idx;
+        timers[i].desc = ("Timings for " + name + " module shard " + idx).str();
+        timers[i].deserialize.init("deserialize_" + idx, "Deserialize module");
+        timers[i].materialize.init("materialize_" + idx, "Materialize declarations");
+        timers[i].construct.init("construct_" + idx, "Construct partitioned definitions");
+        timers[i].deletion.init("deletion_" + idx, "Delete dead declarations");
+        timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode");
+        timers[i].optimize.init("optimize_" + idx, "Optimize shard");
+        timers[i].opt.init("opt_" + idx, "Emit optimized bitcode");
+        timers[i].obj.init("obj_" + idx, "Emit object file");
+        timers[i].asm_.init("asm_" + idx, "Emit assembly file");
+    }
+    Timer partition_timer("partition", "Partition module", timer_group);
+    Timer serialize_timer("serialize", "Serialize module", timer_group);
+    Timer output_timer("output", "Add outputs", timer_group);
+    bool report_timings = false;
+    if (auto env = getenv("JULIA_IMAGE_TIMINGS")) {
+        char *endptr;
+        unsigned long val = strtoul(env, &endptr, 10);
+        if (endptr != env && !*endptr && val <= 1) {
+            report_timings = val;
+        } else {
+            if (StringRef("true").compare_insensitive(env) == 0)
+                report_timings = true;
+            else if (StringRef("false").compare_insensitive(env) == 0)
+                report_timings = false;
+            else
+                errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n";
+        }
+    }
+    for (unsigned i = 0; i < threads; ++i) {
+        auto start = names_start + i * outcount;
+        auto istr = std::to_string(i);
+        if (unopt_out)
+            *start++ = (name + "_unopt#" + istr + ".bc").str();
+        if (opt_out)
+            *start++ = (name + "_opt#" + istr + ".bc").str();
+        if (obj_out)
+            *start++ = (name + "#" + istr + ".o").str();
+        if (asm_out)
+            *start++ = (name + "#" + istr + ".s").str();
+    }
+    // Single-threaded case
+    if (threads == 1) {
+        output_timer.startTimer();
+        add_output_impl(M, TM, outputs_start, names_start,
+                        unopt_out ? unopt.data() + unopt.size() - 1 : nullptr,
+                        opt_out ? opt.data() + opt.size() - 1 : nullptr,
+                        obj_out ? obj.data() + obj.size() - 1 : nullptr,
+                        asm_out ? asm_.data() + asm_.size() - 1 : nullptr,
+                        timers[0], 0);
+        output_timer.stopTimer();
+
+        if (!report_timings) {
+            timer_group.clear();
+        } else {
+            timer_group.print(dbgs(), true);
+            for (auto &t : timers) {
+                t.print(dbgs(), true);
+            }
+        }
+        return;
+    }
+
+    partition_timer.startTimer();
+    uint64_t counter = 0;
+    // Partitioning requires all globals to have names.
+    // We use a prefix to avoid name conflicts with user code.
+    for (auto &G : M.global_values()) {
+        if (!G.isDeclaration() && !G.hasName()) {
+            G.setName("jl_ext_" + Twine(counter++));
+        }
+    }
+    auto partitions = partitionModule(M, threads);
+    partition_timer.stopTimer();
+
+    serialize_timer.startTimer();
+    auto serialized = serializeModule(M);
+    serialize_timer.stopTimer();
+
+    output_timer.startTimer();
+
+    auto unoptstart = unopt_out ? unopt.data() + unopt.size() - threads : nullptr;
+    auto optstart = opt_out ? opt.data() + opt.size() - threads : nullptr;
+    auto objstart = obj_out ? obj.data() + obj.size() - threads : nullptr;
+    auto asmstart = asm_out ? asm_.data() + asm_.size() - threads : nullptr;
+
+    // Start all of the worker threads
+    std::vector<std::thread> workers(threads);
+    for (unsigned i = 0; i < threads; i++) {
+        workers[i] = std::thread([&, i](){
+            LLVMContext ctx;
+            // Lazily deserialize the entire module
+            timers[i].deserialize.startTimer();
+            auto M = cantFail(getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx), "Error loading module");
+            timers[i].deserialize.stopTimer();
+
+            timers[i].materialize.startTimer();
+            materializePreserved(*M, partitions[i]);
+            timers[i].materialize.stopTimer();
+
+            timers[i].construct.startTimer();
+            construct_vars(*M, partitions[i]);
+            M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), "_" + std::to_string(i)));
+            // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
+            // or it may skip emitting debug info for that file. Here set it to ./julia#N
+            DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), ".");
+            for (DICompileUnit *CU : M->debug_compile_units())
+                CU->replaceOperandWith(0, topfile);
+            timers[i].construct.stopTimer();
+
+            timers[i].deletion.startTimer();
+            dropUnusedGlobals(*M);
+            timers[i].deletion.stopTimer();
+
+            add_output_impl(*M, TM, outputs_start + i * outcount, names_start + i * outcount,
+                            unoptstart ? unoptstart + i : nullptr,
+                            optstart ? optstart + i : nullptr,
+                            objstart ? objstart + i : nullptr,
+                            asmstart ? asmstart + i : nullptr,
+                            timers[i], i);
+        });
+    }
+
+    // Wait for all of the worker threads to finish
+    for (auto &w : workers)
+        w.join();
+
+    output_timer.stopTimer();
+
+    if (!report_timings) {
+        timer_group.clear();
+    } else {
+        timer_group.print(dbgs(), true);
+        for (auto &t : timers) {
+            t.print(dbgs(), true);
+        }
+        dbgs() << "Partition weights: [";
+        bool comma = false;
+        for (auto &p : partitions) {
+            if (comma)
+                dbgs() << ", ";
+            else
+                comma = true;
+            dbgs() << p.weight;
+        }
+        dbgs() << "]\n";
+    }
+}
+
+static unsigned compute_image_thread_count(const ModuleInfo &info) {
+    // 32-bit systems are very memory-constrained
+#ifdef _P32
+    LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n");
+    return 1;
+#endif
+    // This is not overridable because empty modules do occasionally appear, but they'll be very small and thus exit early to
+    // known easy behavior. Plus they really don't warrant multiple threads
+    if (info.weight < 1000) {
+        LLVM_DEBUG(dbgs() << "Small module, using a single thread\n");
+        return 1;
+    }
+
+    unsigned threads = std::max(jl_cpu_threads() / 2, 1);
+
+    auto max_threads = info.globals / 100;
+    if (max_threads < threads) {
+        LLVM_DEBUG(dbgs() << "Low global count limiting threads to " << max_threads << " (" << info.globals << "globals)\n");
+        threads = max_threads;
+    }
+
+    // environment variable override
+    const char *env_threads = getenv("JULIA_IMAGE_THREADS");
+    bool env_threads_set = false;
+    if (env_threads) {
+        char *endptr;
+        unsigned long requested = strtoul(env_threads, &endptr, 10);
+        if (*endptr || !requested) {
+            jl_safe_printf("WARNING: invalid value '%s' for JULIA_IMAGE_THREADS\n", env_threads);
+        } else {
+            LLVM_DEBUG(dbgs() << "Overriding threads to " << requested << " due to JULIA_IMAGE_THREADS\n");
+            threads = requested;
+            env_threads_set = true;
+        }
+    }
+
+    // more defaults
+    if (!env_threads_set && threads > 1) {
+        if (auto fallbackenv = getenv("JULIA_CPU_THREADS")) {
+            char *endptr;
+            unsigned long requested = strtoul(fallbackenv, &endptr, 10);
+            if (*endptr || !requested) {
+                jl_safe_printf("WARNING: invalid value '%s' for JULIA_CPU_THREADS\n", fallbackenv);
+            } else if (requested < threads) {
+                LLVM_DEBUG(dbgs() << "Overriding threads to " << requested << " due to JULIA_CPU_THREADS\n");
+                threads = requested;
+            }
+        }
+    }
+
+    threads = std::max(threads, 1u);
+
+    return threads;
+}
 
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_native_impl(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname,
         const char *asm_fname,
         const char *sysimg_data, size_t sysimg_len, ios_t *s)
 {
-    JL_TIMING(NATIVE_DUMP);
+    JL_TIMING(NATIVE_AOT, NATIVE_Dump);
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
+    if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) {
+        LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n");
+        delete data;
+        return;
+    }
     auto TSCtx = data->M.getContext();
     auto lock = TSCtx.getLock();
     LLVMContext &Context = *TSCtx.getContext();
     // We don't want to use MCJIT's target machine because
     // it uses the large code model and we may potentially
     // want less optimizations there.
-    Triple TheTriple = Triple(jl_ExecutionEngine->getTargetTriple());
     // make sure to emit the native object format, even if FORCE_ELF was set in codegen
-#if defined(_OS_WINDOWS_)
-    TheTriple.setObjectFormat(Triple::COFF);
-#elif defined(_OS_DARWIN_)
-    TheTriple.setObjectFormat(Triple::MachO);
-    TheTriple.setOS(llvm::Triple::MacOSX);
-#endif
-    std::unique_ptr<TargetMachine> TM(
+    Triple TheTriple(data->M.getModuleUnlocked()->getTargetTriple());
+    if (TheTriple.isOSWindows()) {
+        TheTriple.setObjectFormat(Triple::COFF);
+    } else if (TheTriple.isOSDarwin()) {
+        TheTriple.setObjectFormat(Triple::MachO);
+        TheTriple.setOS(llvm::Triple::MacOSX);
+    }
+    Optional<Reloc::Model> RelocModel;
+    if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) {
+        RelocModel = Reloc::PIC_;
+    }
+    CodeModel::Model CMModel = CodeModel::Small;
+    if (TheTriple.isPPC()) {
+        // On PPC the small model is limited to 16bit offsets
+        CMModel = CodeModel::Medium;
+    }
+    std::unique_ptr<TargetMachine> SourceTM(
         jl_ExecutionEngine->getTarget().createTargetMachine(
             TheTriple.getTriple(),
             jl_ExecutionEngine->getTargetCPU(),
             jl_ExecutionEngine->getTargetFeatureString(),
             jl_ExecutionEngine->getTargetOptions(),
-#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
-            Reloc::PIC_,
-#else
-            Optional<Reloc::Model>(),
-#endif
-#if defined(_CPU_PPC_) || defined(_CPU_PPC64_)
-            // On PPC the small model is limited to 16bit offsets
-            CodeModel::Medium,
-#else
-            // Use small model so that we can use signed 32bits offset in the function and GV tables
-            CodeModel::Small,
-#endif
+            RelocModel,
+            CMModel,
             CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
             ));
 
 
-    // set up optimization passes
-    SmallVector<char, 0> bc_Buffer;
-    SmallVector<char, 0> obj_Buffer;
-    SmallVector<char, 0> asm_Buffer;
-    SmallVector<char, 0> unopt_bc_Buffer;
-    raw_svector_ostream bc_OS(bc_Buffer);
-    raw_svector_ostream obj_OS(obj_Buffer);
-    raw_svector_ostream asm_OS(asm_Buffer);
-    raw_svector_ostream unopt_bc_OS(unopt_bc_Buffer);
     std::vector<NewArchiveMember> bc_Archive;
     std::vector<NewArchiveMember> obj_Archive;
     std::vector<NewArchiveMember> asm_Archive;
     std::vector<NewArchiveMember> unopt_bc_Archive;
     std::vector<std::string> outputs;
 
-    PassBuilder emptyPB;
-    AnalysisManagers empty(emptyPB);
-    ModulePassManager preopt, postopt;
-    legacy::PassManager emitter; // MC emission is only supported on legacy PM
-
-    if (unopt_bc_fname)
-        preopt.addPass(BitcodeWriterPass(unopt_bc_OS));
-
-    if (bc_fname)
-        postopt.addPass(BitcodeWriterPass(bc_OS));
-    //Is this necessary for TM?
-    addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
-    if (obj_fname)
-        if (TM->addPassesToEmitFile(emitter, obj_OS, nullptr, CGFT_ObjectFile, false))
-            jl_safe_printf("ERROR: target does not support generation of object files\n");
-    if (asm_fname)
-        if (TM->addPassesToEmitFile(emitter, asm_OS, nullptr, CGFT_AssemblyFile, false))
-            jl_safe_printf("ERROR: target does not support generation of object files\n");
-
     // Reset the target triple to make sure it matches the new target machine
     auto dataM = data->M.getModuleUnlocked();
-    dataM->setTargetTriple(TM->getTargetTriple().str());
-    dataM->setDataLayout(jl_create_datalayout(*TM));
-
-#ifndef JL_USE_NEW_PM
-    legacy::PassManager optimizer;
-    addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis());
-    addOptimizationPasses(&optimizer, jl_options.opt_level, true, true);
-    addMachinePasses(&optimizer, jl_options.opt_level);
-#else
-    NewPM optimizer{std::move(TM), getOptLevel(jl_options.opt_level), OptimizationOptions::defaults(true, true)};
-#endif
+    dataM->setTargetTriple(SourceTM->getTargetTriple().str());
+    dataM->setDataLayout(jl_create_datalayout(*SourceTM));
 
-    Type *T_size;
-    if (sizeof(size_t) == 8)
-        T_size = Type::getInt64Ty(Context);
-    else
-        T_size = Type::getInt32Ty(Context);
+    Type *T_size = dataM->getDataLayout().getIntPtrType(Context);
     Type *T_psize = T_size->getPointerTo();
 
+    bool imaging_mode = imaging_default() || jl_options.outputo;
+
+    unsigned threads = 1;
+    unsigned nfvars = 0;
+    unsigned ngvars = 0;
+
+    ModuleInfo module_info = compute_module_info(*dataM);
+    LLVM_DEBUG(dbgs()
+        << "Dumping module with stats:\n"
+        << "    globals: " << module_info.globals << "\n"
+        << "    functions: " << module_info.funcs << "\n"
+        << "    basic blocks: " << module_info.bbs << "\n"
+        << "    instructions: " << module_info.insts << "\n"
+        << "    clones: " << module_info.clones << "\n"
+        << "    weight: " << module_info.weight << "\n"
+    );
+
     // add metadata information
-    if (imaging_default() || jl_options.outputo) {
-        emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize);
-        emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_sysimg_fvars", T_psize);
+    if (imaging_mode) {
+        multiversioning_preannotate(*dataM);
+        {
+            DenseSet<GlobalValue *> fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end());
+            for (auto &F : *dataM) {
+                if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) {
+                    if (fvars.insert(&F).second) {
+                        data->jl_sysimg_fvars.push_back(&F);
+                    }
+                }
+            }
+        }
+        threads = compute_image_thread_count(module_info);
+        LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n");
+        nfvars = data->jl_sysimg_fvars.size();
+        ngvars = data->jl_sysimg_gvars.size();
+        emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize);
+        emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize);
+        std::vector<uint32_t> idxs;
+        idxs.resize(data->jl_sysimg_gvars.size());
+        std::iota(idxs.begin(), idxs.end(), 0);
+        auto gidxs = ConstantDataArray::get(Context, idxs);
+        auto gidxs_var = new GlobalVariable(*dataM, gidxs->getType(), true,
+                                            GlobalVariable::ExternalLinkage,
+                                            gidxs, "jl_gvar_idxs");
+        gidxs_var->setVisibility(GlobalValue::HiddenVisibility);
+        gidxs_var->setDSOLocal(true);
+        idxs.clear();
+        idxs.resize(data->jl_sysimg_fvars.size());
+        std::iota(idxs.begin(), idxs.end(), 0);
+        auto fidxs = ConstantDataArray::get(Context, idxs);
+        auto fidxs_var = new GlobalVariable(*dataM, fidxs->getType(), true,
+                                            GlobalVariable::ExternalLinkage,
+                                            fidxs, "jl_fvar_idxs");
+        fidxs_var->setVisibility(GlobalValue::HiddenVisibility);
+        fidxs_var->setDSOLocal(true);
+        dataM->addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0"));
 
         // reflect the address of the jl_RTLD_DEFAULT_handle variable
         // back to the caller, so that we can check for consistency issues
@@ -640,97 +1584,117 @@ void jl_dump_native_impl(void *native_code,
                                      true,
                                      GlobalVariable::ExternalLinkage,
                                      jlRTLD_DEFAULT_var,
-                                     "jl_RTLD_DEFAULT_handle_pointer"));
-    }
-
-    // do the actual work
-    auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name, bool inject_crt) {
-        preopt.run(M, empty.MAM);
-        if (bc_fname || obj_fname || asm_fname) {
-            assert(!verifyModule(M, &errs()));
-            optimizer.run(M);
-            assert(!verifyModule(M, &errs()));
-        }
-
-        if (inject_crt) {
-            // We would like to emit an alias or an weakref alias to redirect these symbols
-            // but LLVM doesn't let us emit a GlobalAlias to a declaration...
-            // So for now we inject a definition of these functions that calls our runtime
-            // functions. We do so after optimization to avoid cloning these functions.
-            injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee",
-                    FunctionType::get(Type::getFloatTy(Context), { Type::getHalfTy(Context) }, false));
-            injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee",
-                    FunctionType::get(Type::getFloatTy(Context), { Type::getHalfTy(Context) }, false));
-            injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee",
-                    FunctionType::get(Type::getHalfTy(Context), { Type::getFloatTy(Context) }, false));
-            injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee",
-                    FunctionType::get(Type::getHalfTy(Context), { Type::getFloatTy(Context) }, false));
-            injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2",
-                    FunctionType::get(Type::getHalfTy(Context), { Type::getDoubleTy(Context) }, false));
-
-#if defined(_OS_WINDOWS_)
-            // Windows expect that the function `_DllMainStartup` is present in an dll.
-            // Normal compilers use something like Zig's crtdll.c instead we provide a
-            // a stub implementation.
-            auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo();
-            auto T_int32 = Type::getInt32Ty(Context);
-            auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false);
-            auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", M);
-            F->setCallingConv(CallingConv::X86_StdCall);
-
-            llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", F));
-            builder.CreateRet(ConstantInt::get(T_int32, 1));
-#endif
-        }
-
-        postopt.run(M, empty.MAM);
-
-        // Get target by snooping on multiversioning
-        GlobalVariable *target_ids = M.getNamedGlobal("jl_dispatch_target_ids");
-        if (s && target_ids) {
-            if(auto targets = dyn_cast<ConstantDataArray>(target_ids->getInitializer())) {
-                auto rawTargets = targets->getRawDataValues();
-                write_int32(s, rawTargets.size());
-                ios_write(s, rawTargets.data(), rawTargets.size());
-            };
+                                     "jl_RTLD_DEFAULT_handle_pointer"), TheTriple);
+
+        // let the compiler know we are going to internalize a copy of this,
+        // if it has a current usage with ExternalLinkage
+        auto small_typeof_copy = dataM->getGlobalVariable("small_typeof");
+        if (small_typeof_copy) {
+            small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
+            small_typeof_copy->setDSOLocal(true);
         }
+    }
 
-        emitter.run(M);
+    // Reserve space for the output files and names
+    // DO NOT DELETE, this is necessary to ensure memorybuffers
+    // have a stable backing store for both their object files and
+    // their names
+    outputs.reserve((threads + 1) * (!!unopt_bc_fname + !!bc_fname + !!obj_fname + !!asm_fname) * 2);
 
-        if (unopt_bc_fname)
-            emit_result(unopt_bc_Archive, unopt_bc_Buffer, unopt_bc_Name, outputs);
-        if (bc_fname)
-            emit_result(bc_Archive, bc_Buffer, bc_Name, outputs);
-        if (obj_fname)
-            emit_result(obj_Archive, obj_Buffer, obj_Name, outputs);
-        if (asm_fname)
-            emit_result(asm_Archive, asm_Buffer, asm_Name, outputs);
-    };
+    auto compile = [&](Module &M, StringRef name, unsigned threads) { add_output(
+            M, *SourceTM, outputs, name,
+            unopt_bc_Archive, bc_Archive, obj_Archive, asm_Archive,
+            !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname,
+            threads, module_info
+    ); };
 
-    add_output(*dataM, "unopt.bc", "text.bc", "text.o", "text.s", true);
+    compile(*dataM, "text", threads);
 
-    orc::ThreadSafeModule sysimage(std::make_unique<Module>("sysimage", Context), TSCtx);
-    auto sysimageM = sysimage.getModuleUnlocked();
+    auto sysimageM = std::make_unique<Module>("sysimage", Context);
     sysimageM->setTargetTriple(dataM->getTargetTriple());
     sysimageM->setDataLayout(dataM->getDataLayout());
 #if JL_LLVM_VERSION >= 130000
     sysimageM->setStackProtectorGuard(dataM->getStackProtectorGuard());
     sysimageM->setOverrideStackAlignment(dataM->getOverrideStackAlignment());
 #endif
+
+    if (TheTriple.isOSWindows()) {
+        // Windows expect that the function `_DllMainStartup` is present in an dll.
+        // Normal compilers use something like Zig's crtdll.c instead we provide a
+        // a stub implementation.
+        auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo();
+        auto T_int32 = Type::getInt32Ty(Context);
+        auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false);
+        auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", *sysimageM);
+        F->setCallingConv(CallingConv::X86_StdCall);
+
+        llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F));
+        builder.CreateRet(ConstantInt::get(T_int32, 1));
+    }
+    bool has_veccall = dataM->getModuleFlag("julia.mv.veccall");
     data->M = orc::ThreadSafeModule(); // free memory for data->M
 
     if (sysimg_data) {
         Constant *data = ConstantDataArray::get(Context,
             ArrayRef<uint8_t>((const unsigned char*)sysimg_data, sysimg_len));
-        addComdat(new GlobalVariable(*sysimageM, data->getType(), false,
+        auto sysdata = new GlobalVariable(*sysimageM, data->getType(), false,
                                      GlobalVariable::ExternalLinkage,
-                                     data, "jl_system_image_data"))->setAlignment(Align(64));
+                                     data, "jl_system_image_data");
+        sysdata->setAlignment(Align(64));
+        addComdat(sysdata, TheTriple);
         Constant *len = ConstantInt::get(T_size, sysimg_len);
         addComdat(new GlobalVariable(*sysimageM, len->getType(), true,
                                      GlobalVariable::ExternalLinkage,
-                                     len, "jl_system_image_size"));
+                                     len, "jl_system_image_size"), TheTriple);
+    }
+    if (imaging_mode) {
+        auto specs = jl_get_llvm_clone_targets();
+        const uint32_t base_flags = has_veccall ? JL_TARGET_VEC_CALL : 0;
+        std::vector<uint8_t> data;
+        auto push_i32 = [&] (uint32_t v) {
+            uint8_t buff[4];
+            memcpy(buff, &v, 4);
+            data.insert(data.end(), buff, buff + 4);
+        };
+        push_i32(specs.size());
+        for (uint32_t i = 0; i < specs.size(); i++) {
+            push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME));
+            auto &specdata = specs[i].data;
+            data.insert(data.end(), specdata.begin(), specdata.end());
+        }
+        auto value = ConstantDataArray::get(Context, data);
+        auto target_ids = new GlobalVariable(*sysimageM, value->getType(), true,
+                                      GlobalVariable::InternalLinkage,
+                                      value, "jl_dispatch_target_ids");
+        auto shards = emit_shard_table(*sysimageM, T_size, T_psize, threads);
+        auto ptls = emit_ptls_table(*sysimageM, T_size, T_psize);
+        auto header = emit_image_header(*sysimageM, threads, nfvars, ngvars);
+        auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*));
+        auto small_typeof_copy = new GlobalVariable(*sysimageM, AT, false,
+                                                    GlobalVariable::ExternalLinkage,
+                                                    Constant::getNullValue(AT),
+                                                    "small_typeof");
+        small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
+        small_typeof_copy->setDSOLocal(true);
+        AT = ArrayType::get(T_psize, 5);
+        auto pointers = new GlobalVariable(*sysimageM, AT, false,
+                                           GlobalVariable::ExternalLinkage,
+                                           ConstantArray::get(AT, {
+                                                ConstantExpr::getBitCast(header, T_psize),
+                                                ConstantExpr::getBitCast(shards, T_psize),
+                                                ConstantExpr::getBitCast(ptls, T_psize),
+                                                ConstantExpr::getBitCast(small_typeof_copy, T_psize),
+                                                ConstantExpr::getBitCast(target_ids, T_psize)
+                                           }),
+                                           "jl_image_pointers");
+        addComdat(pointers, TheTriple);
+        if (s) {
+            write_int32(s, data.size());
+            ios_write(s, (const char *)data.data(), data.size());
+        }
     }
-    add_output(*sysimageM, "data.bc", "data.bc", "data.o", "data.s", false);
+
+    compile(*sysimageM, "data", 1);
 
     object::Archive::Kind Kind = getDefaultForHost(TheTriple);
     if (unopt_bc_fname)
@@ -837,6 +1801,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
                 PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions));
             }
         }
+#if JL_LLVM_VERSION < 150000
 #if defined(_COMPILER_ASAN_ENABLED_)
         PM->add(createAddressSanitizerFunctionPass());
 #endif
@@ -845,6 +1810,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
 #endif
 #if defined(_COMPILER_TSAN_ENABLED_)
         PM->add(createThreadSanitizerLegacyPassPass());
+#endif
 #endif
         return;
     }
@@ -996,6 +1962,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     }
     PM->add(createCombineMulAddPass());
     PM->add(createDivRemPairsPass());
+#if JL_LLVM_VERSION < 150000
 #if defined(_COMPILER_ASAN_ENABLED_)
     PM->add(createAddressSanitizerFunctionPass());
 #endif
@@ -1005,6 +1972,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
 #if defined(_COMPILER_TSAN_ENABLED_)
     PM->add(createThreadSanitizerLegacyPassPass());
 #endif
+#endif
 }
 
 // An LLVM module pass that just runs all julia passes in order. Useful for
@@ -1046,7 +2014,7 @@ static RegisterPass<JuliaPipeline<0,true>> XS("juliaO0-sysimg", "Runs the entire
 static RegisterPass<JuliaPipeline<2,true>> YS("julia-sysimg", "Runs the entire julia pipeline (at -O2/sysimg mode)", false, false);
 static RegisterPass<JuliaPipeline<3,true>> ZS("juliaO3-sysimg", "Runs the entire julia pipeline (at -O3/sysimg mode)", false, false);
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int lower_intrinsics) {
     addOptimizationPasses(unwrap(PM), opt_level, lower_intrinsics);
 }
@@ -1056,7 +2024,7 @@ void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int l
 // for use in reflection from Julia.
 // this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways:
 // misuse will leak memory or cause read-after-free
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
 {
     if (jl_is_method(mi->def.method) && mi->def.method->source == NULL &&
@@ -1069,28 +2037,31 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
     // get the source code for this function
     jl_value_t *jlrettype = (jl_value_t*)jl_any_type;
     jl_code_info_t *src = NULL;
-    JL_GC_PUSH2(&src, &jlrettype);
-    if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && jl_ir_flag_inferred((jl_array_t*)mi->def.method->source)) {
+    jl_code_instance_t *codeinst = NULL;
+    JL_GC_PUSH3(&src, &jlrettype, &codeinst);
+    if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && mi->def.method->source != jl_nothing && jl_ir_flag_inferred(mi->def.method->source)) {
         src = (jl_code_info_t*)mi->def.method->source;
         if (src && !jl_is_code_info(src))
-            src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src);
-    } else {
-        jl_value_t *ci = jl_rettype_inferred(mi, world, world);
+            src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
+    }
+    else {
+        jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world);
         if (ci != jl_nothing) {
-            jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
+            codeinst = (jl_code_instance_t*)ci;
             src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
             if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-                src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
+                src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
             jlrettype = codeinst->rettype;
+            codeinst = NULL; // not needed outside of this branch
         }
         if (!src || (jl_value_t*)src == jl_nothing) {
             src = jl_type_infer(mi, world, 0);
             if (src)
                 jlrettype = src->rettype;
             else if (jl_is_method(mi->def.method)) {
-                src = mi->def.method->generator ? jl_code_for_staged(mi) : (jl_code_info_t*)mi->def.method->source;
-                if (src && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-                    src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src);
+                src = mi->def.method->generator ? jl_code_for_staged(mi, world) : (jl_code_info_t*)mi->def.method->source;
+                if (src && (jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
+                    src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
             }
             // TODO: use mi->uninferred
         }
@@ -1105,7 +2076,10 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
         if (measure_compile_time_enabled)
             compiler_start_time = jl_hrtime();
         JL_LOCK(&jl_codegen_lock);
-        jl_codegen_params_t output(*ctx);
+        auto target_info = m.withModuleDo([&](Module &M) {
+            return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
+        });
+        jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second));
         output.world = world;
         output.params = &params;
         auto decls = jl_emit_code(m, mi, src, jlrettype, output);
diff --git a/src/array.c b/src/array.c
index 86b1056ef4d07..8a064583bbc9e 100644
--- a/src/array.c
+++ b/src/array.c
@@ -510,7 +510,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
         s = jl_mmtk_gc_alloc_big(ptls, allocsz);
 #endif
     }
-    jl_set_typeof(s, jl_string_type);
+    jl_set_typetagof(s, jl_string_tag, 0);
     maybe_record_alloc_to_profile(s, len, jl_string_type);
     *(size_t*)s = len;
     jl_string_data(s)[len] = 0;
@@ -628,7 +628,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
     if (i >= jl_array_len(a))
         jl_bounds_error_int((jl_value_t*)a, i + 1);
     if (a->flags.ptrarray)
-        jl_atomic_store_release(((_Atomic(jl_value_t*)*)a->data) + i, NULL);
+        jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, NULL);
     else if (a->flags.hasptr) {
         size_t elsize = a->elsize;
         jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
@@ -1162,7 +1162,7 @@ JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz)
     if (sz <= a->maxsize) {
         size_t dec = a->maxsize - sz;
         //if we don't save at least an eighth of maxsize then its not worth it to shrink
-        if (dec < a->maxsize / 8) return;
+        if (dec <= a->maxsize / 8) return;
         jl_array_shrink(a, dec);
     }
     else {
@@ -1198,7 +1198,7 @@ JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p,
 
 JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item)
 {
-    assert(jl_typeis(a, jl_array_any_type));
+    assert(jl_typetagis(a, jl_array_any_type));
     jl_array_grow_end(a, 1);
     size_t n = jl_array_nrows(a);
     jl_array_ptr_set(a, n - 1, item);
@@ -1206,8 +1206,8 @@ JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item)
 
 JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2)
 {
-    assert(jl_typeis(a, jl_array_any_type));
-    assert(jl_typeis(a2, jl_array_any_type));
+    assert(jl_typetagis(a, jl_array_any_type));
+    assert(jl_typetagis(a2, jl_array_any_type));
     size_t i;
     size_t n = jl_array_nrows(a);
     size_t n2 = jl_array_nrows(a2);
diff --git a/src/ast.c b/src/ast.c
index 0ff7c882ab8e7..97bbc6e8227ba 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -66,7 +66,6 @@ JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
 JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
 JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
 JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
-JL_DLLEXPORT jl_sym_t *jl_pure_sym;
 JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
 JL_DLLEXPORT jl_sym_t *jl_meta_sym;
 JL_DLLEXPORT jl_sym_t *jl_inert_sym;
@@ -328,7 +327,6 @@ void jl_init_common_symbols(void)
     jl_newvar_sym = jl_symbol("newvar");
     jl_copyast_sym = jl_symbol("copyast");
     jl_loopinfo_sym = jl_symbol("loopinfo");
-    jl_pure_sym = jl_symbol("pure");
     jl_meta_sym = jl_symbol("meta");
     jl_list_sym = jl_symbol("list");
     jl_unused_sym = jl_symbol("#unused#");
@@ -688,8 +686,8 @@ static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v, int ch
     if (check_valid) {
         if (jl_is_ssavalue(v))
             lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST");
-        if (jl_is_slot(v))
-            lerror(fl_ctx, symbol(fl_ctx, "error"), "Slot objects should not occur in an AST");
+        if (jl_is_slotnumber(v))
+            lerror(fl_ctx, symbol(fl_ctx, "error"), "SlotNumber objects should not occur in an AST");
     }
     value_t opaque = cvalue(fl_ctx, jl_ast_ctx(fl_ctx)->jvtype, sizeof(void*));
     *(jl_value_t**)cv_data((cvalue_t*)ptr(opaque)) = v;
@@ -702,11 +700,11 @@ static value_t julia_to_scm_noalloc(fl_context_t *fl_ctx, jl_value_t *v, int che
     if (julia_to_scm_noalloc1(fl_ctx, v, &retval))
         return retval;
     assert(!jl_is_expr(v) &&
-           !jl_typeis(v, jl_linenumbernode_type) &&
-           !jl_typeis(v, jl_gotonode_type) &&
-           !jl_typeis(v, jl_quotenode_type) &&
-           !jl_typeis(v, jl_newvarnode_type) &&
-           !jl_typeis(v, jl_globalref_type));
+           !jl_typetagis(v, jl_linenumbernode_type) &&
+           !jl_typetagis(v, jl_gotonode_type) &&
+           !jl_typetagis(v, jl_quotenode_type) &&
+           !jl_typetagis(v, jl_newvarnode_type) &&
+           !jl_typetagis(v, jl_globalref_type));
     return julia_to_scm_noalloc2(fl_ctx, v, check_valid);
 }
 
@@ -747,7 +745,7 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali
     // GC Note: jl_fieldref(v, 0) allocates for GotoNode
     //          but we don't need a GC root here because julia_to_list2_noalloc
     //          shouldn't allocate in this case.
-    if (jl_typeis(v, jl_linenumbernode_type)) {
+    if (jl_typetagis(v, jl_linenumbernode_type)) {
         jl_value_t *file = jl_fieldref_noalloc(v,1);
         jl_value_t *line = jl_fieldref(v,0);
         value_t args = julia_to_list2_noalloc(fl_ctx, line, file, check_valid);
@@ -757,13 +755,13 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali
         fl_free_gc_handles(fl_ctx, 1);
         return scmv;
     }
-    if (jl_typeis(v, jl_gotonode_type))
+    if (jl_typetagis(v, jl_gotonode_type))
         return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_goto_sym, jl_fieldref(v,0), check_valid);
-    if (jl_typeis(v, jl_quotenode_type))
+    if (jl_typetagis(v, jl_quotenode_type))
         return julia_to_list2(fl_ctx, (jl_value_t*)jl_inert_sym, jl_fieldref_noalloc(v,0), 0);
-    if (jl_typeis(v, jl_newvarnode_type))
+    if (jl_typetagis(v, jl_newvarnode_type))
         return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_newvar_sym, jl_fieldref(v,0), check_valid);
-    if (jl_typeis(v, jl_globalref_type)) {
+    if (jl_typetagis(v, jl_globalref_type)) {
         jl_module_t *m = jl_globalref_mod(v);
         jl_sym_t *sym = jl_globalref_name(v);
         if (m == jl_core_module)
@@ -785,7 +783,8 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
                                      jl_value_t *filename, size_t lineno,
                                      size_t offset, jl_value_t *options)
 {
-    JL_TIMING(PARSING);
+    JL_TIMING(PARSING, PARSING);
+    jl_timing_show_filename(jl_string_data(filename), JL_TIMING_CURRENT_BLOCK);
     if (offset > text_len) {
         jl_value_t *textstr = jl_pchar_to_string(text, text_len);
         JL_GC_PUSH1(&textstr);
@@ -1002,7 +1001,7 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
 static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error)
 {
     jl_task_t *ct = jl_current_task;
-    JL_TIMING(MACRO_INVOCATION);
+    JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION);
     size_t nargs = jl_array_len(args) + 1;
     JL_NARGSV("macrocall", 3); // macro name, location, and module
     jl_value_t **margs;
@@ -1012,7 +1011,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
     // __source__ argument
     jl_value_t *lno = jl_array_ptr_ref(args, 1);
     margs[1] = lno;
-    if (!jl_typeis(lno, jl_linenumbernode_type)) {
+    if (!jl_typetagis(lno, jl_linenumbernode_type)) {
         margs[1] = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing);
     }
     margs[2] = (jl_value_t*)inmodule;
@@ -1026,10 +1025,10 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
     jl_value_t *result;
     JL_TRY {
         margs[0] = jl_toplevel_eval(*ctx, margs[0]);
-        jl_method_instance_t *mfunc = jl_method_lookup(margs, nargs, world);
+        jl_method_instance_t *mfunc = jl_method_lookup(margs, nargs, ct->world_age);
         JL_GC_PROMISE_ROOTED(mfunc);
         if (mfunc == NULL) {
-            jl_method_error(margs[0], &margs[1], nargs, world);
+            jl_method_error(margs[0], &margs[1], nargs, ct->world_age);
             // unreachable
         }
         *ctx = mfunc->def.method->module;
@@ -1141,7 +1140,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
 
 JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
 {
-    JL_TIMING(LOWERING);
+    JL_TIMING(LOWERING, LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
     expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_atomic_load_acquire(&jl_world_counter), 0);
@@ -1152,7 +1151,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
 
 JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule)
 {
-    JL_TIMING(LOWERING);
+    JL_TIMING(LOWERING, LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
     expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_atomic_load_acquire(&jl_world_counter), 0);
@@ -1178,7 +1177,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmod
 JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule,
                                             const char *file, int line, size_t world)
 {
-    JL_TIMING(LOWERING);
+    JL_TIMING(LOWERING, LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
     expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1);
@@ -1191,7 +1190,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmod
 JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *inmodule,
                                                  const char *file, int line)
 {
-    JL_TIMING(LOWERING);
+    JL_TIMING(LOWERING, LOWERING);
     jl_array_t *kwargs = NULL;
     JL_GC_PUSH2(&expr, &kwargs);
     expr = jl_copy_ast(expr);
@@ -1239,7 +1238,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *
 JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule,
                                                  const char *file, int line)
 {
-    JL_TIMING(LOWERING);
+    JL_TIMING(LOWERING, LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
     expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
@@ -1260,8 +1259,8 @@ JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule)
 // Internal C entry point to parser
 // `text` is passed as a pointer to allow raw non-String buffers to be used
 // without copying.
-JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                                  size_t lineno, size_t offset, jl_value_t *options)
+jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
+                     size_t lineno, size_t offset, jl_value_t *options)
 {
     jl_value_t *core_parse = NULL;
     if (jl_core_module) {
diff --git a/src/ast.scm b/src/ast.scm
index bbb2180a8a92f..88220c03a7aa6 100644
--- a/src/ast.scm
+++ b/src/ast.scm
@@ -226,13 +226,13 @@
                               ""))
                         "")
                     (string.rep "    " ilvl) "end"))
-	   ((do)
-	    (let ((call (cadr e))
-		  (args (cdr (cadr (caddr e))))
-		  (body (caddr (caddr e))))
-	      (deparse-block (string (deparse call) " do" (if (null? args) "" " ")
-				     (deparse-arglist args))
-			     (cdr body) ilvl)))
+           ((do)
+            (let ((call (cadr e))
+                  (args (cdr (cadr (caddr e))))
+                  (body (caddr (caddr e))))
+              (deparse-block (string (deparse call) " do" (if (null? args) "" " ")
+                                     (deparse-arglist args))
+                             (cdr body) ilvl)))
            ((struct)
             (string (if (equal? (cadr e) '(true)) "mutable " "")
                     "struct "
@@ -329,8 +329,8 @@
         (else
          (case (car v)
            ((...)
-	    (arg-name (cadr v)) ;; to check for errors
-	    (decl-var (cadr v)))
+            (arg-name (cadr v)) ;; to check for errors
+            (decl-var (cadr v)))
            ((|::|)
             (if (not (symbol? (cadr v)))
                 (bad-formal-argument (cadr v)))
diff --git a/src/builtins.c b/src/builtins.c
index b090e952cc1cf..a6c904c851c95 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -35,8 +35,8 @@ extern "C" {
 static int bits_equal(const void *a, const void *b, int sz) JL_NOTSAFEPOINT
 {
     switch (sz) {
-    case 1:  return *(int8_t*)a == *(int8_t*)b;
-        // Let compiler constant folds the following.
+    case 1:  return *(uint8_t*)a == *(uint8_t*)b;
+        // Let compiler constant folds the following, though we may not know alignment of them
     case 2:  return memcmp(a, b, 2) == 0;
     case 4:  return memcmp(a, b, 4) == 0;
     case 8:  return memcmp(a, b, 8) == 0;
@@ -147,10 +147,10 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en
 {
     if (a == b)
         return 1;
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
-    if (dt != (jl_datatype_t*)jl_typeof(b))
+    uintptr_t dtag = jl_typetagof(a);
+    if (dtag != jl_typetagof(b))
         return 0;
-    if (dt == jl_datatype_type) {
+    if (dtag == jl_datatype_tag << 4) {
         jl_datatype_t *dta = (jl_datatype_t*)a;
         jl_datatype_t *dtb = (jl_datatype_t*)b;
         if (dta->name != dtb->name)
@@ -164,7 +164,7 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en
         }
         return 1;
     }
-    if (dt == jl_tvar_type) {
+    if (dtag == jl_tvar_tag << 4) {
         jl_typeenv_t *pe = env;
         while (pe != NULL) {
             if (pe->var == (jl_tvar_t*)a)
@@ -173,7 +173,7 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en
         }
         return 0;
     }
-    if (dt == jl_unionall_type) {
+    if (dtag == jl_unionall_tag << 4) {
         jl_unionall_t *ua = (jl_unionall_t*)a;
         jl_unionall_t *ub = (jl_unionall_t*)b;
         if (tvar_names && ua->var->name != ub->var->name)
@@ -183,11 +183,11 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en
         jl_typeenv_t e = { ua->var, (jl_value_t*)ub->var, env };
         return egal_types(ua->body, ub->body, &e, tvar_names);
     }
-    if (dt == jl_uniontype_type) {
+    if (dtag == jl_uniontype_tag << 4) {
         return egal_types(((jl_uniontype_t*)a)->a, ((jl_uniontype_t*)b)->a, env, tvar_names) &&
             egal_types(((jl_uniontype_t*)a)->b, ((jl_uniontype_t*)b)->b, env, tvar_names);
     }
-    if (dt == jl_vararg_type) {
+    if (dtag == jl_vararg_tag << 4) {
         jl_vararg_t *vma = (jl_vararg_t*)a;
         jl_vararg_t *vmb = (jl_vararg_t*)b;
         jl_value_t *vmaT = vma->T ? vma->T : (jl_value_t*)jl_any_type;
@@ -198,10 +198,8 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en
             return egal_types(vma->N, vmb->N, env, tvar_names);
         return !vma->N && !vmb->N;
     }
-    if (dt == jl_symbol_type || dt == jl_module_type)
-        return 0;
-    assert(!dt->name->mutabl);
-    return jl_egal__bits(a, b, dt);
+    assert(dtag == jl_symbol_tag << 4 || dtag == jl_module_tag << 4 || !((jl_datatype_t*)jl_typeof(a))->name->mutabl);
+    return jl_egal__bitstag(a, b, dtag);
 }
 
 JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b)
@@ -215,36 +213,72 @@ JL_DLLEXPORT int (jl_egal)(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value
     return jl_egal(a, b);
 }
 
-JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT
 {
     // warning: a,b may NOT have been gc-rooted by the caller
-    return jl_egal__unboxed_(a, b, dt);
-}
-
-int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
-{
-    if (dt == jl_simplevector_type)
-        return compare_svec((jl_svec_t*)a, (jl_svec_t*)b);
-    if (dt == jl_datatype_type) {
-        jl_datatype_t *dta = (jl_datatype_t*)a;
-        jl_datatype_t *dtb = (jl_datatype_t*)b;
-        if (dta->name != dtb->name)
+    return jl_egal__unboxed_(a, b, dtag);
+}
+
+JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT
+{
+    if (dtag < jl_max_tags << 4) {
+        switch ((enum jlsmall_typeof_tags)(dtag >> 4)) {
+        case jl_int8_tag:
+        case jl_uint8_tag:
+            return *(uint8_t*)a == *(uint8_t*)b;
+        case jl_int16_tag:
+        case jl_uint16_tag:
+            return *(uint16_t*)a == *(uint16_t*)b;
+        case jl_int32_tag:
+        case jl_uint32_tag:
+        case jl_char_tag:
+            return *(uint32_t*)a == *(uint32_t*)b;
+        case jl_int64_tag:
+        case jl_uint64_tag:
+            return *(uint64_t*)a == *(uint64_t*)b;
+        case jl_unionall_tag:
+            return egal_types(a, b, NULL, 1);
+        case jl_uniontype_tag:
+            return compare_fields(a, b, jl_uniontype_type);
+        case jl_vararg_tag:
+            return compare_fields(a, b, jl_vararg_type);
+        case jl_task_tag:
+        case jl_tvar_tag:
+        case jl_symbol_tag:
+        case jl_module_tag:
+        case jl_bool_tag:
             return 0;
-        if (dta->name != jl_tuple_typename && (dta->isconcretetype || dtb->isconcretetype))
-            return 0;
-        return compare_svec(dta->parameters, dtb->parameters);
-    }
-    if (dt == jl_string_type) {
-        size_t l = jl_string_len(a);
-        if (jl_string_len(b) != l)
-            return 0;
-        return !memcmp(jl_string_data(a), jl_string_data(b), l);
+        case jl_simplevector_tag:
+            return compare_svec((jl_svec_t*)a, (jl_svec_t*)b);
+        case jl_string_tag: {
+                size_t l = jl_string_len(a);
+                if (jl_string_len(b) != l)
+                    return 0;
+                return !memcmp(jl_string_data(a), jl_string_data(b), l);
+            }
+        case jl_datatype_tag: {
+                jl_datatype_t *dta = (jl_datatype_t*)a;
+                jl_datatype_t *dtb = (jl_datatype_t*)b;
+                if (dta->name != dtb->name)
+                    return 0;
+                if (dta->name != jl_tuple_typename && (dta->isconcretetype || dtb->isconcretetype))
+                    return 0;
+                return compare_svec(dta->parameters, dtb->parameters);
+            }
+#ifndef NDEBUG
+        default:
+#endif
+        case jl_max_tags:
+        case jl_null_tag:
+        case jl_typeofbottom_tag:
+        case jl_tags_count:
+            abort();
+        }
     }
-    assert(0 && "unreachable");
-    return 0;
+    return jl_egal__bits(a, b, (jl_datatype_t*)dtag);
 }
 
-int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+inline int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
     size_t sz = jl_datatype_size(dt);
     if (sz == 0)
@@ -252,8 +286,6 @@ int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_
     size_t nf = jl_datatype_nfields(dt);
     if (nf == 0 || !dt->layout->haspadding)
         return bits_equal(a, b, sz);
-    if (dt == jl_unionall_type)
-        return egal_types(a, b, NULL, 1);
     return compare_fields(a, b, dt);
 }
 
@@ -347,6 +379,8 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN
     }
     if (tv == jl_symbol_type)
         return ((jl_sym_t*)v)->hash;
+    if (tv == jl_module_type)
+        return ((jl_module_t*)v)->hash;
     assert(!tv->name->mutabl);
     return immut_id_(tv, v, tv->hash);
 }
@@ -1318,7 +1352,7 @@ JL_CALLABLE(jl_f_apply_type)
                 jl_type_error_rt("Tuple", "parameter", (jl_value_t*)jl_type_type, pi);
             }
         }
-        return (jl_value_t*)jl_apply_tuple_type_v(&args[1], nargs-1);
+        return jl_apply_tuple_type_v(&args[1], nargs-1);
     }
     else if (args[0] == (jl_value_t*)jl_uniontype_type) {
         // Union{} has extra restrictions, so it needs to be checked after
@@ -1415,6 +1449,7 @@ JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_
         jl_type_error_rt("TypeVar", "upper bound", (jl_value_t *)jl_type_type, ub);
     jl_task_t *ct = jl_current_task;
     jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ct->ptls, sizeof(jl_tvar_t), jl_tvar_type);
+    jl_set_typetagof(tv, jl_tvar_tag, 0);
     tv->name = name;
     tv->lb = lb;
     tv->ub = ub;
@@ -1648,7 +1683,7 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
             if (!jl_has_free_typevars(tb) || !jl_egal(ta, tb))
                 return 0;
         }
-        else if (jl_has_free_typevars(tb) || jl_typeof(ta) != jl_typeof(tb) ||
+        else if (jl_has_free_typevars(tb) || jl_typetagof(ta) != jl_typetagof(tb) ||
                  !jl_types_equal(ta, tb)) {
             return 0;
         }
@@ -1761,7 +1796,7 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     if (!jl_is_datatype(dta))
         return 0;
     jl_datatype_t *dtb = (jl_datatype_t*)jl_unwrap_unionall(tb);
-    if (!(jl_typeof(dta) == jl_typeof(dtb) &&
+    if (!(jl_typetagof(dta) == jl_typetagof(dtb) &&
           dta->name->name == dtb->name->name &&
           dta->name->abstract == dtb->name->abstract &&
           dta->name->mutabl == dtb->name->mutabl &&
@@ -1891,7 +1926,7 @@ static void add_intrinsic_properties(enum intrinsic f, unsigned nargs, void (*pf
 
 static void add_intrinsic(jl_module_t *inm, const char *name, enum intrinsic f) JL_GC_DISABLED
 {
-    jl_value_t *i = jl_permbox32(jl_intrinsic_type, (int32_t)f);
+    jl_value_t *i = jl_permbox32(jl_intrinsic_type, 0, (int32_t)f);
     jl_sym_t *sym = jl_symbol(name);
     jl_set_const(inm, sym, i);
     jl_module_export(inm, sym);
@@ -1918,6 +1953,11 @@ void jl_init_intrinsic_functions(void) JL_GC_DISABLED
         (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type),
         "OpaqueClosure", jl_f_opaque_closure_call);
 
+    // Save a reference to the just created OpaqueClosure method, so we can provide special
+    // codegen for it later.
+    jl_opaque_closure_method = (jl_method_t*)jl_methtable_lookup(jl_opaque_closure_typename->mt,
+        (jl_value_t*)jl_anytuple_type, 1);
+
 #define ADD_I(name, nargs) add_intrinsic(inm, #name, name);
 #define ADD_HIDDEN(name, nargs)
 #define ALIAS ADD_I
@@ -2020,6 +2060,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("Tuple", (jl_value_t*)jl_anytuple_type);
     add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type);
     add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type);
+    add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL));
 
     add_builtin("Module", (jl_value_t*)jl_module_type);
     add_builtin("MethodTable", (jl_value_t*)jl_methtable_type);
@@ -2029,9 +2070,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("TypeMapLevel", (jl_value_t*)jl_typemap_level_type);
     add_builtin("Symbol", (jl_value_t*)jl_symbol_type);
     add_builtin("SSAValue", (jl_value_t*)jl_ssavalue_type);
-    add_builtin("Slot", (jl_value_t*)jl_abstractslot_type);
     add_builtin("SlotNumber", (jl_value_t*)jl_slotnumber_type);
-    add_builtin("TypedSlot", (jl_value_t*)jl_typedslot_type);
     add_builtin("Argument", (jl_value_t*)jl_argument_type);
     add_builtin("Const", (jl_value_t*)jl_const_type);
     add_builtin("PartialStruct", (jl_value_t*)jl_partial_struct_type);
diff --git a/src/ccall.cpp b/src/ccall.cpp
index 2dea1e07ca45b..90f7417c03524 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -57,11 +57,10 @@ GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M)
 static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_name,
                             GlobalVariable *&lib, GlobalVariable *&sym)
 {
-    auto M = &ctx.emission_context.shared_module(*jl_Module);
+    auto M = &ctx.emission_context.shared_module();
     bool runtime_lib = false;
     GlobalVariable *libptrgv;
     jl_codegen_params_t::SymMapGV *symMap;
-#ifdef _OS_WINDOWS_
     if ((intptr_t)f_lib == (intptr_t)JL_EXE_LIBNAME) {
         libptrgv = prepare_global_in(M, jlexe_var);
         symMap = &ctx.emission_context.symMapExe;
@@ -74,9 +73,7 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_
         libptrgv = prepare_global_in(M, jldll_var);
         symMap = &ctx.emission_context.symMapDll;
     }
-    else
-#endif
-    if (f_lib == NULL) {
+    else if (f_lib == NULL) {
         libptrgv = jl_emit_RTLD_DEFAULT_var(M);
         symMap = &ctx.emission_context.symMapDefault;
     }
@@ -168,7 +165,7 @@ static Value *runtime_sym_lookup(
         }
         else {
             // f_lib is actually one of the special sentinel values
-            libname = ConstantExpr::getIntToPtr(ConstantInt::get(getSizeTy(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext()));
+            libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext()));
         }
         llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
                     { libname, nameval, libptrgv });
@@ -236,7 +233,7 @@ static GlobalVariable *emit_plt_thunk(
         bool runtime_lib)
 {
     ++PLTThunks;
-    auto M = &ctx.emission_context.shared_module(*jl_Module);
+    auto M = &ctx.emission_context.shared_module();
     PointerType *funcptype = PointerType::get(functype, 0);
     libptrgv = prepare_global_in(M, libptrgv);
     llvmgv = prepare_global_in(M, llvmgv);
@@ -279,14 +276,13 @@ static GlobalVariable *emit_plt_thunk(
     else {
         // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9)
         // Known failures includes vararg (not needed here) and sret.
-
-#if (defined(_CPU_X86_) || defined(_CPU_X86_64_) || (defined(_CPU_AARCH64_) && !defined(_OS_DARWIN_)))
-        // Ref https://bugs.llvm.org/show_bug.cgi?id=47058
-        // LLVM, as of 10.0.1 emits wrong/worse code when musttail is set
-        // Apple silicon macs give an LLVM ERROR if musttail is set here #44107.
-        if (!attrs.hasAttrSomewhere(Attribute::ByVal))
-            ret->setTailCallKind(CallInst::TCK_MustTail);
-#endif
+        if (ctx.emission_context.TargetTriple.isX86() || (ctx.emission_context.TargetTriple.isAArch64() && !ctx.emission_context.TargetTriple.isOSDarwin())) {
+            // Ref https://bugs.llvm.org/show_bug.cgi?id=47058
+            // LLVM, as of 10.0.1 emits wrong/worse code when musttail is set
+            // Apple silicon macs give an LLVM ERROR if musttail is set here #44107.
+            if (!attrs.hasAttrSomewhere(Attribute::ByVal))
+                ret->setTailCallKind(CallInst::TCK_MustTail);
+        }
         if (functype->getReturnType() == getVoidTy(irbuilder.getContext())) {
             irbuilder.CreateRetVoid();
         }
@@ -470,7 +466,7 @@ static Value *runtime_apply_type_env(jl_codectx_t &ctx, jl_value_t *ty)
         ctx.builder.CreateInBoundsGEP(
                 ctx.types().T_prjlvalue,
                 ctx.spvals_ptr,
-                ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(jl_svec_t) / sizeof(jl_value_t*)))
+                ConstantInt::get(ctx.types().T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*)))
     };
     auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args));
     addRetAttr(call, Attribute::getWithAlignment(ctx.builder.getContext(), Align(16)));
@@ -517,7 +513,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
                 ctx.builder.CreateCondBr(istype, passBB, failBB);
 
                 ctx.builder.SetInsertPoint(failBB);
-                emit_type_error(ctx, mark_julia_type(ctx, vx, true, jl_any_type), boxed(ctx, jlto_runtime), msg);
+                just_emit_type_error(ctx, mark_julia_type(ctx, vx, true, jl_any_type), boxed(ctx, jlto_runtime), msg);
                 ctx.builder.CreateUnreachable();
                 ctx.builder.SetInsertPoint(passBB);
             }
@@ -569,8 +565,15 @@ typedef struct {
     jl_value_t *gcroot;
 } native_sym_arg_t;
 
+static inline const char *invalid_symbol_err_msg(bool ccall)
+{
+    return ccall ?
+        "ccall: first argument not a pointer or valid constant expression" :
+        "cglobal: first argument not a pointer or valid constant expression";
+}
+
 // --- parse :sym or (:sym, :lib) argument into address info ---
-static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, const char *fname, bool llvmcall)
+static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, bool ccall, bool llvmcall)
 {
     Value *&jl_ptr = out.jl_ptr;
     void (*&fptr)(void) = out.fptr;
@@ -600,13 +603,11 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
         jl_cgval_t arg1 = emit_expr(ctx, arg);
         jl_value_t *ptr_ty = arg1.typ;
         if (!jl_is_cpointer_type(ptr_ty)) {
-            const char *errmsg = !strcmp(fname, "ccall") ?
-                "ccall: first argument not a pointer or valid constant expression" :
-                "cglobal: first argument not a pointer or valid constant expression";
+            const char *errmsg = invalid_symbol_err_msg(ccall);
             emit_cpointercheck(ctx, arg1, errmsg);
         }
         arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type);
-        jl_ptr = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), arg1, (jl_value_t*)jl_voidpointer_type);
+        jl_ptr = emit_unbox(ctx, ctx.types().T_size, arg1, (jl_value_t*)jl_voidpointer_type);
     }
     else {
         out.gcroot = ptr;
@@ -627,16 +628,12 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
                 std::string iname("i");
                 iname += f_name;
                 if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0)) {
-#ifdef _OS_WINDOWS_
                     f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME;
-#endif
                     f_name = jl_symbol_name(jl_symbol(iname.c_str()));
                 }
-#ifdef _OS_WINDOWS_
                 else {
-                    f_lib = jl_dlfind_win32(f_name);
+                    f_lib = jl_dlfind(f_name);
                 }
-#endif
             }
         }
         else if (jl_is_cpointer_type(jl_typeof(ptr))) {
@@ -648,8 +645,6 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
                 f_name = jl_symbol_name((jl_sym_t*)t0);
             else if (jl_is_string(t0))
                 f_name = jl_string_data(t0);
-            else
-                JL_TYPECHKS(fname, symbol, t0);
 
             jl_value_t *t1 = jl_fieldref(ptr, 1);
             if (jl_is_symbol(t1))
@@ -657,10 +652,7 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
             else if (jl_is_string(t1))
                 f_lib = jl_string_data(t1);
             else
-                JL_TYPECHKS(fname, symbol, t1);
-        }
-        else {
-            JL_TYPECHKS(fname, pointer, ptr);
+                f_name = NULL;
         }
     }
 }
@@ -694,10 +686,18 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
     else {
         rt = (jl_value_t*)jl_voidpointer_type;
     }
-    Type *lrt = getSizeTy(ctx.builder.getContext());
+    Type *lrt = ctx.types().T_size;
     assert(lrt == julia_type_to_llvm(ctx, rt));
 
-    interpret_symbol_arg(ctx, sym, args[1], "cglobal", false);
+    interpret_symbol_arg(ctx, sym, args[1], /*ccall=*/false, false);
+
+    if (sym.f_name == NULL && sym.fptr == NULL && sym.jl_ptr == NULL && sym.gcroot != NULL) {
+        const char *errmsg = invalid_symbol_err_msg(/*ccall=*/false);
+        jl_cgval_t arg1 = emit_expr(ctx, args[1]);
+        emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg);
+        JL_GC_POP();
+        return jl_cgval_t();
+    }
 
     if (sym.jl_ptr != NULL) {
         res = ctx.builder.CreateBitCast(sym.jl_ptr, lrt);
@@ -719,7 +719,8 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
             void *symaddr;
 
             void* libsym = jl_get_library_(sym.f_lib, 0);
-            if (!libsym || !jl_dlsym(libsym, sym.f_name, &symaddr, 0)) {
+            int symbol_found = jl_dlsym(libsym, sym.f_name, &symaddr, 0);
+            if (!libsym || !symbol_found) {
                 // Error mode, either the library or the symbol couldn't be find during compiletime.
                 // Fallback to a runtime symbol lookup.
                 res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f);
@@ -802,7 +803,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         }
         ir = jl_fieldref(ir, 0);
 
-        if (!jl_is_string(ir) && !jl_typeis(ir, jl_array_uint8_type)) {
+        if (!jl_is_string(ir) && !jl_typetagis(ir, jl_array_uint8_type)) {
             emit_error(ctx, "Module IR passed to llvmcall must be a string or an array of bytes");
             JL_GC_POP();
             return jl_cgval_t();
@@ -832,7 +833,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
      * type. Otherwise we pass a pointer to a jl_value_t.
      */
     std::vector<llvm::Type*> argtypes;
-    Value **argvals = (Value**)alloca(nargt * sizeof(Value*));
+    SmallVector<Value *, 8> argvals(nargt);
     for (size_t i = 0; i < nargt; ++i) {
         jl_value_t *tti = jl_svecref(tt,i);
         bool toboxed;
@@ -957,10 +958,8 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     // copy module properties that should always match
     Mod->setTargetTriple(jl_Module->getTargetTriple());
     Mod->setDataLayout(jl_Module->getDataLayout());
-#if JL_LLVM_VERSION >= 130000
     Mod->setStackProtectorGuard(jl_Module->getStackProtectorGuard());
     Mod->setOverrideStackAlignment(jl_Module->getOverrideStackAlignment());
-#endif
 
     // verify the definition
     Function *def = Mod->getFunction(ir_name);
@@ -979,7 +978,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     Function *decl = Function::Create(decl_typ, def->getLinkage(), def->getAddressSpace(),
                                       def->getName(), jl_Module);
     decl->setAttributes(def->getAttributes());
-    CallInst *inst = ctx.builder.CreateCall(decl, ArrayRef<Value *>(&argvals[0], nargt));
+    CallInst *inst = ctx.builder.CreateCall(decl, argvals);
 
     // save the module to be linked later.
     // we cannot do this right now, because linking mutates the destination module,
@@ -1096,14 +1095,11 @@ std::string generate_func_sig(const char *fname)
             abi->use_sret(jl_voidpointer_type, LLVMCtx);
         }
         else if (abi->use_sret((jl_datatype_t*)rt, LLVMCtx)) {
-#if JL_LLVM_VERSION >= 140000
             AttrBuilder retattrs(LLVMCtx);
-#else
-            AttrBuilder retattrs;
-#endif
-#if !defined(_OS_WINDOWS_) // llvm used to use the old mingw ABI, skipping this marking works around that difference
-            retattrs.addStructRetAttr(lrt);
-#endif
+            if (!ctx->TargetTriple.isOSWindows()) {
+                // llvm used to use the old mingw ABI, skipping this marking works around that difference
+                retattrs.addStructRetAttr(lrt);
+            }
             retattrs.addAttribute(Attribute::NoAlias);
             paramattrs.push_back(AttributeSet::get(LLVMCtx, retattrs));
             fargt_sig.push_back(PointerType::get(lrt, 0));
@@ -1118,11 +1114,7 @@ std::string generate_func_sig(const char *fname)
     }
 
     for (size_t i = 0; i < nccallargs; ++i) {
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder ab(LLVMCtx);
-#else
-        AttrBuilder ab;
-#endif
         jl_value_t *tti = jl_svecref(at, i);
         Type *t = NULL;
         bool isboxed;
@@ -1346,14 +1338,20 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     bool llvmcall = false;
     std::tie(cc, llvmcall) = convert_cconv(cc_sym);
 
-    interpret_symbol_arg(ctx, symarg, args[1], "ccall", llvmcall);
+    interpret_symbol_arg(ctx, symarg, args[1], /*ccall=*/true, llvmcall);
     Value *&jl_ptr = symarg.jl_ptr;
     void (*&fptr)(void) = symarg.fptr;
     const char *&f_name = symarg.f_name;
     const char *&f_lib = symarg.f_lib;
 
     if (f_name == NULL && fptr == NULL && jl_ptr == NULL) {
-        emit_error(ctx, "ccall: null function pointer");
+        if (symarg.gcroot != NULL) { // static_eval(ctx, args[1]) could not be interpreted to a function pointer
+            const char *errmsg = invalid_symbol_err_msg(/*ccall=*/true);
+            jl_cgval_t arg1 = emit_expr(ctx, args[1]);
+            emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg);
+        } else {
+            emit_error(ctx, "ccall: null function pointer");
+        }
         JL_GC_POP();
         return jl_cgval_t();
     }
@@ -1367,25 +1365,26 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         if ((uintptr_t)fptr == ptr)
             return true;
         if (f_lib) {
-#ifdef _OS_WINDOWS_
             if ((f_lib == JL_EXE_LIBNAME) || // preventing invalid pointer access
                 (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) ||
-                (f_lib == JL_LIBJULIA_DL_LIBNAME) ||
-                (!strcmp(f_lib, jl_crtdll_basename))) {
+                (f_lib == JL_LIBJULIA_DL_LIBNAME)) {
+                // libjulia-like
+            }
+            else
+#ifdef _OS_WINDOWS_
+            if (strcmp(f_lib, jl_crtdll_basename) == 0) {
                 // libjulia-like
             }
             else
-                return false;
-#else
-            return false;
 #endif
+            return false;
         }
         return f_name && f_name == name;
     };
 #define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), StringRef(XSTR(name)))
 
     // emit arguments
-    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs);
+    SmallVector<jl_cgval_t, 4> argv(nccallargs);
     for (size_t i = 0; i < nccallargs; i++) {
         // Julia (expression) value of current parameter
         jl_value_t *argi = ccallarg(i);
@@ -1468,7 +1467,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     (void)isVa; // prevent compiler warning
     if (is_libjulia_func(jl_array_ptr)) {
         ++CCALL_STAT(jl_array_ptr);
-        assert(lrt == getSizeTy(ctx.builder.getContext()));
+        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 1);
         const jl_cgval_t &ary = argv[0];
         JL_GC_POP();
@@ -1477,14 +1476,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     }
     else if (is_libjulia_func(jl_value_ptr)) {
         ++CCALL_STAT(jl_value_ptr);
-        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == getSizeTy(ctx.builder.getContext()));
+        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 1);
         jl_value_t *tti = jl_svecref(at, 0);
         Type *largty;
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            largty = getSizeTy(ctx.builder.getContext());
+            largty = ctx.types().T_size;
             isboxed = false;
         }
         else {
@@ -1506,28 +1505,39 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, retval, retboxed, rt, unionall, static_rt);
     }
-    else if (is_libjulia_func(jl_cpu_pause)) {
+    else if (is_libjulia_func(jl_cpu_pause)||is_libjulia_func(jl_cpu_suspend)) {
         ++CCALL_STAT(jl_cpu_pause);
         // Keep in sync with the julia_threads.h version
         assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
 #ifdef __MIC__
-        // TODO
-#elif defined(_CPU_X86_64_) || defined(_CPU_X86_)  /* !__MIC__ */
-        auto pauseinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "pause",
-                                               "~{memory}", true);
-        ctx.builder.CreateCall(pauseinst);
-        JL_GC_POP();
-        return ghostValue(ctx, jl_nothing_type);
-#elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7)
-        auto wfeinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "wfe",
-                                             "~{memory}", true);
-        ctx.builder.CreateCall(wfeinst);
-        JL_GC_POP();
-        return ghostValue(ctx, jl_nothing_type);
+    //TODO
 #else
-        JL_GC_POP();
-        return ghostValue(ctx, jl_nothing_type);
+        if (ctx.emission_context.TargetTriple.isX86()) {
+            auto pauseinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "pause",
+                                                "~{memory}", true);
+            ctx.builder.CreateCall(pauseinst);
+            JL_GC_POP();
+            return ghostValue(ctx, jl_nothing_type);
+        } else if (ctx.emission_context.TargetTriple.isAArch64()
+                    || (ctx.emission_context.TargetTriple.isARM()
+                        && ctx.emission_context.TargetTriple.getSubArch() != Triple::SubArchType::NoSubArch
+                        // ARMv7 and above is < armv6
+                        && ctx.emission_context.TargetTriple.getSubArch() < Triple::SubArchType::ARMSubArch_v6)) {
+            InlineAsm* wait_inst;
+            if (is_libjulia_func(jl_cpu_pause))
+                wait_inst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "isb",
+                                                "~{memory}", true);
+            else
+                wait_inst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "wfe",
+                                                "~{memory}", true);
+            ctx.builder.CreateCall(wait_inst);
+            JL_GC_POP();
+            return ghostValue(ctx, jl_nothing_type);
+        } else {
+            JL_GC_POP();
+            return ghostValue(ctx, jl_nothing_type);
+        }
 #endif
     }
     else if (is_libjulia_func(jl_cpu_wake)) {
@@ -1538,13 +1548,18 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 #if JL_CPU_WAKE_NOOP == 1
         JL_GC_POP();
         return ghostValue(ctx, jl_nothing_type);
-#elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7)
-        auto sevinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "sev",
-                                             "~{memory}", true);
-        ctx.builder.CreateCall(sevinst);
-        JL_GC_POP();
-        return ghostValue(ctx, jl_nothing_type);
 #endif
+        if (ctx.emission_context.TargetTriple.isAArch64()
+            || (ctx.emission_context.TargetTriple.isARM()
+                && ctx.emission_context.TargetTriple.getSubArch() != Triple::SubArchType::NoSubArch
+                // ARMv7 and above is < armv6
+                && ctx.emission_context.TargetTriple.getSubArch() < Triple::SubArchType::ARMSubArch_v6)) {
+            auto sevinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "sev",
+                                                "~{memory}", true);
+            ctx.builder.CreateCall(sevinst);
+            JL_GC_POP();
+            return ghostValue(ctx, jl_nothing_type);
+        }
     }
     else if (is_libjulia_func(jl_gc_safepoint)) {
         ++CCALL_STAT(jl_gc_safepoint);
@@ -1552,12 +1567,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
-        emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
+        emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
         return ghostValue(ctx, jl_nothing_type);
     }
     else if (is_libjulia_func("jl_get_ptls_states")) {
         ++CCALL_STAT(jl_get_ptls_states);
-        assert(lrt == getSizeTy(ctx.builder.getContext()));
+        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx,
@@ -1571,7 +1586,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         JL_GC_POP();
         Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext()));
         const int tid_offset = offsetof(jl_task_t, tid);
-        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(getSizeTy(ctx.builder.getContext()), tid_offset / sizeof(int16_t)));
+        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t)));
         LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t)));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
         ai.decorateInst(tid);
@@ -1585,7 +1600,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         JL_GC_POP();
         Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext()));
         const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited);
-        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(getSizeTy(ctx.builder.getContext()), finh_offset / 4));
+        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4));
         LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t)));
         Value *newval;
         if (is_libjulia_func(jl_gc_disable_finalizers_internal)) {
@@ -1614,7 +1629,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         JL_GC_POP();
         Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue);
         const int nt_offset = offsetof(jl_tls_states_t, next_task);
-        Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nt_offset / sizeof(void*)));
+        Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*)));
         ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt);
         return ghostValue(ctx, jl_nothing_type);
     }
@@ -1655,9 +1670,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
                 checkBB, contBB);
         ctx.builder.SetInsertPoint(checkBB);
         ctx.builder.CreateLoad(
-                getSizeTy(ctx.builder.getContext()),
-                ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()),
-                    get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1),
+                ctx.types().T_size,
+                ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size,
+                    get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1),
                 true);
         ctx.builder.CreateBr(contBB);
         ctx.f->getBasicBlockList().push_back(contBB);
@@ -1671,11 +1686,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         Value *len;
         if (svecv.constant && svecv.typ == (jl_value_t*)jl_simplevector_type) {
             // Check the type as well before we call
-            len = ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_svec_len(svecv.constant));
+            len = ConstantInt::get(ctx.types().T_size, jl_svec_len(svecv.constant));
         }
         else {
-            auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), getSizePtrTy(ctx.builder.getContext()));
-            len = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ptr, Align(sizeof(size_t)));
+            auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_size->getPointerTo());
+            len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr);
             // Only mark with TBAA if we are sure about the type.
             // This could otherwise be in a dead branch
             if (svecv.typ == (jl_value_t*)jl_simplevector_type) {
@@ -1684,7 +1699,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
             }
             MDBuilder MDB(ctx.builder.getContext());
             auto rng = MDB.createRange(
-                Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX / sizeof(void*) - 1));
+                Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX / sizeof(void*) - 1));
             cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
         }
         JL_GC_POP();
@@ -1696,8 +1711,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(!isVa && !llvmcall && nccallargs == 2);
         const jl_cgval_t &svecv = argv[0];
         const jl_cgval_t &idxv = argv[1];
-        Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_long_type);
-        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+        Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_long_type);
+        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, 1));
         auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue);
         Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue,
                                                          decay_derived(ctx, ptr), idx);
@@ -1730,15 +1745,15 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
                                                 false, rt, unionall, static_rt);
             }
             else if (!jl_has_free_typevars(ety)) {
-                Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_ulong_type);
+                Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_ulong_type);
                 Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), ctx.types().T_pprjlvalue);
                 if (!ptrarray) {
                     size_t elsz = jl_datatype_size(ety);
                     unsigned align = jl_datatype_align(ety);
                     size_t stride = LLT_ALIGN(elsz, align) / sizeof(jl_value_t*);
                     if (stride != 1)
-                        idx = ctx.builder.CreateMul(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), stride));
-                    idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ((jl_datatype_t*)ety)->layout->first_ptr));
+                        idx = ctx.builder.CreateMul(idx, ConstantInt::get(ctx.types().T_size, stride));
+                    idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ((jl_datatype_t*)ety)->layout->first_ptr));
                 }
                 Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx);
                 LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*)));
@@ -1753,20 +1768,20 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     }
     else if (is_libjulia_func(jl_string_ptr)) {
         ++CCALL_STAT(jl_string_ptr);
-        assert(lrt == getSizeTy(ctx.builder.getContext()));
+        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 1);
         auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
                                 ctx.types().T_pprjlvalue);
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
         auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1);
-        strp = ctx.builder.CreatePtrToInt(strp, getSizeTy(ctx.builder.getContext()));
+        strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_symbol_name)) {
         ++CCALL_STAT(jl_symbol_name);
-        assert(lrt == getSizeTy(ctx.builder.getContext()));
+        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 1);
         auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
                                 ctx.types().T_pprjlvalue);
@@ -1774,7 +1789,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         // a null pointer.
         auto strp = ctx.builder.CreateConstInBoundsGEP1_32(
             ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
-        strp = ctx.builder.CreatePtrToInt(strp, getSizeTy(ctx.builder.getContext()));
+        strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
@@ -1783,16 +1798,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemCpy(
                 emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
                 MaybeAlign(1),
                 emit_inttoptr(ctx,
-                    emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), src, (jl_value_t*)jl_voidpointer_type),
+                    emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
                     getInt8PtrTy(ctx.builder.getContext())),
                 MaybeAlign(0),
-                emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type),
+                emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
         JL_GC_POP();
         return rt == (jl_value_t*)jl_nothing_type ? ghostValue(ctx, jl_nothing_type) :
@@ -1803,13 +1818,13 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &val = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
         Value *val32 = emit_unbox(ctx, getInt32Ty(ctx.builder.getContext()), val, (jl_value_t*)jl_uint32_type);
         Value *val8 = ctx.builder.CreateTrunc(val32, getInt8Ty(ctx.builder.getContext()), "memset_val");
         ctx.builder.CreateMemSet(
             emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
             val8,
-            emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type),
+            emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
             MaybeAlign(1)
         );
         JL_GC_POP();
@@ -1821,16 +1836,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemMove(
                 emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
                 MaybeAlign(0),
                 emit_inttoptr(ctx,
-                    emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), src, (jl_value_t*)jl_voidpointer_type),
+                    emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
                     getInt8PtrTy(ctx.builder.getContext())),
                 MaybeAlign(0),
-                emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type),
+                emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
         JL_GC_POP();
         return rt == (jl_value_t*)jl_nothing_type ? ghostValue(ctx, jl_nothing_type) :
@@ -1843,9 +1858,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         if (val.typ == (jl_value_t*)jl_symbol_type) {
             JL_GC_POP();
             const int hash_offset = offsetof(jl_sym_t, hash);
-            Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), getSizePtrTy(ctx.builder.getContext()));
-            Value *ph2 = ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), ph1, ConstantInt::get(getSizeTy(ctx.builder.getContext()), hash_offset / sizeof(size_t)));
-            LoadInst *hashval = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ph2, Align(sizeof(size_t)));
+            Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), ctx.types().T_size->getPointerTo());
+            Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr));
+            LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             ai.decorateInst(hashval);
             return mark_or_box_ccall_result(ctx, hashval, retboxed, rt, unionall, static_rt);
@@ -1857,7 +1872,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
             if (!val.isghost && !val.ispointer())
                 val = value_to_pointer(ctx, val);
             Value *args[] = {
-                emit_typeof_boxed(ctx, val),
+                emit_typeof(ctx, val),
                 val.isghost ? ConstantPointerNull::get(T_pint8_derived) :
                     ctx.builder.CreateBitCast(
                         decay_derived(ctx, data_pointer(ctx, val)),
@@ -1872,7 +1887,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     jl_cgval_t retval = sig.emit_a_ccall(
             ctx,
             symarg,
-            argv,
+            argv.data(),
             gc_uses,
             static_rt);
     JL_GC_POP();
@@ -1894,7 +1909,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
 
     FunctionType *functype = this->functype(ctx.builder.getContext());
 
-    Value **argvals = (Value**) alloca((nccallargs + sret) * sizeof(Value*));
+    SmallVector<Value *, 8> argvals(nccallargs + sret);
     for (size_t ai = 0; ai < nccallargs; ai++) {
         // Current C function parameter
         jl_cgval_t &arg = argv[ai];
@@ -1959,9 +1974,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
             // XXX: result needs to be zero'd and given a GC root here
             // and has incorrect write barriers.
             // instead this code path should behave like `unsafe_load`
-            assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance");
-            result = emit_allocobj(ctx, jl_datatype_size(rt),
-                                   literal_pointer_val(ctx, (jl_value_t*)rt));
+            result = emit_allocobj(ctx, (jl_datatype_t*)rt);
             sretty = ctx.types().T_jlvalue;
             sretboxed = true;
             gc_uses.push_back(result);
@@ -2057,7 +2070,8 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         else {
             void *symaddr;
             void *libsym = jl_get_library_(symarg.f_lib, 0);
-            if (!libsym || !jl_dlsym(libsym, symarg.f_name, &symaddr, 0)) {
+            int symbol_found = jl_dlsym(libsym, symarg.f_name, &symaddr, 0);
+            if (!libsym || !symbol_found) {
                 ++DeferredCCallLookups;
                 // either the library or the symbol could not be found, place a runtime
                 // lookup here instead.
@@ -2074,7 +2088,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     OperandBundleDef OpBundle("jl_roots", gc_uses);
     // the actual call
     CallInst *ret = ctx.builder.CreateCall(functype, llvmf,
-            ArrayRef<Value*>(&argvals[0], nccallargs + sret),
+            argvals,
             ArrayRef<OperandBundleDef>(&OpBundle, gc_uses.empty() ? 0 : 1));
     ((CallInst*)ret)->setAttributes(attributes);
 
@@ -2122,15 +2136,13 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         else if (jlretboxed && !retboxed) {
             assert(jl_is_datatype(rt));
             if (static_rt) {
-                Value *runtime_bt = literal_pointer_val(ctx, rt);
-                size_t rtsz = jl_datatype_size(rt);
-                assert(rtsz > 0);
-                Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
+                Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt);
                 MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
                 int boxalign = julia_alignment(rt);
                 // copy the data from the return value to the new struct
                 const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
                 auto resultTy = result->getType();
+                size_t rtsz = jl_datatype_size(rt);
                 if (DL.getTypeStoreSize(resultTy) > rtsz) {
                     // ARM and AArch64 can use a LLVM type larger than the julia type.
                     // When this happens, cast through memory.
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index 9f4d69137c0fd..b627224e027a9 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -860,12 +860,14 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
                                                     StringRef SectionName)
 {
     // allocating more than one code section can confuse libunwind.
-#if !defined(_COMPILER_MSAN_ENABLED_)
-    // TODO: Figure out why msan needs this.
+#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_)
+    // TODO: Figure out why msan and now asan too need this.
     assert(!code_allocated);
     code_allocated = true;
 #endif
     total_allocated += Size;
+    jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
+    jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
     if (exe_alloc)
         return (uint8_t*)exe_alloc->alloc(Size, Alignment);
     return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
@@ -879,6 +881,8 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
                                                     bool isReadOnly)
 {
     total_allocated += Size;
+    jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
+    jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
     if (!isReadOnly)
         return (uint8_t*)rw_alloc.alloc(Size, Alignment);
     if (ro_alloc)
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 7acfb24b91615..9e42a6b246e9b 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -153,6 +153,39 @@ Metadata *to_md_tree(jl_value_t *val, LLVMContext &ctxt) {
 
 // --- Debug info ---
 
+static DICompileUnit *getOrCreateJuliaCU(Module &M,
+    DICompileUnit::DebugEmissionKind emissionKind,
+    DICompileUnit::DebugNameTableKind tableKind)
+{
+    // TODO: share debug objects globally in the context, instead of allocating a new one every time
+    // or figure out how to delete them afterwards?
+    // But at least share them a little bit here
+    auto CUs = M.debug_compile_units();
+    for (DICompileUnit *CU : CUs) {
+        if (CU->getEmissionKind() == emissionKind &&
+            CU->getNameTableKind() == tableKind)
+        return CU;
+    }
+    DIFile *topfile = DIFile::get(M.getContext(), "julia", ".");
+    DIBuilder dbuilder(M);
+    DICompileUnit *CU =
+        dbuilder.createCompileUnit(llvm::dwarf::DW_LANG_Julia
+                                   ,topfile      // File
+                                   ,"julia"      // Producer
+                                   ,true         // isOptimized
+                                   ,""           // Flags
+                                   ,0            // RuntimeVersion
+                                   ,""           // SplitName
+                                   ,emissionKind // Kind
+                                   ,0            // DWOId
+                                   ,true         // SplitDebugInlining
+                                   ,false        // DebugInfoForProfiling
+                                   ,tableKind    // NameTableKind
+                                   );
+    dbuilder.finalize();
+    return CU;
+}
+
 static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debuginfo, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed)
 {
     jl_datatype_t *jdt = (jl_datatype_t*)jt;
@@ -290,7 +323,7 @@ static Value *get_gc_root_for(const jl_cgval_t &x)
 
 static inline Constant *literal_static_pointer_val(const void *p, Type *T);
 
-static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
+static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
 {
     // emit a GlobalVariable for a jl_value_t named "cname"
     // store the name given so we can reuse it (facilitating merging later)
@@ -322,36 +355,36 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
     return gv;
 }
 
-static Value *julia_pgv(jl_codectx_t &ctx, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr)
+static Constant *julia_pgv(jl_codectx_t &ctx, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr)
 {
     // emit a GlobalVariable for a jl_value_t, using the prefix, name, and module to
-    // to create a readable name of the form prefixModA.ModB.name
-    size_t len = strlen(jl_symbol_name(name)) + strlen(prefix) + 1;
+    // to create a readable name of the form prefixModA.ModB.name#
+    // reverse-of-reverse algorithm
+    std::string finalname;
+    StringRef name_str(jl_symbol_name(name));
+    finalname.resize(name_str.size() + 1);
+    finalname[0] = '#';
+    std::reverse_copy(name_str.begin(), name_str.end(), finalname.begin() + 1);
     jl_module_t *parent = mod, *prev = NULL;
-    while (parent != NULL && parent != prev) {
-        len += strlen(jl_symbol_name(parent->name))+1;
-        prev = parent;
-        parent = parent->parent;
-    }
-    char *fullname = (char*)alloca(len);
-    strcpy(fullname, prefix);
-    len -= strlen(jl_symbol_name(name)) + 1;
-    strcpy(fullname + len, jl_symbol_name(name));
-    parent = mod;
-    prev = NULL;
-    while (parent != NULL && parent != prev) {
-        size_t part = strlen(jl_symbol_name(parent->name)) + 1;
-        strcpy(fullname + len - part, jl_symbol_name(parent->name));
-        fullname[len - 1] = '.';
-        len -= part;
+    while (parent && parent != prev) {
+        size_t orig_end = finalname.size() + 1;
+        StringRef parent_name(jl_symbol_name(parent->name));
+        finalname.resize(orig_end + parent_name.size());
+        finalname[orig_end - 1] = '.';
+        std::reverse_copy(parent_name.begin(), parent_name.end(), finalname.begin() + orig_end);
         prev = parent;
         parent = parent->parent;
     }
-    return julia_pgv(ctx, fullname, addr);
+    size_t orig_end = finalname.size();
+    StringRef prefix_name(prefix);
+    finalname.resize(orig_end + prefix_name.size());
+    std::reverse_copy(prefix_name.begin(), prefix_name.end(), finalname.begin() + orig_end);
+    std::reverse(finalname.begin(), finalname.end());
+    return julia_pgv(ctx, finalname.c_str(), addr);
 }
 
 static JuliaVariable *julia_const_gv(jl_value_t *val);
-static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p)
+static Constant *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p)
 {
     // emit a pointer to a jl_value_t* which will allow it to be valid across reloading code
     // also, try to give it a nice name for gdb, for easy identification
@@ -371,6 +404,12 @@ static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p)
     }
     if (jl_is_datatype(p)) {
         jl_datatype_t *addr = (jl_datatype_t*)p;
+        if (addr->smalltag) {
+            // some common builtin datatypes have a special pool for accessing them by smalltag id
+            Constant *tag = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), addr->smalltag << 4);
+            Constant *smallp = ConstantExpr::getInBoundsGetElementPtr(getInt8Ty(ctx.builder.getContext()), prepare_global_in(jl_Module, jlsmall_typeof_var), tag);
+            return ConstantExpr::getBitCast(smallp, ctx.types().T_ppjlvalue);
+        }
         // DataTypes are prefixed with a +
         return julia_pgv(ctx, "+", addr->name->name, addr->name->module, p);
     }
@@ -521,7 +560,7 @@ static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) {
 static Value *julia_binding_pvalue(jl_codectx_t &ctx, Value *bv)
 {
     bv = emit_bitcast(ctx, bv, ctx.types().T_pprjlvalue);
-    Value *offset = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_binding_t, value) / sizeof(size_t));
+    Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_binding_t, value) / ctx.types().sizeof_ptr);
     return ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, bv, offset);
 }
 
@@ -596,7 +635,7 @@ static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed
     return _julia_type_to_llvm(&ctx.emission_context, ctx.builder.getContext(), jt, isboxed);
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 Type *jl_type_to_llvm_impl(jl_value_t *jt, LLVMContextRef ctxt, bool *isboxed)
 {
     return _julia_type_to_llvm(NULL, *unwrap(ctxt), jt, isboxed);
@@ -850,7 +889,8 @@ static bool is_uniontype_allunboxed(jl_value_t *typ)
     return for_each_uniontype_small([&](unsigned, jl_datatype_t*) {}, typ, counter);
 }
 
-static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull=false);
+static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool justtag, bool notag=false);
+static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull=false, bool justtag=false);
 
 static unsigned get_box_tindex(jl_datatype_t *jt, jl_value_t *ut)
 {
@@ -902,18 +942,17 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     // If the types are small and simple, use load and store directly.
     // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int
     // that interferes with other optimizations.
-#ifndef JL_LLVM_OPAQUE_POINTERS
     // TODO: Restore this for opaque pointers? Needs extra type information from the caller.
-    if (sz <= 64) {
+    if (ctx.builder.getContext().supportsTypedPointers() && sz <= 64) {
         // The size limit is arbitrary but since we mainly care about floating points and
         // machine size vectors this should be enough.
         const DataLayout &DL = jl_Module->getDataLayout();
         auto srcty = cast<PointerType>(src->getType());
         //TODO unsafe nonopaque pointer
-        auto srcel = srcty->getPointerElementType();
+        auto srcel = srcty->getNonOpaquePointerElementType();
         auto dstty = cast<PointerType>(dst->getType());
         //TODO unsafe nonopaque pointer
-        auto dstel = dstty->getPointerElementType();
+        auto dstel = dstty->getNonOpaquePointerElementType();
         while (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) {
             src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0);
             srcel = srcel->getArrayElementType();
@@ -942,7 +981,6 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
             return;
         }
     }
-#endif
     ++EmittedMemcpys;
 
     // the memcpy intrinsic does not allow to specify different alias tags
@@ -1002,49 +1040,84 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN
     return load;
 }
 
-static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v,  bool is_promotable=false);
-
-static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull);
+static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt)
+{
+    if (dt->smalltag)
+        return ConstantInt::get(ctx.types().T_size, dt->smalltag << 4);
+    return ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, (jl_value_t*)dt), ctx.types().T_size);
+}
 
-static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull)
+// Returns justtag ? ctx.types.T_size : ctx.types().T_prjlvalue
+static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull, bool justtag)
 {
     // given p, compute its type
+    jl_datatype_t *dt = NULL;
     if (p.constant)
-        return mark_julia_const(ctx, jl_typeof(p.constant));
-    if (p.isboxed && !jl_is_concrete_type(p.typ)) {
-        if (jl_is_type_type(p.typ)) {
-            jl_value_t *tp = jl_tparam0(p.typ);
-            if (!jl_is_type(tp) || jl_is_concrete_type(tp)) {
-                // convert 1::Type{1} ==> typeof(1) ==> Int
-                return mark_julia_const(ctx, jl_typeof(tp));
-            }
+        dt = (jl_datatype_t*)jl_typeof(p.constant);
+    else if (jl_is_concrete_type(p.typ))
+        dt = (jl_datatype_t*)p.typ;
+    if (dt) {
+        if (justtag)
+            return emit_tagfrom(ctx, dt);
+        return track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)dt));
+    }
+    auto notag = [justtag] (jl_value_t *typ) {
+        // compute if the tag is always a type (not a builtin tag)
+        // based on having no intersection with one of the special types
+        // this doesn't matter if the user just wants the tag value
+        if (justtag)
+            return false;
+        jl_value_t *uw = jl_unwrap_unionall(typ);
+        if (jl_is_datatype(uw)) { // quick path to catch common cases
+            jl_datatype_t *dt = (jl_datatype_t*)uw;
+            assert(!dt->smalltag);
+            if (!dt->name->abstract)
+                return true;
+            if (dt == jl_any_type)
+                return false;
         }
-        return mark_julia_type(ctx, emit_typeof(ctx, p.V, maybenull), true, jl_datatype_type);
-    }
+        if (jl_has_intersect_type_not_kind(typ))
+            return false;
+        for (size_t i = 0; i < jl_tags_count; i++) {
+            jl_datatype_t *dt = small_typeof[(i << 4) / sizeof(*small_typeof)];
+            if (dt && !jl_has_empty_intersection((jl_value_t*)dt, typ))
+                return false;
+        }
+        return true;
+    };
+    if (p.isboxed)
+        return emit_typeof(ctx, p.V, maybenull, justtag, notag(p.typ));
     if (p.TIndex) {
         Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
         bool allunboxed = is_uniontype_allunboxed(p.typ);
-        Value *datatype_or_p = ctx.emission_context.imaging ? Constant::getNullValue(ctx.types().T_ppjlvalue) : Constant::getNullValue(ctx.types().T_prjlvalue);
+        Type *expr_type = justtag ? ctx.types().T_size : ctx.emission_context.imaging ? ctx.types().T_pjlvalue : ctx.types().T_prjlvalue;
+        Value *datatype_or_p = Constant::getNullValue(ctx.emission_context.imaging ? expr_type->getPointerTo() : expr_type);
         unsigned counter = 0;
         for_each_uniontype_small(
             [&](unsigned idx, jl_datatype_t *jt) {
                 Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx));
-                Value *ptr;
-                if (ctx.emission_context.imaging) {
-                    ptr = literal_pointer_val_slot(ctx, (jl_value_t*)jt);
-                }
-                else {
-                    ptr = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt));
+                Constant *ptr;
+                if (justtag && jt->smalltag) {
+                    ptr = ConstantInt::get(expr_type, jt->smalltag << 4);
+                    if (ctx.emission_context.imaging)
+                        ptr = get_pointer_to_constant(ctx.emission_context, ptr, "_j_tag", *jl_Module);
                 }
+                else if (ctx.emission_context.imaging)
+                    ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx, (jl_value_t*)jt), datatype_or_p->getType());
+                else if (justtag)
+                    ptr = ConstantInt::get(expr_type, (uintptr_t)jt);
+                else
+                    ptr = ConstantExpr::getAddrSpaceCast(literal_static_pointer_val((jl_value_t*)jt, ctx.types().T_pjlvalue), expr_type);
                 datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p);
             },
             p.typ,
             counter);
         auto emit_unboxty = [&] () -> Value* {
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-            if (ctx.emission_context.imaging)
-                return track_pjlvalue(
-                    ctx, ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, datatype_or_p, Align(sizeof(void*)))));
+            if (ctx.emission_context.imaging) {
+                Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*))));
+                return justtag ? datatype : track_pjlvalue(ctx, datatype);
+            }
             return datatype_or_p;
         };
         Value *res;
@@ -1055,7 +1128,7 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybe
             BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge", ctx.f);
             ctx.builder.CreateCondBr(isnull, boxBB, unboxBB);
             ctx.builder.SetInsertPoint(boxBB);
-            auto boxTy = emit_typeof(ctx, p.Vboxed, maybenull);
+            auto boxTy = emit_typeof(ctx, p.Vboxed, maybenull, justtag, notag(p.typ));
             ctx.builder.CreateBr(mergeBB);
             boxBB = ctx.builder.GetInsertBlock(); // could have changed
             ctx.builder.SetInsertPoint(unboxBB);
@@ -1063,7 +1136,7 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybe
             ctx.builder.CreateBr(mergeBB);
             unboxBB = ctx.builder.GetInsertBlock(); // could have changed
             ctx.builder.SetInsertPoint(mergeBB);
-            auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
+            auto phi = ctx.builder.CreatePHI(boxTy->getType(), 2);
             phi->addIncoming(boxTy, boxBB);
             phi->addIncoming(unboxTy, unboxBB);
             res = phi;
@@ -1071,21 +1144,15 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybe
         else {
             res = emit_unboxty();
         }
-        return mark_julia_type(ctx, res, true, jl_datatype_type);
+        return res;
     }
-    return mark_julia_const(ctx, p.typ);
-}
-
-// Returns ctx.types().T_prjlvalue
-static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull)
-{
-    return boxed(ctx, emit_typeof(ctx, p, maybenull));
+    assert(0 && "what is this struct"); abort();
 }
 
 static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt)
 {
     Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue);
-    Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, types) / sizeof(void*));
+    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     return ai.decorateInst(ctx.builder.CreateAlignedLoad(
                 ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*))));
@@ -1093,19 +1160,19 @@ static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt)
 
 static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt)
 {
-    Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), getSizePtrTy(ctx.builder.getContext()));
+    Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo());
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    return ai.decorateInst(ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), type_svec, Align(sizeof(void*))));
+    return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*))));
 }
 
 static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt)
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext())->getPointerTo());
-    Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, layout) / sizeof(int32_t*));
+    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*));
     Ptr = ctx.builder.CreateInBoundsGEP(getInt32PtrTy(ctx.builder.getContext()), Ptr, Idx);
     Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*))));
-    Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_layout_t, size) / sizeof(int32_t));
+    Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t));
     Ptr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx);
     return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t))));
 }
@@ -1133,7 +1200,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB);
             ctx.builder.SetInsertPoint(dynloadBB);
-            Value *datatype = emit_typeof(p.V);
+            Value *datatype = emit_typeof(ctx, p.V, false, false);
             Value *dyn_size = emit_datatype_size(ctx, datatype);
             ctx.builder.CreateBr(postBB);
             dynloadBB = ctx.builder.GetInsertBlock(); // could have changed
@@ -1153,7 +1220,7 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
         return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(p.typ));
     }
     else {
-        Value *datatype = emit_typeof_boxed(ctx, p);
+        Value *datatype = emit_typeof(ctx, p, false, false);
         Value *dyn_size = emit_datatype_size(ctx, datatype);
         return dyn_size;
     }
@@ -1163,10 +1230,10 @@ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppint8);
-    Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, name));
+    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, name));
     Value *Nam = ai.decorateInst(
             ctx.builder.CreateAlignedLoad(getInt8PtrTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8PtrTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*))));
-    Value *Idx2 = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized));
+    Value *Idx2 = ConstantInt::get(ctx.types().T_size, offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized));
     Value *mutabl = ai.decorateInst(
             ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Nam, Idx2), Align(1)));
     mutabl = ctx.builder.CreateLShr(mutabl, 1);
@@ -1191,7 +1258,7 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
     Value *vptr = ctx.builder.CreateInBoundsGEP(
             ctx.types().T_pjlvalue,
             emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue),
-            ConstantInt::get(getSizeTy(ctx.builder.getContext()), n));
+            ConstantInt::get(ctx.types().T_size, n));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*))));
 }
@@ -1338,29 +1405,54 @@ static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1,
 
 // Returns typeof(v), or null if v is a null pointer at run time and maybenull is true.
 // This is used when the value might have come from an undefined value (a PhiNode),
-// yet we try to read its type to compute a union index when moving the value (a PiNode).
+// yet jl_max_tags try to read its type to compute a union index when moving the value (a PiNode).
 // Returns a ctx.types().T_prjlvalue typed Value
-static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull)
+static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool justtag, bool notag)
 {
     ++EmittedTypeof;
     assert(v != NULL && !isa<AllocaInst>(v) && "expected a conditionally boxed value");
+    Value *nonnull = maybenull ? null_pointer_cmp(ctx, v) : ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
     Function *typeof = prepare_call(jl_typeof_func);
-    if (maybenull)
-        return emit_guarded_test(ctx, null_pointer_cmp(ctx, v), Constant::getNullValue(typeof->getReturnType()), [&] {
-            // e.g. emit_typeof(ctx, v)
-            return ctx.builder.CreateCall(typeof, {v});
+    return emit_guarded_test(ctx, nonnull, Constant::getNullValue(justtag ? ctx.types().T_size : typeof->getReturnType()), [&] {
+        // e.g. emit_typeof(ctx, v)
+        Value *typetag = ctx.builder.CreateCall(typeof, {v});
+        if (notag)
+            return typetag;
+        Value *tag = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, typetag), ctx.types().T_size);
+        if (justtag)
+            return tag;
+        auto issmall = ctx.builder.CreateICmpULT(tag, ConstantInt::get(tag->getType(), (uintptr_t)jl_max_tags << 4));
+        return emit_guarded_test(ctx, issmall, typetag, [&] {
+            // we lied a bit: this wasn't really an object (though it was valid for GC rooting)
+            // and we need to use it as an index to get the real object now
+            Module *M = jl_Module;
+            Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jlsmall_typeof_var), tag);
+            smallp = ctx.builder.CreateBitCast(smallp, typetag->getType()->getPointerTo(0));
+            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+            auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0));
+            small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None));
+            return ai.decorateInst(small);
         });
-    return ctx.builder.CreateCall(typeof, {v});
+    });
 }
 
+static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v,  bool is_promotable=false);
 
-static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
+static void just_emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
 {
     Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg);
     ctx.builder.CreateCall(prepare_call(jltypeerror_func),
                        { msg_val, maybe_decay_untracked(ctx, type), mark_callee_rooted(ctx, boxed(ctx, x))});
 }
 
+static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
+{
+    just_emit_type_error(ctx, x, type, msg);
+    ctx.builder.CreateUnreachable();
+    BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_type_error", ctx.f);
+    ctx.builder.SetInsertPoint(cont);
+}
+
 // Should agree with `emit_isa` below
 static bool _can_optimize_isa(jl_value_t *type, int &counter)
 {
@@ -1392,12 +1484,38 @@ static bool can_optimize_isa_union(jl_uniontype_t *type)
 }
 
 // a simple case of emit_isa that is obvious not to include a safe-point
-static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_value_t *dt)
-{
-    assert(jl_is_concrete_type(dt));
-    return ctx.builder.CreateICmpEQ(
-            emit_typeof_boxed(ctx, arg),
-            track_pjlvalue(ctx, literal_pointer_val(ctx, dt)));
+static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt)
+{
+    assert(jl_is_concrete_type((jl_value_t*)dt));
+    if (arg.TIndex) {
+        unsigned tindex = get_box_tindex(dt, arg.typ);
+        if (tindex > 0) {
+            // optimize more when we know that this is a split union-type where tindex = 0 is invalid
+            Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+            return ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex));
+        }
+        else if (arg.Vboxed) {
+            // test for (arg.TIndex == 0x80 && typeof(arg.V) == type)
+            Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
+            BasicBlock *currBB = ctx.builder.GetInsertBlock();
+            BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f);
+            BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f);
+            ctx.builder.CreateCondBr(isboxed, isaBB, postBB);
+            ctx.builder.SetInsertPoint(isaBB);
+            Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt));
+            ctx.builder.CreateBr(postBB);
+            isaBB = ctx.builder.GetInsertBlock(); // could have changed
+            ctx.builder.SetInsertPoint(postBB);
+            PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
+            istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB);
+            istype->addIncoming(istype_boxed, isaBB);
+            return istype;
+        } else {
+            // handle the case where we know that `arg` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed
+            return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
+        }
+    }
+    return ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt));
 }
 
 static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
@@ -1443,9 +1561,6 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     if (known_isa) {
         if (!*known_isa && msg) {
             emit_type_error(ctx, x, literal_pointer_val(ctx, type), *msg);
-            ctx.builder.CreateUnreachable();
-            BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
-            ctx.builder.SetInsertPoint(failBB);
         }
         return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), *known_isa), true);
     }
@@ -1459,17 +1574,17 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     if (intersected_type == (jl_value_t*)jl_type_type) {
         // Inline jl_is_kind(jl_typeof(x))
         // N.B. We do the comparison with untracked pointers, because that gives
-        // LLVM more optimization opportunities. That means it is poosible for
+        // LLVM more optimization opportunities. That means it is possible for
         // `typ` to get GC'ed, but we don't actually care, because we don't ever
         // dereference it.
-        Value *typ = emit_pointer_from_objref(ctx, emit_typeof_boxed(ctx, x));
+        Value *typ = emit_typeof(ctx, x, false, true);
         auto val = ctx.builder.CreateOr(
             ctx.builder.CreateOr(
-                ctx.builder.CreateICmpEQ(typ, literal_pointer_val(ctx, (jl_value_t*)jl_uniontype_type)),
-                ctx.builder.CreateICmpEQ(typ, literal_pointer_val(ctx, (jl_value_t*)jl_datatype_type))),
+                ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_uniontype_type)),
+                ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_datatype_type))),
             ctx.builder.CreateOr(
-                ctx.builder.CreateICmpEQ(typ, literal_pointer_val(ctx, (jl_value_t*)jl_unionall_type)),
-                ctx.builder.CreateICmpEQ(typ, literal_pointer_val(ctx, (jl_value_t*)jl_typeofbottom_type))));
+                ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_unionall_type)),
+                ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_typeofbottom_type))));
         return std::make_pair(val, false);
     }
     // intersection with Type needs to be handled specially
@@ -1486,36 +1601,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     }
     // tests for isa concretetype can be handled with pointer comparisons
     if (jl_is_concrete_type(intersected_type)) {
-        if (x.TIndex) {
-            unsigned tindex = get_box_tindex((jl_datatype_t*)intersected_type, x.typ);
-            if (tindex > 0) {
-                // optimize more when we know that this is a split union-type where tindex = 0 is invalid
-                Value *xtindex = ctx.builder.CreateAnd(x.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
-                return std::make_pair(ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)), false);
-            }
-            else if (x.Vboxed) {
-                // test for (x.TIndex == 0x80 && typeof(x.V) == type)
-                Value *isboxed = ctx.builder.CreateICmpEQ(x.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
-                BasicBlock *currBB = ctx.builder.GetInsertBlock();
-                BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f);
-                BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f);
-                ctx.builder.CreateCondBr(isboxed, isaBB, postBB);
-                ctx.builder.SetInsertPoint(isaBB);
-                Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, x.Vboxed, false),
-                    track_pjlvalue(ctx, literal_pointer_val(ctx, intersected_type)));
-                ctx.builder.CreateBr(postBB);
-                isaBB = ctx.builder.GetInsertBlock(); // could have changed
-                ctx.builder.SetInsertPoint(postBB);
-                PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
-                istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB);
-                istype->addIncoming(istype_boxed, isaBB);
-                return std::make_pair(istype, false);
-            } else {
-                // handle the case where we know that `x` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed
-                return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), false);
-            }
-        }
-        return std::make_pair(emit_exactly_isa(ctx, x, intersected_type), false);
+        return std::make_pair(emit_exactly_isa(ctx, x, (jl_datatype_t*)intersected_type), false);
     }
     jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(intersected_type);
     if (jl_is_datatype(dt) && !dt->name->abstract && jl_subtype(dt->name->wrapper, type)) {
@@ -1523,7 +1609,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
         // so the isa test reduces to a comparison of the typename by pointer
         return std::make_pair(
                 ctx.builder.CreateICmpEQ(
-                    emit_datatype_name(ctx, emit_typeof_boxed(ctx, x)),
+                    emit_datatype_name(ctx, emit_typeof(ctx, x, false, false)),
                     literal_pointer_val(ctx, (jl_value_t*)dt->name)),
                 false);
     }
@@ -1552,7 +1638,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     // everything else can be handled via subtype tests
     return std::make_pair(ctx.builder.CreateICmpNE(
             ctx.builder.CreateCall(prepare_call(jlsubtype_func),
-              { emit_typeof_boxed(ctx, x),
+              { emit_typeof(ctx, x, false, false),
                 track_pjlvalue(ctx, literal_pointer_val(ctx, type)) }),
             ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), false);
 }
@@ -1583,7 +1669,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
         ctx.builder.CreateCondBr(istype, passBB, failBB);
         ctx.builder.SetInsertPoint(failBB);
 
-        emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg);
+        just_emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg);
         ctx.builder.CreateUnreachable();
 
         ctx.f->getBasicBlockList().push_back(passBB);
@@ -1627,7 +1713,7 @@ static bool bounds_check_enabled(jl_codectx_t &ctx, jl_value_t *inbounds) {
 
 static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ty, Value *i, Value *len, jl_value_t *boundscheck)
 {
-    Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+    Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
 #if CHECK_BOUNDS==1
     if (bounds_check_enabled(ctx, boundscheck)) {
         ++EmittedBoundschecks;
@@ -1682,12 +1768,8 @@ std::vector<unsigned> first_ptr(Type *T)
                 num_elements = AT->getNumElements();
             else {
                 VectorType *VT = cast<VectorType>(T);
-#if JL_LLVM_VERSION >= 120000
                 ElementCount EC = VT->getElementCount();
                 num_elements = EC.getKnownMinValue();
-#else
-                num_elements = VT->getNumElements();
-#endif
             }
             if (num_elements == 0)
                 return {};
@@ -1929,12 +2011,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         assert(Order != AtomicOrdering::NotAtomic && r);
         // we can't handle isboxed here as a workaround for really bad LLVM
         // design issue: plain Xchg only works with integers
-#if JL_LLVM_VERSION >= 130000
         auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
-#else
-        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
-        store->setAlignment(Align(alignment));
-#endif
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
         ai.decorateInst(store);
@@ -2084,12 +2161,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                 FailOrder = AtomicOrdering::Monotonic;
             else if (FailOrder == AtomicOrdering::Unordered)
                 FailOrder = AtomicOrdering::Monotonic;
-#if JL_LLVM_VERSION >= 130000
             auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
-#else
-            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
-            store->setAlignment(Align(alignment));
-#endif
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
             ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
             ai.decorateInst(store);
@@ -2220,7 +2292,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     size_t nfields = jl_datatype_nfields(stt);
     bool maybe_null = (unsigned)stt->name->n_uninitialized != 0;
     auto idx0 = [&]() {
-        return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nfields), inbounds);
+        return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(ctx.types().T_size, nfields), inbounds);
     };
     if (nfields == 0) {
         (void)idx0();
@@ -2344,7 +2416,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             return true;
         }
         else if (strct.isboxed) {
-            idx = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+            idx = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
             Value *fld = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, strct), idx });
             *ret = mark_julia_type(ctx, fld, true, jl_any_type);
             return true;
@@ -2422,7 +2494,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 addr = ctx.builder.CreateInBoundsGEP(
                         getInt8Ty(ctx.builder.getContext()),
                         emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())),
-                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), byte_offset));
+                        ConstantInt::get(ctx.types().T_size, byte_offset));
             }
             else {
                 addr = staddr;
@@ -2621,7 +2693,7 @@ static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *
     MDNode *tbaa = ctx.tbaa().tbaa_arraysize;
     if (arraytype_constdim(tinfo.typ, &ndim)) {
         if (ndim == 0)
-            return ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
+            return ConstantInt::get(ctx.types().T_size, 1);
         if (ndim == 1) {
             if (auto d = dyn_cast<ConstantInt>(dim)) {
                 if (d->getZExtValue() == 1) {
@@ -2632,7 +2704,7 @@ static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *
         if (ndim > 1) {
             if (tinfo.constant && isa<ConstantInt>(dim)) {
                 auto n = cast<ConstantInt>(dim)->getZExtValue() - 1;
-                return ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_array_dim(tinfo.constant, n));
+                return ConstantInt::get(ctx.types().T_size, jl_array_dim(tinfo.constant, n));
             }
             tbaa = ctx.tbaa().tbaa_const;
         }
@@ -2643,9 +2715,9 @@ static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *
     auto load = emit_nthptr_recast(ctx,
             t,
             ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)),
-            tbaa, getSizeTy(ctx.builder.getContext()));
+            tbaa, ctx.types().T_size);
     MDBuilder MDB(ctx.builder.getContext());
-    auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), arraytype_maxsize(tinfo.typ)));
+    auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ)));
     load->setMetadata(LLVMContext::MD_range, rng);
     return load;
 }
@@ -2667,10 +2739,10 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
     MDNode *tbaa = ctx.tbaa().tbaa_arraylen;
     if (arraytype_constdim(ty, &ndim)) {
         if (ndim == 0)
-            return ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
+            return ConstantInt::get(ctx.types().T_size, 1);
         if (ndim != 1) {
             if (tinfo.constant)
-                return ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_array_len(tinfo.constant));
+                return ConstantInt::get(ctx.types().T_size, jl_array_len(tinfo.constant));
             tbaa = ctx.tbaa().tbaa_const;
         }
     }
@@ -2679,10 +2751,10 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
     Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
             emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
             1); //index (not offset) of length field in ctx.types().T_pjlarray
-    LoadInst *len = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), addr, Align(sizeof(size_t)));
+    LoadInst *len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, addr, ctx.types().alignof_ptr);
     len->setOrdering(AtomicOrdering::NotAtomic);
     MDBuilder MDB(ctx.builder.getContext());
-    auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), arraytype_maxsize(tinfo.typ)));
+    auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ)));
     len->setMetadata(LLVMContext::MD_range, rng);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
     return ai.decorateInst(len);
@@ -2803,7 +2875,7 @@ static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int n
 static Value *emit_arraysize_for_unsafe_dim(jl_codectx_t &ctx,
         const jl_cgval_t &tinfo, jl_value_t *ex, size_t dim, size_t nd)
 {
-    return dim > nd ? ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1) : emit_arraysize(ctx, tinfo, ex, dim);
+    return dim > nd ? ConstantInt::get(ctx.types().T_size, 1) : emit_arraysize(ctx, tinfo, ex, dim);
 }
 
 // `nd == -1` means the dimension is unknown.
@@ -2813,8 +2885,8 @@ static Value *emit_array_nd_index(
 {
     ++EmittedArrayNdIndex;
     Value *a = boxed(ctx, ainfo);
-    Value *i = Constant::getNullValue(getSizeTy(ctx.builder.getContext()));
-    Value *stride = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
+    Value *i = Constant::getNullValue(ctx.types().T_size);
+    Value *stride = ConstantInt::get(ctx.types().T_size, 1);
 #if CHECK_BOUNDS==1
     bool bc = bounds_check_enabled(ctx, inbounds);
     BasicBlock *failBB = NULL, *endBB = NULL;
@@ -2823,13 +2895,13 @@ static Value *emit_array_nd_index(
         endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
     }
 #endif
-    Value **idxs = (Value**)alloca(sizeof(Value*) * nidxs);
+    SmallVector<Value *> idxs(nidxs);
     for (size_t k = 0; k < nidxs; k++) {
-        idxs[k] = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), argv[k], (jl_value_t*)jl_long_type); // type asserted by caller
+        idxs[k] = emit_unbox(ctx, ctx.types().T_size, argv[k], (jl_value_t*)jl_long_type); // type asserted by caller
     }
     Value *ii = NULL;
     for (size_t k = 0; k < nidxs; k++) {
-        ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+        ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(ctx.types().T_size, 1));
         i = ctx.builder.CreateAdd(i, ctx.builder.CreateMul(ii, stride));
         if (k < nidxs - 1) {
             assert(nd >= 0);
@@ -2874,23 +2946,23 @@ static Value *emit_array_nd_index(
             for (size_t k = nidxs+1; k < (size_t)nd; k++) {
                 BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok");
                 Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd);
-                ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)), dimsokBB, failBB);
+                ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)), dimsokBB, failBB);
                 ctx.f->getBasicBlockList().push_back(dimsokBB);
                 ctx.builder.SetInsertPoint(dimsokBB);
             }
             Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd);
-            ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)), endBB, failBB);
+            ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)), endBB, failBB);
         }
 
         ctx.f->getBasicBlockList().push_back(failBB);
         ctx.builder.SetInsertPoint(failBB);
         // CreateAlloca is OK here since we are on an error branch
-        Value *tmp = ctx.builder.CreateAlloca(getSizeTy(ctx.builder.getContext()), ConstantInt::get(getSizeTy(ctx.builder.getContext()), nidxs));
+        Value *tmp = ctx.builder.CreateAlloca(ctx.types().T_size, ConstantInt::get(ctx.types().T_size, nidxs));
         for (size_t k = 0; k < nidxs; k++) {
-            ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), tmp, ConstantInt::get(getSizeTy(ctx.builder.getContext()), k)), Align(sizeof(size_t)));
+            ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(ctx.types().T_size, tmp, ConstantInt::get(ctx.types().T_size, k)), ctx.types().alignof_ptr);
         }
         ctx.builder.CreateCall(prepare_call(jlboundserrorv_func),
-            { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nidxs) });
+            { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(ctx.types().T_size, nidxs) });
         ctx.builder.CreateUnreachable();
 
         ctx.f->getBasicBlockList().push_back(endBB);
@@ -2903,7 +2975,7 @@ static Value *emit_array_nd_index(
 
 // --- boxing ---
 
-static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt);
+static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt);
 
 static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa,
                             unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned
@@ -2966,12 +3038,8 @@ static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant
     if (const auto *CC = dyn_cast<ConstantAggregate>(constant))
         nargs = CC->getNumOperands();
     else if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(constant)) {
-#if JL_LLVM_VERSION >= 130000
         // SVE: Elsewhere we use `getMinKownValue`
         nargs = CAZ->getElementCount().getFixedValue();
-#else
-        nargs = CAZ->getNumElements();
-#endif
     }
     else if (const auto *CDS = dyn_cast<ConstantDataSequential>(constant))
         nargs = CDS->getNumElements();
@@ -3004,7 +3072,8 @@ static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant
     return obj;
 }
 
-static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction *intr, Value *v)
+template<typename TypeFn_t>
+static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction<TypeFn_t> *intr, Value *v)
 {
     Function *F = prepare_call(intr);
     CallInst *Call = ctx.builder.CreateCall(F, v);
@@ -3093,14 +3162,14 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
     return box;
 }
 
-static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t *supertype, jl_value_t *ut)
+static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype_tag, jl_value_t *supertype, jl_value_t *ut)
 {
     Value *tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0);
     unsigned counter = 0;
     for_each_uniontype_small(
             [&](unsigned idx, jl_datatype_t *jt) {
                 if (jl_subtype((jl_value_t*)jt, supertype)) {
-                    Value *cmp = ctx.builder.CreateICmpEQ(track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)), datatype);
+                    Value *cmp = ctx.builder.CreateICmpEQ(emit_tagfrom(ctx, jt), datatype_tag);
                     tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx), tindex);
                 }
             },
@@ -3118,7 +3187,7 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j
         return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ));
     if (val.TIndex)
         return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
-    Value *typof = emit_typeof_boxed(ctx, val, maybenull);
+    Value *typof = emit_typeof(ctx, val, maybenull, true);
     return compute_box_tindex(ctx, typof, val.typ, typ);
 }
 
@@ -3211,7 +3280,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
                     jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL);
                     box = _boxed_special(ctx, vinfo_r, t);
                     if (!box) {
-                        box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
+                        box = emit_allocobj(ctx, jt);
                         init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
                     }
                 }
@@ -3292,13 +3361,11 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig
             IntrinsicInst *call = cast<IntrinsicInst>(User);
             call->setCalledFunction(mangleIntrinsic(call));
         }
-#ifndef JL_LLVM_OPAQUE_POINTERS
         else if (isa<BitCastInst>(User)) {
             BitCastInst *Inst = cast<BitCastInst>(User);
             Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
             recursively_adjust_ptr_type(Inst, FromAS, ToAS);
         }
-#endif
     }
 }
 
@@ -3339,12 +3406,10 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
             if (do_promote && is_promotable) {
                 auto IP = ctx.builder.saveIP();
                 ctx.builder.SetInsertPoint(vinfo.promotion_point);
-                box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
+                box = emit_allocobj(ctx, (jl_datatype_t*)jt);
                 Value *decayed = decay_derived(ctx, box);
                 AllocaInst *originalAlloca = cast<AllocaInst>(vinfo.V);
-#ifndef JL_LLVM_OPAQUE_POINTERS
-                decayed = maybe_bitcast(ctx, decayed, PointerType::get(originalAlloca->getType()->getPointerElementType(), AddressSpace::Derived));
-#endif
+                decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived));
                 // Warning: Very illegal IR here temporarily
                 originalAlloca->mutateType(decayed->getType());
                 recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived);
@@ -3353,7 +3418,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
                 originalAlloca->eraseFromParent();
                 ctx.builder.restoreIP(IP);
             } else {
-                box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
+                box = emit_allocobj(ctx, (jl_datatype_t*)jt);
                 init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
             }
         }
@@ -3443,7 +3508,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
     else {
         assert(src.isboxed && "expected boxed value for sizeof/alignment computation");
         auto f = [&] {
-            Value *datatype = emit_typeof_boxed(ctx, src);
+            Value *datatype = emit_typeof(ctx, src, false, false);
             Value *copy_bytes = emit_datatype_size(ctx, datatype);
             emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src, copy_bytes, /*TODO: min-align*/1, isVolatile);
             return nullptr;
@@ -3459,8 +3524,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
 static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std::string &msg)
 {
     ++EmittedCPointerChecks;
-    Value *t = emit_typeof_boxed(ctx, x);
-    emit_typecheck(ctx, mark_julia_type(ctx, t, true, jl_any_type), (jl_value_t*)jl_datatype_type, msg);
+    Value *t = emit_typeof(ctx, x, false, false);
 
     Value *istype =
         ctx.builder.CreateICmpEQ(emit_datatype_name(ctx, t),
@@ -3470,7 +3534,7 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
     ctx.builder.CreateCondBr(istype, passBB, failBB);
     ctx.builder.SetInsertPoint(failBB);
 
-    emit_type_error(ctx, x, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_type), msg);
+    just_emit_type_error(ctx, x, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_type), msg);
     ctx.builder.CreateUnreachable();
 
     ctx.f->getBasicBlockList().push_back(passBB);
@@ -3484,15 +3548,18 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
     ++EmittedAllocObjs;
     Value *current_task = get_current_task(ctx);
     Function *F = prepare_call(jl_alloc_obj_func);
-    auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(getSizeTy(ctx.builder.getContext()), static_size), maybe_decay_untracked(ctx, jt)});
+    auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(ctx.types().T_size, static_size), maybe_decay_untracked(ctx, jt)});
     call->setAttributes(F->getAttributes());
     if (static_size > 0)
-    {
-        call->addRetAttr(Attribute::getWithDereferenceableBytes(ctx.builder.getContext(),static_size));
-    }
+        call->addRetAttr(Attribute::getWithDereferenceableBytes(ctx.builder.getContext(), static_size));
     return call;
 }
 
+static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt)
+{
+    return emit_allocobj(ctx, jl_datatype_size(jt), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jt), ctx.types().T_pjlvalue));
+}
+
 // allocation for unknown object from an untracked pointer
 static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval)
 {
@@ -3571,7 +3638,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         addr = ctx.builder.CreateInBoundsGEP(
                 getInt8Ty(ctx.builder.getContext()),
                 emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())),
-                ConstantInt::get(getSizeTy(ctx.builder.getContext()), byte_offset)); // TODO: use emit_struct_gep
+                ConstantInt::get(ctx.types().T_size, byte_offset)); // TODO: use emit_struct_gep
     }
     jl_value_t *jfty = jl_field_type(sty, idx0);
     if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) {
@@ -3585,7 +3652,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
             return jl_cgval_t();
         Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()),
                 emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())),
-                ConstantInt::get(getSizeTy(ctx.builder.getContext()), fsz));
+                ConstantInt::get(ctx.types().T_size, fsz));
         if (needlock)
             emit_lockstate_value(ctx, strct, true);
         BasicBlock *ModifyBB = NULL;
@@ -3735,11 +3802,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     // avoid unboxing the argument explicitly
                     // and use memcpy instead
                     Instruction *inst;
-#ifndef JL_LLVM_OPAQUE_POINTERS
                     dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx));
-#else
-                    dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), strct, offs));
-#endif
                     // Our promotion point needs to come before
                     //  A) All of our arguments' promotion points
                     //  B) Any instructions we insert at any of our arguments' promotion points
@@ -3874,8 +3937,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 return ret;
             }
         }
-        Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
-                                     literal_pointer_val(ctx, (jl_value_t*)ty));
+        Value *strct = emit_allocobj(ctx, sty);
         jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty);
         strct = decay_derived(ctx, strct);
         undef_derived_strct(ctx, strct, sty, strctinfo.tbaa);
@@ -3885,7 +3947,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 ai.decorateInst(ctx.builder.CreateAlignedStore(
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
                         ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, strct, getInt8PtrTy(ctx.builder.getContext())),
-                                ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
+                                ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
                         Align(1)));
             }
         }
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 9f7a5e22d22b4..086d925802f63 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -191,15 +191,6 @@ class GCChecker
     }
     return f(TD->getName());
   }
-  static bool isValueCollection(QualType QT) {
-    if (QT->isPointerType() || QT->isArrayType())
-      return isValueCollection(
-          clang::QualType(QT->getPointeeOrArrayElementType(), 0));
-    const TagDecl *TD = QT->getUnqualifiedDesugaredType()->getAsTagDecl();
-    if (!TD)
-      return false;
-    return declHasAnnotation(TD, "julia_rooted_value_collection");
-  }
   template <typename callback>
   static SymbolRef walkToRoot(callback f, const ProgramStateRef &State,
                               const MemRegion *Region);
@@ -768,8 +759,7 @@ static bool isMutexUnlock(StringRef name) {
 #endif
 
 bool GCChecker::isGCTrackedType(QualType QT) {
-  return isValueCollection(QT) ||
-         isJuliaType(
+  return isJuliaType(
              [](StringRef Name) {
                if (Name.endswith_lower("jl_value_t") ||
                    Name.endswith_lower("jl_svec_t") ||
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
index fccef22586e5d..1c52f969a11f7 100644
--- a/src/codegen-stubs.c
+++ b/src/codegen-stubs.c
@@ -48,6 +48,8 @@ JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t
     jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
 }
 
+JL_DLLEXPORT void jl_generate_fptr_for_oc_wrapper_fallback(jl_code_instance_t *unspec) UNAVAILABLE
+
 JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void)
 {
     return 0;
@@ -67,7 +69,7 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void)
     return 0;
 }
 
-JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage) UNAVAILABLE
+JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) UNAVAILABLE
 
 JL_DLLEXPORT void jl_dump_compiles_fallback(void *s)
 {
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 29e4d30040a98..a5d54f16ed2e6 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -3,12 +3,6 @@
 #undef DEBUG
 #include "llvm-version.h"
 #include "platform.h"
-#if defined(_CPU_X86_)
-#define JL_NEED_FLOATTEMP_VAR 1
-#endif
-#if defined(_OS_WINDOWS_) || defined(_OS_FREEBSD_) || defined(_COMPILER_MSAN_ENABLED_)
-#define JL_DISABLE_FPO
-#endif
 
 #ifndef __STDC_LIMIT_MACROS
 #define __STDC_LIMIT_MACROS
@@ -26,11 +20,7 @@
 
 // target machine computation
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
-#if JL_LLVM_VERSION >= 140000
 #include <llvm/MC/TargetRegistry.h>
-#else
-#include <llvm/Support/TargetRegistry.h>
-#endif
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/Support/Host.h>
 #include <llvm/Support/TargetSelect.h>
@@ -88,6 +78,27 @@
 
 using namespace llvm;
 
+static bool jl_fpo_disabled(const Triple &TT) {
+#ifdef JL_DISABLE_FPO
+    return true;
+#endif
+#ifdef _COMPILER_MSAN_ENABLED_
+    // MSAN doesn't support FPO
+    return true;
+#endif
+    if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD()) {
+        return true;
+    }
+    return false;
+}
+
+static bool jl_floattemp_var_needed(const Triple &TT) {
+#ifdef JL_NEED_FLOATTEMP_VAR
+    return true;
+#endif
+    return TT.getArch() == Triple::x86;
+}
+
 //Drag some useful type functions into our namespace
 //to reduce verbosity of our code
 auto getInt1Ty(LLVMContext &ctxt) {
@@ -141,13 +152,6 @@ auto getFloatPtrTy(LLVMContext &ctxt) {
 auto getDoublePtrTy(LLVMContext &ctxt) {
     return Type::getDoublePtrTy(ctxt);
 }
-auto getSizePtrTy(LLVMContext &ctxt) {
-    if (sizeof(size_t) > sizeof(uint32_t)) {
-        return getInt64PtrTy(ctxt);
-    } else {
-        return getInt32PtrTy(ctxt);
-    }
-}
 
 typedef Instruction TerminatorInst;
 
@@ -185,7 +189,7 @@ STATISTIC(GeneratedCCallables, "Number of C-callable functions generated");
 STATISTIC(GeneratedInvokeWrappers, "Number of invoke wrappers generated");
 STATISTIC(EmittedFunctions, "Number of functions emitted");
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_emitted_mi_name_impl(void *s)
 {
     **jl_ExecutionEngine->get_dump_emitted_mi_name_stream() = (ios_t*)s;
@@ -225,6 +229,7 @@ extern void _chkstk(void);
 
 // types
 struct jl_typecache_t {
+    Type *T_size;
     Type *T_jlvalue;
     Type *T_pjlvalue;
     Type *T_prjlvalue;
@@ -238,6 +243,8 @@ struct jl_typecache_t {
     IntegerType *T_sigatomic;
 
     Type *T_ppint8;
+    unsigned sizeof_ptr;
+    Align alignof_ptr;
 
     bool initialized;
 
@@ -247,13 +254,17 @@ struct jl_typecache_t {
         T_pjlarray(nullptr), T_jlfunc(nullptr), T_jlfuncparams(nullptr),
         T_sigatomic(nullptr), T_ppint8(nullptr), initialized(false) {}
 
-    void initialize(LLVMContext &context) {
+    void initialize(LLVMContext &context, const DataLayout &DL) {
         if (initialized) {
             return;
         }
         initialized = true;
         T_ppint8 = PointerType::get(getInt8PtrTy(context), 0);
         T_sigatomic = Type::getIntNTy(context, sizeof(sig_atomic_t) * 8);
+        T_size = DL.getIntPtrType(context);
+        sizeof_ptr = DL.getPointerSize();
+        // use pointer abi alignment for intptr_t
+        alignof_ptr = DL.getPointerABIAlignment(0);
         T_jlvalue = JuliaType::get_jlvalue_ty(context);
         T_pjlvalue = PointerType::get(T_jlvalue, 0);
         T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
@@ -266,7 +277,7 @@ struct jl_typecache_t {
         assert(T_jlfuncparams != NULL);
 
         Type *vaelts[] = {PointerType::get(getInt8Ty(context), AddressSpace::Loaded)
-                        , getSizeTy(context)
+                        , T_size
                         , getInt16Ty(context)
                         , getInt16Ty(context)
                         , getInt32Ty(context)
@@ -464,14 +475,15 @@ struct JuliaVariable {
 public:
     StringLiteral name;
     bool isconst;
-    Type *(*_type)(LLVMContext &C);
+    Type *(*_type)(Type *T_size);
 
     JuliaVariable(const JuliaVariable&) = delete;
     JuliaVariable(const JuliaVariable&&) = delete;
     GlobalVariable *realize(Module *m) {
         if (GlobalValue *V = m->getNamedValue(name))
             return cast<GlobalVariable>(V);
-        return new GlobalVariable(*m, _type(m->getContext()),
+        auto T_size = m->getDataLayout().getIntPtrType(m->getContext());
+        return new GlobalVariable(*m, _type(T_size),
                 isconst, GlobalVariable::ExternalLinkage,
                 NULL, name);
     }
@@ -482,10 +494,31 @@ static inline void add_named_global(JuliaVariable *name, void *addr)
     add_named_global(name->name, addr);
 }
 
+
+typedef FunctionType *(*TypeFnContextOnly)(LLVMContext &C);
+typedef FunctionType *(*TypeFnContextAndSizeT)(LLVMContext &C, Type *T_size);
+typedef FunctionType *(*TypeFnContextAndTriple)(LLVMContext &C, const Triple &triple);
+
+FunctionType *invoke_type(TypeFnContextOnly f, Module &M)
+{
+    return f(M.getContext());
+}
+
+FunctionType *invoke_type(TypeFnContextAndSizeT f, Module &M)
+{
+    return f(M.getContext(), M.getDataLayout().getIntPtrType(M.getContext()));
+}
+
+FunctionType *invoke_type(TypeFnContextAndTriple f, Module &M)
+{
+    return f(M.getContext(), Triple(M.getTargetTriple()));
+}
+
+template<typename TypeFn_t = TypeFnContextOnly>
 struct JuliaFunction {
 public:
     llvm::StringLiteral name;
-    llvm::FunctionType *(*_type)(llvm::LLVMContext &C);
+    TypeFn_t _type;
     llvm::AttributeList (*_attrs)(llvm::LLVMContext &C);
 
     JuliaFunction(const JuliaFunction&) = delete;
@@ -493,7 +526,7 @@ struct JuliaFunction {
     llvm::Function *realize(llvm::Module *m) {
         if (llvm::GlobalValue *V = m->getNamedValue(name))
             return llvm::cast<llvm::Function>(V);
-        llvm::Function *F = llvm::Function::Create(_type(m->getContext()),
+        llvm::Function *F = llvm::Function::Create(invoke_type(_type, *m),
                          llvm::Function::ExternalLinkage,
                          name, m);
         if (_attrs)
@@ -502,8 +535,8 @@ struct JuliaFunction {
     }
 };
 
-template<typename T>
-static inline void add_named_global(JuliaFunction *name, T *addr)
+template<typename T, typename TypeFn_t>
+static inline void add_named_global(JuliaFunction<TypeFn_t> *name, T *addr)
 {
     // cast through integer to avoid c++ pedantic warning about casting between
     // data and code pointers
@@ -589,57 +622,60 @@ static AttributeList get_attrs_zext(LLVMContext &C)
 static const auto jlRTLD_DEFAULT_var = new JuliaVariable{
     XSTR(jl_RTLD_DEFAULT_handle),
     true,
-    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
+    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
 };
-#ifdef _OS_WINDOWS_
 static const auto jlexe_var = new JuliaVariable{
     XSTR(jl_exe_handle),
     true,
-    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
+    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
 };
 static const auto jldll_var = new JuliaVariable{
     XSTR(jl_libjulia_handle),
     true,
-    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
+    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
 };
 static const auto jldlli_var = new JuliaVariable{
     XSTR(jl_libjulia_internal_handle),
     true,
-    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
+    [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); },
+};
+static const auto jlsmall_typeof_var = new JuliaVariable{
+    XSTR(small_typeof),
+    true,
+    [](Type *T_size) -> Type * { return getInt8Ty(T_size->getContext()); },
 };
-#endif //_OS_WINDOWS_
 
 static const auto jlstack_chk_guard_var = new JuliaVariable{
     XSTR(__stack_chk_guard),
     true,
-    get_pjlvalue,
+    [](Type *T_size) -> Type * { return get_pjlvalue(T_size->getContext()); },
 };
 
 static const auto jlgetworld_global = new JuliaVariable{
     XSTR(jl_world_counter),
     false,
-    [](LLVMContext &C) { return (Type*)getSizeTy(C); },
+    [](Type *T_size) -> Type * { return T_size; },
 };
 
 static const auto jlboxed_int8_cache = new JuliaVariable{
     XSTR(jl_boxed_int8_cache),
     true,
-    [](LLVMContext &C) { return (Type*)ArrayType::get(get_pjlvalue(C), 256); },
+    [](Type *T_size) -> Type * { return ArrayType::get(get_pjlvalue(T_size->getContext()), 256); },
 };
 
 static const auto jlboxed_uint8_cache = new JuliaVariable{
     XSTR(jl_boxed_uint8_cache),
     true,
-    [](LLVMContext &C) { return (Type*)ArrayType::get(get_pjlvalue(C), 256); },
+    [](Type *T_size) -> Type * { return ArrayType::get(get_pjlvalue(T_size->getContext()), 256); },
 };
 
-static const auto jlpgcstack_func = new JuliaFunction{
+static const auto jlpgcstack_func = new JuliaFunction<>{
     "julia.get_pgcstack",
     [](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_ppjlvalue_ty(C), 0), false); },
     nullptr,
 };
 
-static const auto jladoptthread_func = new JuliaFunction{
+static const auto jladoptthread_func = new JuliaFunction<>{
     "julia.get_pgcstack_or_new",
     jlpgcstack_func->_type,
     jlpgcstack_func->_attrs,
@@ -649,12 +685,12 @@ static const auto jladoptthread_func = new JuliaFunction{
 // important functions
 // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway,
 // because they may come from a gc-rooted location
-static const auto jlnew_func = new JuliaFunction{
+static const auto jlnew_func = new JuliaFunction<>{
     XSTR(jl_new_structv),
     get_func_sig,
     get_func_attrs,
 };
-static const auto jlsplatnew_func = new JuliaFunction{
+static const auto jlsplatnew_func = new JuliaFunction<>{
     XSTR(jl_new_structt),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -663,62 +699,62 @@ static const auto jlsplatnew_func = new JuliaFunction{
     },
     get_attrs_basic,
 };
-static const auto jlthrow_func = new JuliaFunction{
+static const auto jlthrow_func = new JuliaFunction<>{
     XSTR(jl_throw),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
-static const auto jlerror_func = new JuliaFunction{
+static const auto jlerror_func = new JuliaFunction<>{
     XSTR(jl_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {getInt8PtrTy(C)}, false); },
     get_attrs_noreturn,
 };
-static const auto jlatomicerror_func = new JuliaFunction{
+static const auto jlatomicerror_func = new JuliaFunction<>{
     XSTR(jl_atomic_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {getInt8PtrTy(C)}, false); },
     get_attrs_noreturn,
 };
-static const auto jltypeerror_func = new JuliaFunction{
+static const auto jltypeerror_func = new JuliaFunction<>{
     XSTR(jl_type_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {getInt8PtrTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
-static const auto jlundefvarerror_func = new JuliaFunction{
+static const auto jlundefvarerror_func = new JuliaFunction<>{
     XSTR(jl_undefined_var_error),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
-static const auto jlboundserrorv_func = new JuliaFunction{
+static const auto jlboundserrorv_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_bounds_error_ints),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), getSizePtrTy(C), getSizeTy(C)}, false); },
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), T_size->getPointerTo(), T_size}, false); },
     get_attrs_noreturn,
 };
-static const auto jlboundserror_func = new JuliaFunction{
+static const auto jlboundserror_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_bounds_error_int),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), getSizeTy(C)}, false); },
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), T_size}, false); },
     get_attrs_noreturn,
 };
-static const auto jlvboundserror_func = new JuliaFunction{
+static const auto jlvboundserror_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_bounds_error_tuple_int),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {JuliaType::get_pprjlvalue_ty(C), getSizeTy(C), getSizeTy(C)}, false); },
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
+            {JuliaType::get_pprjlvalue_ty(C), T_size, T_size}, false); },
     get_attrs_noreturn,
 };
-static const auto jluboundserror_func = new JuliaFunction{
+static const auto jluboundserror_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_bounds_error_unboxed_int),
-    [](LLVMContext &C) {
+    [](LLVMContext &C, Type *T_size) {
         return FunctionType::get(getVoidTy(C),
-            {PointerType::get(getInt8Ty(C), AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), getSizeTy(C)}, false); },
+            {PointerType::get(getInt8Ty(C), AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), T_size}, false); },
     get_attrs_noreturn,
 };
-static const auto jlcheckassign_func = new JuliaFunction{
+static const auto jlcheckassign_func = new JuliaFunction<>{
     XSTR(jl_checked_assignment),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
@@ -726,7 +762,7 @@ static const auto jlcheckassign_func = new JuliaFunction{
             {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     nullptr,
 };
-static const auto jldeclareconst_func = new JuliaFunction{
+static const auto jldeclareconst_func = new JuliaFunction<>{
     XSTR(jl_declare_constant),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
@@ -734,7 +770,7 @@ static const auto jldeclareconst_func = new JuliaFunction{
             {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
-static const auto jlgetbindingorerror_func = new JuliaFunction{
+static const auto jlgetbindingorerror_func = new JuliaFunction<>{
     XSTR(jl_get_binding_or_error),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
@@ -743,7 +779,7 @@ static const auto jlgetbindingorerror_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jlgetbindingwrorerror_func = new JuliaFunction{
+static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{
     XSTR(jl_get_binding_wr),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
@@ -752,7 +788,7 @@ static const auto jlgetbindingwrorerror_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jlboundp_func = new JuliaFunction{
+static const auto jlboundp_func = new JuliaFunction<>{
     XSTR(jl_boundp),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
@@ -761,7 +797,7 @@ static const auto jlboundp_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jltopeval_func = new JuliaFunction{
+static const auto jltopeval_func = new JuliaFunction<>{
     XSTR(jl_toplevel_eval),
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
@@ -773,7 +809,7 @@ static const auto jltopeval_func = new JuliaFunction{
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-static const auto jlcopyast_func = new JuliaFunction{
+static const auto jlcopyast_func = new JuliaFunction<>{
     XSTR(jl_copy_ast),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -785,21 +821,12 @@ static const auto jlcopyast_func = new JuliaFunction{
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-//static const auto jlnsvec_func = new JuliaFunction{
-//    XSTR(jl_svec),
-//    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-//                {getSizeTy(C)}, true); },
-//    [](LLVMContext &C) { return AttributeList::get(C,
-//            AttributeSet(),
-//            Attributes(C, {Attribute::NonNull}),
-//            None); },
-//};
-static const auto jlapplygeneric_func = new JuliaFunction{
+static const auto jlapplygeneric_func = new JuliaFunction<>{
     XSTR(jl_apply_generic),
     get_func_sig,
     get_func_attrs,
 };
-static const auto jlinvoke_func = new JuliaFunction{
+static const auto jlinvoke_func = new JuliaFunction<>{
     XSTR(jl_invoke),
     get_func2_sig,
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -808,7 +835,7 @@ static const auto jlinvoke_func = new JuliaFunction{
             {AttributeSet(),
              Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); },
 };
-static const auto jlmethod_func = new JuliaFunction{
+static const auto jlmethod_func = new JuliaFunction<>{
     XSTR(jl_method_def),
     [](LLVMContext &C) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
@@ -819,7 +846,7 @@ static const auto jlmethod_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jlgenericfunction_func = new JuliaFunction{
+static const auto jlgenericfunction_func = new JuliaFunction<>{
     XSTR(jl_generic_function_def),
     [](LLVMContext &C) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
@@ -830,7 +857,7 @@ static const auto jlgenericfunction_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jllockvalue_func = new JuliaFunction{
+static const auto jllockvalue_func = new JuliaFunction<>{
     XSTR(jl_lock_value),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
@@ -839,7 +866,7 @@ static const auto jllockvalue_func = new JuliaFunction{
             AttributeSet(),
             {Attributes(C, {Attribute::NoCapture})}); },
 };
-static const auto jlunlockvalue_func = new JuliaFunction{
+static const auto jlunlockvalue_func = new JuliaFunction<>{
     XSTR(jl_unlock_value),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
@@ -848,64 +875,69 @@ static const auto jlunlockvalue_func = new JuliaFunction{
             AttributeSet(),
             {Attributes(C, {Attribute::NoCapture})}); },
 };
-static const auto jlenter_func = new JuliaFunction{
+static const auto jlenter_func = new JuliaFunction<>{
     XSTR(jl_enter_handler),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {getInt8PtrTy(C)}, false); },
     nullptr,
 };
-static const auto jl_current_exception_func = new JuliaFunction{
+static const auto jl_current_exception_func = new JuliaFunction<>{
     XSTR(jl_current_exception),
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), false); },
     nullptr,
 };
-static const auto jlleave_func = new JuliaFunction{
+static const auto jlleave_func = new JuliaFunction<>{
     XSTR(jl_pop_handler),
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {getInt32Ty(C)}, false); },
     nullptr,
 };
-static const auto jl_restore_excstack_func = new JuliaFunction{
+static const auto jl_restore_excstack_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_restore_excstack),
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {getSizeTy(C)}, false); },
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C),
+            {T_size}, false); },
     nullptr,
 };
-static const auto jl_excstack_state_func = new JuliaFunction{
+static const auto jl_excstack_state_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_excstack_state),
-    [](LLVMContext &C) { return FunctionType::get(getSizeTy(C), false); },
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, false); },
     nullptr,
 };
-static const auto jlegalx_func = new JuliaFunction{
+static const auto jlegalx_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_egal__unboxed),
-    [](LLVMContext &C) {
+    [](LLVMContext &C, Type *T_size) {
         Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived);
-        return FunctionType::get(getInt32Ty(C), {T, T, JuliaType::get_prjlvalue_ty(C)}, false); },
+        return FunctionType::get(getInt32Ty(C), {T, T, T_size}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
             AttributeSet(),
             None); },
 };
-static const auto jl_alloc_obj_func = new JuliaFunction{
+static const auto jl_alloc_obj_func = new JuliaFunction<TypeFnContextAndSizeT>{
     "julia.gc_alloc_obj",
-    [](LLVMContext &C) {
+    [](LLVMContext &C, Type *T_size) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
         auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
         auto T_ppjlvalue = PointerType::get(PointerType::get(T_jlvalue, 0), 0);
         return FunctionType::get(T_prjlvalue,
-                {T_ppjlvalue, getSizeTy(C), T_prjlvalue}, false);
+                {T_ppjlvalue, T_size, T_prjlvalue}, false);
     },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            AttributeSet::get(C, makeArrayRef({Attribute::getWithAllocSizeArgs(C, 1, None)})), // returns %1 bytes
-
-            Attributes(C, {Attribute::NoAlias, Attribute::NonNull,
+    [](LLVMContext &C) {
+        auto FnAttrs = AttrBuilder(C);
+        FnAttrs.addAllocSizeAttr(1, None); // returns %1 bytes
 #if JL_LLVM_VERSION >= 150000
-            Attribute::get(C, Attribute::AllocKind, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned),
+        FnAttrs.addAllocKindAttr(AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned);
 #endif
-            }),
-            None); },
+        auto RetAttrs = AttrBuilder(C);
+        RetAttrs.addAttribute(Attribute::NoAlias);
+        RetAttrs.addAttribute(Attribute::NonNull);
+        return AttributeList::get(C,
+            AttributeSet::get(C, FnAttrs),
+            AttributeSet::get(C, RetAttrs),
+            None);
+    },
 };
-static const auto jl_newbits_func = new JuliaFunction{
+static const auto jl_newbits_func = new JuliaFunction<>{
     XSTR(jl_new_bits),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -920,7 +952,7 @@ static const auto jl_newbits_func = new JuliaFunction{
 // `julia.typeof` does read memory, but it is effectively readnone before we lower
 // the allocation function. This is OK as long as we lower `julia.typeof` no later than
 // `julia.gc_alloc_obj`.
-static const auto jl_typeof_func = new JuliaFunction{
+static const auto jl_typeof_func = new JuliaFunction<>{
     "julia.typeof",
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -932,7 +964,7 @@ static const auto jl_typeof_func = new JuliaFunction{
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-static const auto jl_loopinfo_marker_func = new JuliaFunction{
+static const auto jl_loopinfo_marker_func = new JuliaFunction<>{
     "julia.loopinfo_marker",
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -940,7 +972,7 @@ static const auto jl_loopinfo_marker_func = new JuliaFunction{
             AttributeSet(),
             None); },
 };
-static const auto jl_write_barrier_func = new JuliaFunction{
+static const auto jl_write_barrier_func = new JuliaFunction<>{
     "julia.write_barrier",
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
             {JuliaType::get_prjlvalue_ty(C)}, true); },
@@ -949,16 +981,8 @@ static const auto jl_write_barrier_func = new JuliaFunction{
             AttributeSet(),
             {Attributes(C, {Attribute::ReadOnly})}); },
 };
-static const auto jl_write_barrier_binding_func = new JuliaFunction{
-    "julia.write_barrier_binding",
-    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
-            {JuliaType::get_prjlvalue_ty(C)}, true); },
-    [](LLVMContext &C) { return AttributeList::get(C,
-            Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
-            AttributeSet(),
-            {Attributes(C, {Attribute::ReadOnly})}); },
-};
-static const auto jlisa_func = new JuliaFunction{
+
+static const auto jlisa_func = new JuliaFunction<>{
     XSTR(jl_isa),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -968,7 +992,7 @@ static const auto jlisa_func = new JuliaFunction{
     nullptr,
 };
 
-static const auto jlsubtype_func = new JuliaFunction{
+static const auto jlsubtype_func = new JuliaFunction<>{
     XSTR(jl_subtype),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -977,7 +1001,7 @@ static const auto jlsubtype_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jlapplytype_func = new JuliaFunction{
+static const auto jlapplytype_func = new JuliaFunction<>{
     XSTR(jl_instantiate_type_in_env),
     [](LLVMContext &C) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
@@ -995,48 +1019,49 @@ static const auto jlapplytype_func = new JuliaFunction{
             None);
     },
 };
-static const auto jl_object_id__func = new JuliaFunction{
+static const auto jl_object_id__func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_object_id_),
-    [](LLVMContext &C) { return FunctionType::get(getSizeTy(C),
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size,
             {JuliaType::get_prjlvalue_ty(C), PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); },
     nullptr,
 };
-static const auto setjmp_func = new JuliaFunction{
+static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
     jl_setjmp_name,
-    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C),
-            {getInt8PtrTy(C),
-#ifndef _OS_WINDOWS_
-            getInt32Ty(C),
-#endif
-            }, false); },
+    [](LLVMContext &C, const Triple &T) {
+        if (T.isOSWindows())
+            return FunctionType::get(getInt32Ty(C),
+                {getInt8PtrTy(C)}, false);
+        return FunctionType::get(getInt32Ty(C),
+            {getInt8PtrTy(C), getInt32Ty(C)}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReturnsTwice}),
             AttributeSet(),
             None); },
 };
-static const auto memcmp_func = new JuliaFunction{
+static const auto memcmp_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(memcmp),
-    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C),
-            {getInt8PtrTy(C), getInt8PtrTy(C), getSizeTy(C)}, false); },
+    [](LLVMContext &C, Type *T_size) { return FunctionType::get(getInt32Ty(C),
+            {getInt8PtrTy(C), getInt8PtrTy(C), T_size}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
             AttributeSet(),
             None); },
     // TODO: inferLibFuncAttributes(*memcmp_func, TLI);
 };
-static const auto jldlsym_func = new JuliaFunction{
+static const auto jldlsym_func = new JuliaFunction<>{
     XSTR(jl_load_and_lookup),
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
             {getInt8PtrTy(C), getInt8PtrTy(C), PointerType::get(getInt8PtrTy(C), 0)}, false); },
     nullptr,
 };
-static const auto jllazydlsym_func = new JuliaFunction{
+static const auto jllazydlsym_func = new JuliaFunction<>{
     XSTR(jl_lazy_load_and_lookup),
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
             {JuliaType::get_prjlvalue_ty(C), getInt8PtrTy(C)}, false); },
     nullptr,
 };
-static const auto jltypeassert_func = new JuliaFunction{
+static const auto jltypeassert_func = new JuliaFunction<>{
     XSTR(jl_typeassert),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -1045,31 +1070,31 @@ static const auto jltypeassert_func = new JuliaFunction{
     },
     nullptr,
 };
-static const auto jlgetnthfieldchecked_func = new JuliaFunction{
+static const auto jlgetnthfieldchecked_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_get_nth_field_checked),
-    [](LLVMContext &C) {
+    [](LLVMContext &C, Type *T_size) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-            {T_prjlvalue, getSizeTy(C)}, false);
+            {T_prjlvalue, T_size}, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-static const auto jlfieldisdefinedchecked_func = new JuliaFunction{
+static const auto jlfieldisdefinedchecked_func = new JuliaFunction<TypeFnContextAndSizeT>{
     XSTR(jl_field_isdefined_checked),
-    [](LLVMContext &C) {
+    [](LLVMContext &C, Type *T_size) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(getInt32Ty(C),
-            {T_prjlvalue, getSizeTy(C)}, false);
+            {T_prjlvalue, T_size}, false);
     },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {}),
             None); },
 };
-static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{
+static const auto jlgetcfunctiontrampoline_func = new JuliaFunction<>{
     XSTR(jl_get_cfunction_trampoline),
     [](LLVMContext &C) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
@@ -1093,18 +1118,18 @@ static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-static const auto diff_gc_total_bytes_func = new JuliaFunction{
+static const auto diff_gc_total_bytes_func = new JuliaFunction<>{
     XSTR(jl_gc_diff_total_bytes),
     [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C), false); },
     nullptr,
 };
-static const auto sync_gc_total_bytes_func = new JuliaFunction{
+static const auto sync_gc_total_bytes_func = new JuliaFunction<>{
     XSTR(jl_gc_sync_total_bytes),
     [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C),
             {getInt64Ty(C)}, false); },
     nullptr,
 };
-static const auto jlarray_data_owner_func = new JuliaFunction{
+static const auto jlarray_data_owner_func = new JuliaFunction<>{
     XSTR(jl_array_data_owner),
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
@@ -1117,7 +1142,7 @@ static const auto jlarray_data_owner_func = new JuliaFunction{
             None); },
 };
 #define BOX_FUNC(ct,at,attrs)                                                    \
-static const auto box_##ct##_func = new JuliaFunction{                           \
+static const auto box_##ct##_func = new JuliaFunction<>{                           \
     XSTR(jl_box_##ct),                                                           \
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C),\
             {at}, false); },                                                     \
@@ -1132,27 +1157,36 @@ BOX_FUNC(uint64, getInt64Ty(C), get_attrs_zext);
 BOX_FUNC(char, getCharTy(C), get_attrs_zext);
 BOX_FUNC(float32, getFloatTy(C), get_attrs_basic);
 BOX_FUNC(float64, getDoubleTy(C), get_attrs_basic);
-BOX_FUNC(ssavalue, getSizeTy(C), get_attrs_basic);
 #undef BOX_FUNC
 
+static const auto box_ssavalue_func = new JuliaFunction<TypeFnContextAndSizeT>{
+    XSTR(jl_box_ssavalue),
+    [](LLVMContext &C, Type *T_size) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_size}, false);
+    },
+    get_attrs_basic,
+};
+
 
 // placeholder functions
-static const auto gcroot_flush_func = new JuliaFunction{
+static const auto gcroot_flush_func = new JuliaFunction<>{
     "julia.gcroot_flush",
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
     nullptr,
 };
-static const auto gc_preserve_begin_func = new JuliaFunction{
+static const auto gc_preserve_begin_func = new JuliaFunction<>{
     "llvm.julia.gc_preserve_begin",
     [](LLVMContext &C) { return FunctionType::get(Type::getTokenTy(C), true); },
     nullptr,
 };
-static const auto gc_preserve_end_func = new JuliaFunction {
+static const auto gc_preserve_end_func = new JuliaFunction<> {
     "llvm.julia.gc_preserve_end",
     [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {Type::getTokenTy(C)}, false); },
     nullptr,
 };
-static const auto except_enter_func = new JuliaFunction{
+static const auto except_enter_func = new JuliaFunction<>{
     "julia.except_enter",
     [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -1160,7 +1194,7 @@ static const auto except_enter_func = new JuliaFunction{
             AttributeSet(),
             None); },
 };
-static const auto pointer_from_objref_func = new JuliaFunction{
+static const auto pointer_from_objref_func = new JuliaFunction<>{
     "julia.pointer_from_objref",
     [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pjlvalue_ty(C),
             {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); },
@@ -1181,16 +1215,12 @@ static const auto pointer_from_objref_func = new JuliaFunction{
 // with all the spelled out args appropriately moved into the argument stack buffer.
 // By representing it this way rather than allocating the stack buffer earlier, we
 // allow LLVM to make more aggressive optimizations on the call arguments.
-static const auto julia_call = new JuliaFunction{
+static const auto julia_call = new JuliaFunction<>{
     "julia.call",
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-#ifdef JL_LLVM_OPAQUE_POINTERS
-            {PointerType::get(C, 0),
-#else
             {get_func_sig(C)->getPointerTo(),
-#endif
              T_prjlvalue}, // %f
             true); }, // %args
     get_attrs_basic,
@@ -1198,65 +1228,61 @@ static const auto julia_call = new JuliaFunction{
 
 // julia.call2 is like julia.call, except that %arg1 gets passed as a register
 // argument at the end of the argument list.
-static const auto julia_call2 = new JuliaFunction{
+static const auto julia_call2 = new JuliaFunction<>{
     "julia.call2",
     [](LLVMContext &C) {
         auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
         return FunctionType::get(T_prjlvalue,
-#ifdef JL_LLVM_OPAQUE_POINTERS
-            {PointerType::get(C, 0),
-#else
             {get_func2_sig(C)->getPointerTo(),
-#endif
              T_prjlvalue, // %arg1
              T_prjlvalue}, // %f
             true); }, // %args
     get_attrs_basic,
 };
 
-static const auto jltuple_func = new JuliaFunction{XSTR(jl_f_tuple), get_func_sig, get_func_attrs};
+static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_sig, get_func_attrs};
 static const auto &builtin_func_map() {
-    static std::map<jl_fptr_args_t, JuliaFunction*> builtins = {
-          { jl_f_is_addr,                 new JuliaFunction{XSTR(jl_f_is), get_func_sig, get_func_attrs} },
-          { jl_f_typeof_addr,             new JuliaFunction{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} },
-          { jl_f_sizeof_addr,             new JuliaFunction{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} },
-          { jl_f_issubtype_addr,          new JuliaFunction{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} },
-          { jl_f_isa_addr,                new JuliaFunction{XSTR(jl_f_isa), get_func_sig, get_func_attrs} },
-          { jl_f_typeassert_addr,         new JuliaFunction{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} },
-          { jl_f_ifelse_addr,             new JuliaFunction{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} },
-          { jl_f__apply_iterate_addr,     new JuliaFunction{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} },
-          { jl_f__apply_pure_addr,        new JuliaFunction{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} },
-          { jl_f__call_latest_addr,       new JuliaFunction{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} },
-          { jl_f__call_in_world_addr,     new JuliaFunction{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} },
-          { jl_f__call_in_world_total_addr, new JuliaFunction{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} },
-          { jl_f_throw_addr,              new JuliaFunction{XSTR(jl_f_throw), get_func_sig, get_func_attrs} },
+    static std::map<jl_fptr_args_t, JuliaFunction<>*> builtins = {
+          { jl_f_is_addr,                 new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} },
+          { jl_f_typeof_addr,             new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} },
+          { jl_f_sizeof_addr,             new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} },
+          { jl_f_issubtype_addr,          new JuliaFunction<>{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} },
+          { jl_f_isa_addr,                new JuliaFunction<>{XSTR(jl_f_isa), get_func_sig, get_func_attrs} },
+          { jl_f_typeassert_addr,         new JuliaFunction<>{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} },
+          { jl_f_ifelse_addr,             new JuliaFunction<>{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} },
+          { jl_f__apply_iterate_addr,     new JuliaFunction<>{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} },
+          { jl_f__apply_pure_addr,        new JuliaFunction<>{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} },
+          { jl_f__call_latest_addr,       new JuliaFunction<>{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} },
+          { jl_f__call_in_world_addr,     new JuliaFunction<>{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} },
+          { jl_f__call_in_world_total_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} },
+          { jl_f_throw_addr,              new JuliaFunction<>{XSTR(jl_f_throw), get_func_sig, get_func_attrs} },
           { jl_f_tuple_addr,              jltuple_func },
-          { jl_f_svec_addr,               new JuliaFunction{XSTR(jl_f_svec), get_func_sig, get_func_attrs} },
-          { jl_f_applicable_addr,         new JuliaFunction{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} },
-          { jl_f_invoke_addr,             new JuliaFunction{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} },
-          { jl_f_isdefined_addr,          new JuliaFunction{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} },
-          { jl_f_getfield_addr,           new JuliaFunction{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} },
-          { jl_f_setfield_addr,           new JuliaFunction{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} },
-          { jl_f_swapfield_addr,          new JuliaFunction{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} },
-          { jl_f_modifyfield_addr,        new JuliaFunction{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} },
-          { jl_f_fieldtype_addr,          new JuliaFunction{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} },
-          { jl_f_nfields_addr,            new JuliaFunction{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} },
-          { jl_f__expr_addr,              new JuliaFunction{XSTR(jl_f__expr), get_func_sig, get_func_attrs} },
-          { jl_f__typevar_addr,           new JuliaFunction{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} },
-          { jl_f_arrayref_addr,           new JuliaFunction{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} },
-          { jl_f_const_arrayref_addr,     new JuliaFunction{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} },
-          { jl_f_arrayset_addr,           new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} },
-          { jl_f_arraysize_addr,          new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} },
-          { jl_f_apply_type_addr,         new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
-          { jl_f_donotdelete_addr,        new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
-          { jl_f_compilerbarrier_addr,    new JuliaFunction{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} },
-          { jl_f_finalizer_addr,          new JuliaFunction{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} },
-          { jl_f__svec_ref_addr,          new JuliaFunction{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }
+          { jl_f_svec_addr,               new JuliaFunction<>{XSTR(jl_f_svec), get_func_sig, get_func_attrs} },
+          { jl_f_applicable_addr,         new JuliaFunction<>{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} },
+          { jl_f_invoke_addr,             new JuliaFunction<>{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} },
+          { jl_f_isdefined_addr,          new JuliaFunction<>{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} },
+          { jl_f_getfield_addr,           new JuliaFunction<>{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} },
+          { jl_f_setfield_addr,           new JuliaFunction<>{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} },
+          { jl_f_swapfield_addr,          new JuliaFunction<>{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} },
+          { jl_f_modifyfield_addr,        new JuliaFunction<>{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} },
+          { jl_f_fieldtype_addr,          new JuliaFunction<>{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} },
+          { jl_f_nfields_addr,            new JuliaFunction<>{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} },
+          { jl_f__expr_addr,              new JuliaFunction<>{XSTR(jl_f__expr), get_func_sig, get_func_attrs} },
+          { jl_f__typevar_addr,           new JuliaFunction<>{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} },
+          { jl_f_arrayref_addr,           new JuliaFunction<>{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} },
+          { jl_f_const_arrayref_addr,     new JuliaFunction<>{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} },
+          { jl_f_arrayset_addr,           new JuliaFunction<>{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} },
+          { jl_f_arraysize_addr,          new JuliaFunction<>{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} },
+          { jl_f_apply_type_addr,         new JuliaFunction<>{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
+          { jl_f_donotdelete_addr,        new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
+          { jl_f_compilerbarrier_addr,    new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} },
+          { jl_f_finalizer_addr,          new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} },
+          { jl_f__svec_ref_addr,          new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }
         };
     return builtins;
 }
 
-static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
+static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
 
 static _Atomic(int) globalUniqueGeneratedNames{1};
 
@@ -1270,7 +1296,7 @@ extern "C" {
 #endif
         (int) DICompileUnit::DebugEmissionKind::FullDebug,
         1,
-        jl_rettype_inferred, NULL };
+        jl_rettype_inferred_addr, NULL };
 }
 
 
@@ -1542,7 +1568,7 @@ class jl_codectx_t {
     jl_codegen_params_t &emission_context;
     llvm::MapVector<jl_code_instance_t*, jl_codegen_call_target_t> call_targets;
     std::map<void*, GlobalVariable*> &global_targets;
-    std::map<std::tuple<jl_code_instance_t*, bool>, Function*> &external_calls;
+    std::map<std::tuple<jl_code_instance_t*, bool>, GlobalVariable*> &external_calls;
     Function *f = NULL;
     // local var info. globals are not in here.
     std::vector<jl_varinfo_t> slots;
@@ -1596,7 +1622,7 @@ class jl_codectx_t {
         params(params.params) { }
 
     jl_typecache_t &types() {
-        type_cache.initialize(builder.getContext());
+        type_cache.initialize(builder.getContext(), emission_context.DL);
         return type_cache;
     }
 
@@ -1693,7 +1719,7 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) {
 }
 
 static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure);
+static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure);
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
                                      jl_binding_t **pbnd, bool assign);
@@ -1704,10 +1730,10 @@ static Value *get_current_task(jl_codectx_t &ctx);
 static Value *get_current_ptls(jl_codectx_t &ctx);
 static Value *get_last_age_field(jl_codectx_t &ctx);
 static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
-static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             const jl_cgval_t *args, size_t nargs, JuliaFunction *trampoline);
-static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             const jl_cgval_t *args, size_t nargs, JuliaFunction *trampoline);
+static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF,
+                             const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline);
+static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF,
+                             const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline);
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
 static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable=false);
@@ -1721,7 +1747,8 @@ static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G)
     return G->realize(M);
 }
 
-static Function *prepare_call_in(Module *M, JuliaFunction *G)
+template<typename TypeFn_t>
+static Function *prepare_call_in(Module *M, JuliaFunction<TypeFn_t> *G)
 {
     return G->realize(M);
 }
@@ -1803,7 +1830,7 @@ static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *st
 
 static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty)
 {
-    // Almost all of our inttoptr are generated due to representing `Ptr` with `getSizeTy(ctx.builder.getContext())`
+    // Almost all of our inttoptr are generated due to representing `Ptr` with `ctx.types().T_size`
     // in LLVM and most of these integers are generated from `ptrtoint` in the first place.
     if (auto I = dyn_cast<PtrToIntInst>(v)) {
         auto ptr = I->getOperand(0);
@@ -2120,7 +2147,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 if (!union_isaBB) {
                     union_isaBB = BasicBlock::Create(ctx.builder.getContext(), "union_isa", ctx.f);
                     ctx.builder.SetInsertPoint(union_isaBB);
-                    union_box_dt = emit_typeof(ctx, v.Vboxed, skip != NULL);
+                    union_box_dt = emit_typeof(ctx, v.Vboxed, skip != NULL, true);
                     post_union_isaBB = ctx.builder.GetInsertBlock();
                 }
             };
@@ -2138,7 +2165,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                     if (old_idx == 0) {
                         // didn't handle this item before, select its new union index
                         maybe_setup_union_isa();
-                        Value *cmp = ctx.builder.CreateICmpEQ(track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)), union_box_dt);
+                        Value *cmp = ctx.builder.CreateICmpEQ(emit_tagfrom(ctx, jt), union_box_dt);
                         union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | idx), union_box_tindex);
                     }
                 },
@@ -2300,111 +2327,113 @@ std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &conte
     m->setDataLayout(DL);
     m->setTargetTriple(triple.str());
 
-#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) && JL_LLVM_VERSION >= 130000
-    // tell Win32 to assume the stack is always 16-byte aligned,
-    // and to ensure that it is 16-byte aligned for out-going calls,
-    // to ensure compatibility with GCC codes
-    m->setOverrideStackAlignment(16);
-#endif
-#if defined(JL_DEBUG_BUILD) && JL_LLVM_VERSION >= 130000
+    if (triple.isOSWindows() && triple.getArch() == Triple::x86) {
+        // tell Win32 to assume the stack is always 16-byte aligned,
+        // and to ensure that it is 16-byte aligned for out-going calls,
+        // to ensure compatibility with GCC codes
+        m->setOverrideStackAlignment(16);
+    }
+
+#if defined(JL_DEBUG_BUILD)
     m->setStackProtectorGuard("global");
 #endif
     return m;
 }
 
-static void jl_init_function(Function *F)
+static void jl_init_function(Function *F, const Triple &TT)
 {
     // set any attributes that *must* be set on all functions
-#if JL_LLVM_VERSION >= 140000
     AttrBuilder attr(F->getContext());
+    if (TT.isOSWindows() && TT.getArch() == Triple::x86) {
+        // tell Win32 to assume the stack is always 16-byte aligned,
+        // and to ensure that it is 16-byte aligned for out-going calls,
+        // to ensure compatibility with GCC codes
+        attr.addStackAlignmentAttr(16);
+    }
+    if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
+#if JL_LLVM_VERSION < 150000
+        attr.addAttribute(Attribute::UWTable); // force NeedsWinEH
 #else
-    AttrBuilder attr;
-#endif
-#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    // tell Win32 to realign the stack to the next 16-byte boundary
-    // upon entry to any function. This achieves compatibility
-    // with both MinGW-GCC (which assumes an 16-byte-aligned stack) and
-    // i686 Windows (which uses a 4-byte-aligned stack)
-    attr.addStackAlignmentAttr(16);
+        attr.addUWTableAttr(llvm::UWTableKind::Default); // force NeedsWinEH
 #endif
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-    attr.addAttribute(Attribute::UWTable); // force NeedsWinEH
-#endif
-#ifdef JL_DISABLE_FPO
-    attr.addAttribute("frame-pointer", "all");
-#endif
-#if !defined(_COMPILER_ASAN_ENABLED_) && !defined(_OS_WINDOWS_)
-    // ASAN won't like us accessing undefined memory causing spurious issues,
-    // and Windows has platform-specific handling which causes it to mishandle
-    // this annotation. Other platforms should just ignore this if they don't
-    // implement it.
-    attr.addAttribute("probe-stack", "inline-asm");
-    //attr.addAttribute("stack-probe-size", "4096"); // can use this to change the default
+    }
+    if (jl_fpo_disabled(TT))
+        attr.addAttribute("frame-pointer", "all");
+    if (!TT.isOSWindows()) {
+#if !defined(_COMPILER_ASAN_ENABLED_)
+        // ASAN won't like us accessing undefined memory causing spurious issues,
+        // and Windows has platform-specific handling which causes it to mishandle
+        // this annotation. Other platforms should just ignore this if they don't
+        // implement it.
+        attr.addAttribute("probe-stack", "inline-asm");
+        //attr.addAttribute("stack-probe-size", "4096"); // can use this to change the default
 #endif
+    }
 #if defined(_COMPILER_ASAN_ENABLED_)
     attr.addAttribute(Attribute::SanitizeAddress);
 #endif
 #if defined(_COMPILER_MSAN_ENABLED_)
     attr.addAttribute(Attribute::SanitizeMemory);
 #endif
-#if JL_LLVM_VERSION >= 140000
     F->addFnAttrs(attr);
-#else
-    F->addAttributes(AttributeList::FunctionIndex, attr);
-#endif
 }
 
-static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig)
+static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t *rettype, bool prefer_specsig)
 {
-    int va = lam->def.method->isva;
-    jl_value_t *sig = lam->specTypes;
-    bool needsparams = false;
-    if (jl_is_method(lam->def.method)) {
-        if ((size_t)jl_subtype_env_size(lam->def.method->sig) != jl_svec_len(lam->sparam_vals))
-            needsparams = true;
-        for (size_t i = 0; i < jl_svec_len(lam->sparam_vals); ++i) {
-            if (jl_is_typevar(jl_svecref(lam->sparam_vals, i)))
-                needsparams = true;
-        }
-    }
     if (needsparams)
-        return std::make_pair(false, true);
+        return false;
     if (sig == (jl_value_t*)jl_anytuple_type)
-        return std::make_pair(false, false);
+        return false;
     if (!jl_is_datatype(sig))
-        return std::make_pair(false, false);
+        return false;
     if (jl_nparams(sig) == 0)
-        return std::make_pair(false, false);
+        return false;
     if (va) {
         if (jl_is_vararg(jl_tparam(sig, jl_nparams(sig) - 1)))
-            return std::make_pair(false, false);
+            return false;
     }
     // not invalid, consider if specialized signature is worthwhile
     if (prefer_specsig)
-        return std::make_pair(true, false);
+        return true;
     if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type)
-        return std::make_pair(true, false);
+        return true;
     if (jl_is_uniontype(rettype)) {
         bool allunbox;
         size_t nbytes, align, min_align;
         union_alloca_type((jl_uniontype_t*)rettype, allunbox, nbytes, align, min_align);
         if (nbytes > 0)
-            return std::make_pair(true, false); // some elements of the union could be returned unboxed avoiding allocation
+            return true; // some elements of the union could be returned unboxed avoiding allocation
     }
     if (jl_nparams(sig) <= 3) // few parameters == more efficient to pass directly
-        return std::make_pair(true, false);
+        return true;
     bool allSingleton = true;
     for (size_t i = 0; i < jl_nparams(sig); i++) {
         jl_value_t *sigt = jl_tparam(sig, i);
         bool issing = jl_is_datatype(sigt) && jl_is_datatype_singleton((jl_datatype_t*)sigt);
         allSingleton &= issing;
         if (!deserves_argbox(sigt) && !issing) {
-            return std::make_pair(true, false);
+            return true;
         }
     }
     if (allSingleton)
-        return std::make_pair(true, false);
-    return std::make_pair(false, false); // jlcall sig won't require any box allocations
+        return true;
+    return false; // jlcall sig won't require any box allocations
+}
+
+static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig)
+{
+    int va = lam->def.method->isva;
+    jl_value_t *sig = lam->specTypes;
+    bool needsparams = false;
+    if (jl_is_method(lam->def.method)) {
+        if ((size_t)jl_subtype_env_size(lam->def.method->sig) != jl_svec_len(lam->sparam_vals))
+            needsparams = true;
+        for (size_t i = 0; i < jl_svec_len(lam->sparam_vals); ++i) {
+            if (jl_is_typevar(jl_svecref(lam->sparam_vals, i)))
+                needsparams = true;
+        }
+    }
+    return std::make_pair(uses_specsig(sig, needsparams, va, rettype, prefer_specsig), needsparams);
 }
 
 
@@ -2417,7 +2446,7 @@ JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line);
 static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const char *name)
 {
     Value *pv = ConstantExpr::getIntToPtr(
-        ConstantInt::get(getSizeTy(ctx.builder.getContext()), (uintptr_t)ptr),
+        ConstantInt::get(ctx.types().T_size, (uintptr_t)ptr),
         getInt64PtrTy(ctx.builder.getContext()));
     Value *v = ctx.builder.CreateLoad(getInt64Ty(ctx.builder.getContext()), pv, true, name);
     v = ctx.builder.CreateAdd(v, addend);
@@ -2469,7 +2498,7 @@ static void cg_bdw(jl_codectx_t &ctx, jl_sym_t *var, jl_binding_t *b)
 static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, size_t nargs)
 {
     assert(nargs > 1);
-    jl_value_t **v = (jl_value_t**)alloca(sizeof(jl_value_t*) * nargs);
+    SmallVector<jl_value_t *> v(nargs);
     for (size_t i = 0; i < nargs; i++) {
         if (!args[i].constant)
             return NULL;
@@ -2481,7 +2510,7 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args,
     jl_current_task->world_age = 1;
     jl_value_t *result;
     JL_TRY {
-        result = jl_apply(v, nargs);
+        result = jl_apply(v.data(), nargs);
     }
     JL_CATCH {
         result = NULL;
@@ -2500,7 +2529,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
             return jl_get_global(ctx.module, sym);
         return NULL;
     }
-    if (jl_is_slot(ex) || jl_is_argument(ex))
+    if (jl_is_slotnumber(ex) || jl_is_argument(ex))
         return NULL;
     if (jl_is_ssavalue(ex)) {
         ssize_t idx = ((jl_ssavalue_t*)ex)->id - 1;
@@ -2594,7 +2623,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
 
 static bool slot_eq(jl_value_t *e, int sl)
 {
-    return (jl_is_slot(e) || jl_is_argument(e)) && jl_slot_number(e)-1 == sl;
+    return (jl_is_slotnumber(e) || jl_is_argument(e)) && jl_slot_number(e)-1 == sl;
 }
 
 // --- code gen for intrinsic functions ---
@@ -2637,7 +2666,7 @@ static std::set<int> assigned_in_try(jl_array_t *stmts, int s, long l)
         if (jl_is_expr(st)) {
             if (((jl_expr_t*)st)->head == jl_assign_sym) {
                 jl_value_t *ar = jl_exprarg(st, 0);
-                if (jl_is_slot(ar)) {
+                if (jl_is_slotnumber(ar)) {
                     av.insert(jl_slot_number(ar)-1);
                 }
             }
@@ -2740,7 +2769,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback &
 static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr)
 {
     auto scan_slot_arg = [&](jl_value_t *expr) {
-        if (jl_is_slot(expr) || jl_is_argument(expr)) {
+        if (jl_is_slotnumber(expr) || jl_is_argument(expr)) {
             int i = jl_slot_number(expr) - 1;
             ctx.slots[i].used = true;
             return true;
@@ -2845,8 +2874,8 @@ static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const
         }
         Value *neq = ctx.builder.CreateICmpNE(varg1, varg2);
         return emit_guarded_test(ctx, neq, true, [&] {
-            Value *dtarg = emit_typeof_boxed(ctx, arg1);
-            Value *dt_eq = ctx.builder.CreateICmpEQ(dtarg, emit_typeof_boxed(ctx, arg2));
+            Value *dtarg = emit_typeof(ctx, arg1, false, true);
+            Value *dt_eq = ctx.builder.CreateICmpEQ(dtarg, emit_typeof(ctx, arg2, false, true));
             return emit_guarded_test(ctx, dt_eq, false, [&] {
                 return ctx.builder.CreateTrunc(ctx.builder.CreateCall(prepare_call(jlegalx_func),
                                                                       {varg1, varg2, dtarg}), getInt1Ty(ctx.builder.getContext()));
@@ -2911,7 +2940,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
 
     if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
-        Type *at_int = INTT(at);
+        Type *at_int = INTT(at, ctx.emission_context.DL);
         Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ);
         Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ);
         return ctx.builder.CreateICmpEQ(varg1, varg2);
@@ -2955,7 +2984,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
             auto answer = ctx.builder.CreateCall(prepare_call(memcmp_func), {
                         ctx.builder.CreateBitCast(varg1, getInt8PtrTy(ctx.builder.getContext())),
                         ctx.builder.CreateBitCast(varg2, getInt8PtrTy(ctx.builder.getContext())),
-                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), sz) },
+                        ConstantInt::get(ctx.types().T_size, sz) },
                     ArrayRef<OperandBundleDef>(&OpBundle, nroots ? 1 : 0));
 
             if (arg1.tbaa || arg2.tbaa) {
@@ -3031,11 +3060,11 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
         // since it is normalized to `::Type{Union{}}` instead...
         if (arg1.TIndex)
             return emit_nullcheck_guard(ctx, nullcheck1, [&] {
-                return emit_exactly_isa(ctx, arg1, rt2); // rt2 is a singleton type
+                return emit_exactly_isa(ctx, arg1, (jl_datatype_t*)rt2); // rt2 is a singleton type
             });
         if (arg2.TIndex)
             return emit_nullcheck_guard(ctx, nullcheck2, [&] {
-                return emit_exactly_isa(ctx, arg2, rt1); // rt1 is a singleton type
+                return emit_exactly_isa(ctx, arg2, (jl_datatype_t*)rt1); // rt1 is a singleton type
             });
         if (!(arg1.isboxed || arg1.constant) || !(arg2.isboxed || arg2.constant))
             // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton
@@ -3058,8 +3087,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     bool justbits2 = jl_is_concrete_immutable(rt2);
     if (justbits1 || justbits2) { // whether this type is unique'd by value
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* {
-            jl_value_t *typ = justbits1 ? rt1 : rt2;
-            if (typ == (jl_value_t*)jl_bool_type) { // aka jl_pointer_egal
+            jl_datatype_t *typ = (jl_datatype_t*)(justbits1 ? rt1 : rt2);
+            if (typ == jl_bool_type) { // aka jl_pointer_egal
                 // some optimizations for bool, since pointer comparison may be better
                 if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer
                     Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
@@ -3069,14 +3098,14 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
             }
             if (rt1 == rt2)
                 return emit_bits_compare(ctx, arg1, arg2);
-            Value *same_type = emit_exactly_isa(ctx, (typ == rt2 ? arg1 : arg2), typ);
+            Value *same_type = emit_exactly_isa(ctx, (justbits1 ? arg2 : arg1), typ);
             BasicBlock *currBB = ctx.builder.GetInsertBlock();
             BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "is", ctx.f);
             BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_is", ctx.f);
             ctx.builder.CreateCondBr(same_type, isaBB, postBB);
             ctx.builder.SetInsertPoint(isaBB);
-            Value *bitcmp = emit_bits_compare(ctx, jl_cgval_t(arg1, typ, NULL),
-                                              jl_cgval_t(arg2, typ, NULL));
+            Value *bitcmp = emit_bits_compare(ctx, jl_cgval_t(arg1, (jl_value_t*)typ, NULL),
+                                              jl_cgval_t(arg2, (jl_value_t*)typ, NULL));
             isaBB = ctx.builder.GetInsertBlock(); // might have changed
             ctx.builder.CreateBr(postBB);
             ctx.builder.SetInsertPoint(postBB);
@@ -3271,7 +3300,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     }
 
     else if (f == jl_builtin_typeof && nargs == 1) {
-        *ret = emit_typeof(ctx, argv[1], false);
+        const jl_cgval_t &p = argv[1];
+        if (p.constant)
+            *ret = mark_julia_const(ctx, jl_typeof(p.constant));
+        else if (jl_is_concrete_type(p.typ))
+            *ret = mark_julia_const(ctx, p.typ);
+        else
+            *ret = mark_julia_type(ctx, emit_typeof(ctx, p, false, false), true, jl_datatype_type);
         return true;
     }
 
@@ -3326,7 +3361,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 #ifdef _P64
                 nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
 #endif
-                Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs));
+                Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs));
                 Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
                 *ret = mark_julia_type(ctx, r, true, jl_any_type);
                 return true;
@@ -3368,22 +3403,22 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                         return true;
                     }
                     else if (idx_const > ndims) {
-                        *ret = mark_julia_type(ctx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1), false, jl_long_type);
+                        *ret = mark_julia_type(ctx, ConstantInt::get(ctx.types().T_size, 1), false, jl_long_type);
                         return true;
                     }
                 }
                 else {
-                    Value *idx_dyn = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idx, (jl_value_t*)jl_long_type);
-                    error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(getSizeTy(ctx.builder.getContext()))),
+                    Value *idx_dyn = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
+                    error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size)),
                                  "arraysize: dimension out of range");
                     BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f);
                     BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange");
                     BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize");
                     ctx.builder.CreateCondBr(ctx.builder.CreateICmpSLE(idx_dyn,
-                                ConstantInt::get(getSizeTy(ctx.builder.getContext()), ndims)),
+                                ConstantInt::get(ctx.types().T_size, ndims)),
                             inBB, outBB);
                     ctx.builder.SetInsertPoint(outBB);
-                    Value *v_one = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
+                    Value *v_one = ConstantInt::get(ctx.types().T_size, 1);
                     ctx.builder.CreateBr(ansBB);
                     ctx.f->getBasicBlockList().push_back(inBB);
                     ctx.builder.SetInsertPoint(inBB);
@@ -3392,7 +3427,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     inBB = ctx.builder.GetInsertBlock(); // could have changed
                     ctx.f->getBasicBlockList().push_back(ansBB);
                     ctx.builder.SetInsertPoint(ansBB);
-                    PHINode *result = ctx.builder.CreatePHI(getSizeTy(ctx.builder.getContext()), 2);
+                    PHINode *result = ctx.builder.CreatePHI(ctx.types().T_size, 2);
                     result->addIncoming(v_one, outBB);
                     result->addIncoming(v_sz, inBB);
                     *ret = mark_julia_type(ctx, result, false, jl_long_type);
@@ -3443,7 +3478,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                         // isbits union selector bytes are stored after a->maxsize
                         Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd));
                         Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1));
-                        Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, getSizeTy(ctx.builder.getContext())));
+                        Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size));
                         Value *selidx_m = emit_arraylen(ctx, ary);
                         Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
                         ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
@@ -3551,7 +3586,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                         else {
                             Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd));
                             Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1));
-                            Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, getSizeTy(ctx.builder.getContext())));
+                            Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size));
                             Value *selidx_m = emit_arraylen(ctx, ary);
                             Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
                             ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
@@ -3652,11 +3687,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
                         Value *valen = emit_n_varargs(ctx);
                         jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field)
-                                ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs)),
+                                ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)),
                                 NULL, NULL);
-                        Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
+                        Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                         idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
-                        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs));
+                        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs));
                         Instruction *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, idx), Align(sizeof(void*)));
                         // if we know the result type of this load, we will mark that information here too
                         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_value);
@@ -3681,13 +3716,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     }
                     else {
                         // unknown index
-                        Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
+                        Value *vidx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                         if (emit_getfield_unknownidx(ctx, ret, obj, vidx, utt, boundscheck, order)) {
                             return true;
                         }
                     }
                 }
-                Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
+                Value *vidx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                 if (jl_is_tuple_type(utt) && is_tupletype_homogeneous(utt->parameters, true)) {
                     // For tuples, we can emit code even if we don't know the exact
                     // type (e.g. because we don't know the length). This is possible
@@ -3706,10 +3741,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                         // This is not necessary for correctness, but allows to omit
                         // the extra code for getting the length of the tuple
                         if (!bounds_check_enabled(ctx, boundscheck)) {
-                            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
-                        } else {
+                            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
+                        }
+                        else {
                             vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx,
-                                emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj)),
+                                emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)),
                                 jl_true);
                         }
                         bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
@@ -3722,7 +3758,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 }
 
                 // Unknown object, but field known to be integer
-                vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+                vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
                 Value *fld_val = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, obj), vidx });
                 *ret = mark_julia_type(ctx, fld_val, true, jl_any_type);
                 return true;
@@ -3800,9 +3836,9 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         }
         Value *sz;
         if (nf != -1)
-            sz = ConstantInt::get(getSizeTy(ctx.builder.getContext()), nf);
+            sz = ConstantInt::get(ctx.types().T_size, nf);
         else
-            sz = emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj));
+            sz = emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false));
         *ret = mark_julia_type(ctx, sz, false, jl_long_type);
         return true;
     }
@@ -3817,7 +3853,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 Value *tyv = boxed(ctx, typ);
                 Value *types_svec = emit_datatype_types(ctx, tyv);
                 Value *types_len = emit_datatype_nfields(ctx, tyv);
-                Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
+                Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                 jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
                 if (nargs == 3)
                     emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype");
@@ -3844,23 +3880,23 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 else {
                     sz = (1 + jl_svec_len(obj.constant)) * sizeof(void*);
                 }
-                *ret = mark_julia_type(ctx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sz), false, jl_long_type);
+                *ret = mark_julia_type(ctx, ConstantInt::get(ctx.types().T_size, sz), false, jl_long_type);
                 return true;
             }
             // String and SimpleVector's length fields have the same layout
-            auto ptr = emit_bitcast(ctx, boxed(ctx, obj), getSizePtrTy(ctx.builder.getContext()));
+            auto ptr = emit_bitcast(ctx, boxed(ctx, obj), ctx.types().T_size->getPointerTo());
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-            Value *len = ai.decorateInst(ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ptr, Align(sizeof(size_t))));
+            Value *len = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr));
             MDBuilder MDB(ctx.builder.getContext());
             if (sty == jl_simplevector_type) {
                 auto rng = MDB.createRange(
-                    Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX / sizeof(void*) - 1));
+                    Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX / sizeof(void*) - 1));
                 cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
-                len = ctx.builder.CreateMul(len, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(void*)));
-                len = ctx.builder.CreateAdd(len, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(void*)));
+                len = ctx.builder.CreateMul(len, ConstantInt::get(ctx.types().T_size, sizeof(void*)));
+                len = ctx.builder.CreateAdd(len, ConstantInt::get(ctx.types().T_size, sizeof(void*)));
             }
             else {
-                auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX));
+                auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX));
                 cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
             }
             *ret = mark_julia_type(ctx, len, false, jl_long_type);
@@ -3871,10 +3907,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             Value *elsize;
             size_t elsz;
             if (arraytype_constelsize(sty, &elsz)) {
-                elsize = ConstantInt::get(getSizeTy(ctx.builder.getContext()), elsz);
+                elsize = ConstantInt::get(ctx.types().T_size, elsz);
             }
             else {
-                elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), getSizeTy(ctx.builder.getContext()));
+                elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), ctx.types().T_size);
             }
             *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type);
             return true;
@@ -3924,8 +3960,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 isdefined_unknown_idx:
             if (nargs == 3 || fld.typ != (jl_value_t*)jl_long_type)
                 return false;
-            Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
-            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+            Value *vidx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
+            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
             Value *isd = ctx.builder.CreateCall(prepare_call(jlfieldisdefinedchecked_func), { boxed(ctx, obj), vidx });
             isd = ctx.builder.CreateTrunc(isd, getInt8Ty(ctx.builder.getContext()));
             *ret = mark_julia_type(ctx, isd, false, jl_bool_type);
@@ -3982,7 +4018,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 // emit this using the same type as emit_getfield_knownidx
                 // so that LLVM may be able to load-load forward them and fold the result
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-                fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(size_t))));
+                fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, ctx.types().alignof_ptr));
                 cast<LoadInst>(fldv)->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             }
             else {
@@ -4039,14 +4075,14 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 }
 
 // Returns ctx.types().T_prjlvalue
-static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             const jl_cgval_t *argv, size_t nargs, JuliaFunction *trampoline)
+static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF,
+                             const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline)
 {
     ++EmittedJLCalls;
     Function *TheTrampoline = prepare_call(trampoline);
     // emit arguments
     SmallVector<Value*, 4> theArgs;
-    theArgs.push_back(theFptr);
+    theArgs.push_back(theFptr.getCallee());
     if (theF)
         theArgs.push_back(theF);
     for (size_t i = 0; i < nargs; i++) {
@@ -4060,35 +4096,33 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
 }
 
 // Returns ctx.types().T_prjlvalue
-static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             const jl_cgval_t *argv, size_t nargs, JuliaFunction *trampoline)
+static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF,
+                             const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline)
 {
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline);
 }
 
-
-static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject,
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
                                           const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
 {
     ++EmittedSpecfunCalls;
     // emit specialized call site
-    bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
-    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, specFunctionObject, mi->specTypes, jlretty, is_opaque_closure);
-    FunctionType *cft = returninfo.decl->getFunctionType();
+    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure);
+    FunctionType *cft = returninfo.decl.getFunctionType();
     *cc = returninfo.cc;
     *return_roots = returninfo.return_roots;
 
     size_t nfargs = cft->getNumParams();
-    Value **argvals = (Value**)alloca(nfargs * sizeof(Value*));
+    SmallVector<Value *> argvals(nfargs);
     unsigned idx = 0;
-    AllocaInst *result;
+    AllocaInst *result = nullptr;
     switch (returninfo.cc) {
     case jl_returninfo_t::Boxed:
     case jl_returninfo_t::Register:
     case jl_returninfo_t::Ghosts:
         break;
     case jl_returninfo_t::SRet:
-        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType());
+        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
         assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
         argvals[idx] = result;
         idx++;
@@ -4109,42 +4143,68 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     }
 
     for (size_t i = 0; i < nargs; i++) {
-        jl_value_t *jt = (is_opaque_closure && i == 0) ? (jl_value_t*)jl_any_type :
-            jl_nth_slot_type(mi->specTypes, i);
-        if (is_uniquerep_Type(jt))
-            continue;
-        bool isboxed = deserves_argbox(jt);
-        Type *et = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
-        if (type_is_ghost(et))
-            continue;
-        assert(idx < nfargs);
-        Type *at = cft->getParamType(idx);
+        jl_value_t *jt = jl_nth_slot_type(specTypes, i);
+        // n.b.: specTypes is required to be a datatype by construction for specsig
         jl_cgval_t arg = argv[i];
-        if (isboxed) {
-            assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue);
-            argvals[idx] = boxed(ctx, arg);
-        }
-        else if (et->isAggregateType()) {
+        if (is_opaque_closure && i == 0) {
+            Type *at = cft->getParamType(idx);
+            // Special optimization for opaque closures: We know that specsig opaque
+            // closures don't look at their type tag (they are fairly quickly discarded
+            // for their environments). Therefore, we can just pass these as a pointer,
+            // rather than a boxed value.
             arg = value_to_pointer(ctx, arg);
-            // can lazy load on demand, no copy needed
-            assert(at == PointerType::get(et, AddressSpace::Derived));
             argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at));
         }
-        else {
-            assert(at == et);
-            Value *val = emit_unbox(ctx, et, arg, jt);
-            if (!val) {
-                // There was a type mismatch of some sort - exit early
-                CreateTrap(ctx.builder);
-                return jl_cgval_t();
+        else if (is_uniquerep_Type(jt)) {
+            continue;
+        } else {
+            bool isboxed = deserves_argbox(jt);
+            Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
+            if (type_is_ghost(et))
+                continue;
+            assert(idx < nfargs);
+            Type *at = cft->getParamType(idx);
+            if (isboxed) {
+                assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue);
+                argvals[idx] = boxed(ctx, arg);
+            }
+            else if (et->isAggregateType()) {
+                arg = value_to_pointer(ctx, arg);
+                // can lazy load on demand, no copy needed
+                assert(at == PointerType::get(et, AddressSpace::Derived));
+                argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at));
+            }
+            else {
+                assert(at == et);
+                Value *val = emit_unbox(ctx, et, arg, jt);
+                if (!val) {
+                    // There was a type mismatch of some sort - exit early
+                    CreateTrap(ctx.builder);
+                    return jl_cgval_t();
+                }
+                argvals[idx] = val;
             }
-            argvals[idx] = val;
         }
         idx++;
     }
     assert(idx == nfargs);
-    CallInst *call = ctx.builder.CreateCall(returninfo.decl, ArrayRef<Value*>(&argvals[0], nfargs));
-    call->setAttributes(returninfo.decl->getAttributes());
+    Value *TheCallee = returninfo.decl.getCallee();
+    if (fromexternal) {
+        std::string namep("p");
+        namep += cast<Function>(returninfo.decl.getCallee())->getName();
+        GlobalVariable *GV = cast_or_null<GlobalVariable>(jl_Module->getNamedValue(namep));
+        if (GV == nullptr) {
+            GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false,
+                                    GlobalVariable::ExternalLinkage,
+                                    Constant::getNullValue(TheCallee->getType()),
+                                    namep);
+            ctx.external_calls[std::make_tuple(fromexternal, true)] = GV;
+        }
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+        TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*))));
+    }
+    CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals);
+    call->setAttributes(returninfo.attrs);
 
     jl_cgval_t retval;
     switch (returninfo.cc) {
@@ -4155,6 +4215,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
             retval = mark_julia_type(ctx, call, false, jlretty);
             break;
         case jl_returninfo_t::SRet:
+            assert(result);
             retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack);
             break;
         case jl_returninfo_t::Union: {
@@ -4182,13 +4243,38 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     return update_julia_type(ctx, retval, inferred_retty);
 }
 
-static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject,
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
+                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+{
+    bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+    return emit_call_specfun_other(ctx, is_opaque_closure, mi->specTypes, jlretty, NULL,
+        specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty);
+}
+
+static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
                                           const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
 {
-    auto theFptr = cast<Function>(
-        jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee());
-    addRetAttr(theFptr, Attribute::NonNull);
-    Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, julia_call);
+    Value *theFptr;
+    if (fromexternal) {
+        std::string namep("p");
+        namep += specFunctionObject;
+        GlobalVariable *GV = cast_or_null<GlobalVariable>(jl_Module->getNamedValue(namep));
+        Type *pfunc = ctx.types().T_jlfunc->getPointerTo();
+        if (GV == nullptr) {
+            GV = new GlobalVariable(*jl_Module, pfunc, false,
+                                    GlobalVariable::ExternalLinkage,
+                                    Constant::getNullValue(pfunc),
+                                    namep);
+            ctx.external_calls[std::make_tuple(fromexternal, false)] = GV;
+        }
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+        theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*))));
+    }
+    else {
+        theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee();
+        addRetAttr(cast<Function>(theFptr), Attribute::NonNull);
+    }
+    Value *ret = emit_jlcall(ctx, FunctionCallee(ctx.types().T_jlfunc, theFptr), nullptr, argv, nargs, julia_call);
     return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty);
 }
 
@@ -4200,13 +4286,13 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     assert(arglen >= 2);
 
     jl_cgval_t lival = emit_expr(ctx, args[0]);
-    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    SmallVector<jl_cgval_t> argv(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i + 1]);
         if (argv[i].typ == jl_bottom_type)
             return jl_cgval_t();
     }
-    return emit_invoke(ctx, lival, argv, nargs, rt);
+    return emit_invoke(ctx, lival, argv.data(), nargs, rt);
 }
 
 static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt)
@@ -4223,12 +4309,12 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
             FunctionType *ft = ctx.f->getFunctionType();
             StringRef protoname = ctx.f->getName();
             if (ft == ctx.types().T_jlfunc) {
-                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, argv, nargs, rt);
+                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, nullptr, argv, nargs, rt);
                 handled = true;
             }
             else if (ft != ctx.types().T_jlfuncparams) {
                 unsigned return_roots = 0;
-                result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, argv, nargs, &cc, &return_roots, rt);
+                result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, nullptr, argv, nargs, &cc, &return_roots, rt);
                 handled = true;
             }
         }
@@ -4236,7 +4322,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
             jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
             jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
             if (ci != jl_nothing) {
-                auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
                  // check if we know how to handle this specptr
                 if (invoke == jl_fptr_const_return_addr) {
                     result = mark_julia_const(ctx, codeinst->rettype_const);
@@ -4248,34 +4334,43 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
                     std::string name;
                     StringRef protoname;
                     bool need_to_emit = true;
-                    bool cache_valid = ctx.use_cache;
+                    bool cache_valid = ctx.use_cache || ctx.external_linkage;
                     bool external = false;
-                    if (ctx.external_linkage) {
-                       if (jl_object_in_image((jl_value_t*)codeinst)) {
-                           // Target is present in another pkgimage
-                           cache_valid = true;
-                           external = true;
-                       }
+
+                    // Check if we already queued this up
+                    auto it = ctx.call_targets.find(codeinst);
+                    if (need_to_emit && it != ctx.call_targets.end()) {
+                        protoname = std::get<2>(it->second)->getName();
+                        need_to_emit = cache_valid = false;
                     }
 
+                    // Check if it is already compiled (either JIT or externally)
                     if (cache_valid) {
                         // optimization: emit the correct name immediately, if we know it
                         // TODO: use `emitted` map here too to try to consolidate names?
                         // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-                        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
                         auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                         if (fptr) {
-                            if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) {
+                            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                                jl_cpu_pause();
+                            }
+                            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                            if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) {
                                 protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
-                                need_to_emit = false;
+                                if (ctx.external_linkage) {
+                                    // TODO: Add !specsig support to aotcompile.cpp
+                                    // Check that the codeinst is containing native code
+                                    if (specsig && jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b100) {
+                                        external = true;
+                                        need_to_emit = false;
+                                    }
+                                }
+                                else { // ctx.use_cache
+                                    need_to_emit = false;
+                                }
                             }
                         }
                     }
-                    auto it = ctx.call_targets.find(codeinst);
-                    if (need_to_emit && it != ctx.call_targets.end()) {
-                        protoname = std::get<2>(it->second)->getName();
-                        need_to_emit = false;
-                    }
                     if (need_to_emit) {
                         raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
                         protoname = StringRef(name);
@@ -4283,16 +4378,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
                     jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
                     unsigned return_roots = 0;
                     if (specsig)
-                        result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt);
+                        result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt);
                     else
-                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt);
-                    if (external) {
-                        assert(!need_to_emit);
-                        auto calledF = jl_Module->getFunction(protoname);
-                        assert(calledF);
-                        // TODO: Check if already present?
-                        ctx.external_calls[std::make_tuple(codeinst, specsig)] = calledF;
-                    }
+                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, rt);
                     handled = true;
                     if (need_to_emit) {
                         Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
@@ -4319,7 +4407,7 @@ static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_
     size_t nargs = arglen - 1;
     assert(arglen >= 2);
     jl_cgval_t lival = emit_expr(ctx, args[0]);
-    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    SmallVector<jl_cgval_t> argv(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i + 1]);
         if (argv[i].typ == jl_bottom_type)
@@ -4328,24 +4416,51 @@ static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_
     const jl_cgval_t &f = argv[0];
     jl_cgval_t ret;
     if (f.constant && f.constant == jl_builtin_modifyfield) {
-        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival))
+        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv.data(), nargs - 1, &lival))
             return ret;
         auto it = builtin_func_map().find(jl_f_modifyfield_addr);
         assert(it != builtin_func_map().end());
         Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call);
         return mark_julia_type(ctx, oldnew, true, rt);
     }
-    if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) {
+    if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) {
         JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
         if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1)
-            return emit_atomic_pointerop(ctx, fi, argv, nargs - 1, &lival);
+            return emit_atomic_pointerop(ctx, fi, argv.data(), nargs - 1, &lival);
     }
 
     // emit function and arguments
-    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call);
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv.data(), nargs, julia_call);
     return mark_julia_type(ctx, callval, true, rt);
 }
 
+static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, jl_value_t *sigtype, jl_cgval_t *argv, size_t nargs)
+{
+    jl_datatype_t *oc_argt = (jl_datatype_t *)jl_tparam0(oc_type);
+    jl_value_t *oc_rett = jl_tparam1(oc_type);
+    jl_svec_t *types = jl_get_fieldtypes((jl_datatype_t*)oc_argt);
+    size_t ntypes = jl_svec_len(types);
+    for (size_t i = 0; i < nargs-1; ++i) {
+        jl_value_t *typ = i >= ntypes ? jl_svecref(types, ntypes-1) : jl_svecref(types, i);
+        if (jl_is_vararg(typ))
+            typ = jl_unwrap_vararg(typ);
+        emit_typecheck(ctx, argv[i+1], typ, "typeassert");
+        argv[i+1] = update_julia_type(ctx, argv[i+1], typ);
+    }
+    jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
+    unsigned return_roots = 0;
+
+    // Load specptr
+    jl_cgval_t &theArg = argv[0];
+    jl_cgval_t closure_specptr = emit_getfield_knownidx(ctx, theArg, 4, (jl_datatype_t*)oc_type, jl_memory_order_notatomic);
+    Value *specptr = emit_unbox(ctx, ctx.types().T_size, closure_specptr, (jl_value_t*)jl_long_type);
+    JL_GC_PUSH1(&sigtype);
+    jl_cgval_t r = emit_call_specfun_other(ctx, true, sigtype, oc_rett, specptr, "", NULL, argv, nargs,
+        &cc, &return_roots, oc_rett);
+    JL_GC_POP();
+    return r;
+}
+
 static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bool is_promotable)
 {
     ++EmittedCalls;
@@ -4354,7 +4469,7 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
     assert(nargs >= 1);
     jl_cgval_t f = emit_expr(ctx, args[0]);
 
-    if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) {
+    if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) {
         JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
         return emit_intrinsic(ctx, fi, args, nargs - 1);
     }
@@ -4362,8 +4477,8 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
     jl_value_t *context = ctx.params->generic_context == jl_nothing ? nullptr : ctx.params->generic_context;
     size_t n_generic_args = nargs + (context ? 1 : 0);
 
-    jl_cgval_t *generic_argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * n_generic_args);
-    jl_cgval_t *argv = generic_argv;
+    SmallVector<jl_cgval_t> generic_argv(n_generic_args);
+    jl_cgval_t *argv = generic_argv.data();
     if (context) {
         generic_argv[0] = mark_julia_const(ctx, context);
         argv = &generic_argv[1];
@@ -4392,8 +4507,23 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
         }
     }
 
+    // handle calling an OpaqueClosure
+    if (jl_is_concrete_type(f.typ) && jl_subtype(f.typ, (jl_value_t*)jl_opaque_closure_type)) {
+        jl_value_t *oc_argt = jl_tparam0(f.typ);
+        jl_value_t *oc_rett = jl_tparam1(f.typ);
+        if (jl_is_datatype(oc_argt) && jl_tupletype_length_compat(oc_argt, nargs-1)) {
+            jl_value_t *sigtype = jl_argtype_with_function_type((jl_value_t*)f.typ, (jl_value_t*)oc_argt);
+            if (uses_specsig(sigtype, false, true, oc_rett, true)) {
+                JL_GC_PUSH1(&sigtype);
+                jl_cgval_t r = emit_specsig_oc_call(ctx, f.typ, sigtype, argv, nargs);
+                JL_GC_POP();
+                return r;
+            }
+        }
+    }
+
     // emit function and arguments
-    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, generic_argv, n_generic_args, julia_call);
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, generic_argv.data(), n_generic_args, julia_call);
     return mark_julia_type(ctx, callval, true, rt);
 }
 
@@ -4503,8 +4633,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
             i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
-    Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false),
-            track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type)));
+    Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type));
     jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig;
     for (size_t j = 0; j < i; j++) {
         sparam = (jl_unionall_t*)sparam->body;
@@ -4517,7 +4646,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
 static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
 {
     Value *isnull = NULL;
-    if (jl_is_slot(sym) || jl_is_argument(sym)) {
+    if (jl_is_slotnumber(sym) || jl_is_argument(sym)) {
         size_t sl = jl_slot_number(sym) - 1;
         jl_varinfo_t &vi = ctx.slots[sl];
         if (!vi.usedUndef)
@@ -4559,8 +4688,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
                 i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
         Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
-        isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false),
-            track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type)));
+        isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type));
     }
     else {
         jl_module_t *modu;
@@ -4597,8 +4725,8 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
     return mark_julia_type(ctx, isnull, false, jl_bool_type);
 }
 
-static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname, jl_value_t *better_typ=NULL) {
-    jl_value_t *typ = better_typ ? better_typ : vi.value.typ;
+static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname) {
+    jl_value_t *typ = vi.value.typ;
     jl_cgval_t v;
     Value *isnull = NULL;
     if (vi.boxroot == NULL || vi.pTIndex != NULL) {
@@ -4675,14 +4803,7 @@ static jl_cgval_t emit_local(jl_codectx_t &ctx, jl_value_t *slotload)
     size_t sl = jl_slot_number(slotload) - 1;
     jl_varinfo_t &vi = ctx.slots[sl];
     jl_sym_t *sym = slot_symbol(ctx, sl);
-    jl_value_t *typ = NULL;
-    if (jl_typeis(slotload, jl_typedslot_type)) {
-        // use the better type from inference for this load
-        typ = jl_typedslot_get_type(slotload);
-        if (jl_is_typevar(typ))
-            typ = ((jl_tvar_t*)typ)->ub;
-    }
-    return emit_varinfo(ctx, vi, sym, typ);
+    return emit_varinfo(ctx, vi, sym);
 }
 
 static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Value *isboxed, jl_cgval_t rval_info)
@@ -4929,7 +5050,7 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi
     assert(!jl_is_ssavalue(l));
     jl_cgval_t rval_info = emit_expr(ctx, r, ssaval);
 
-    if (jl_is_slot(l)) {
+    if (jl_is_slotnumber(l)) {
         int sl = jl_slot_number(l) - 1;
         // it's a local variable
         jl_varinfo_t &vi = ctx.slots[sl];
@@ -5035,7 +5156,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
 {
     if (jl_is_ssavalue(expr) && ssaval_result == -1)
         return; // value not used, no point in attempting codegen for it
-    if (jl_is_slot(expr) && ssaval_result == -1) {
+    if (jl_is_slotnumber(expr) && ssaval_result == -1) {
         size_t sl = jl_slot_number(expr) - 1;
         jl_varinfo_t &vi = ctx.slots[sl];
         if (vi.usedUndef)
@@ -5047,7 +5168,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     }
     if (jl_is_newvarnode(expr)) {
         jl_value_t *var = jl_fieldref(expr, 0);
-        assert(jl_is_slot(var));
+        assert(jl_is_slotnumber(var));
         jl_varinfo_t &vi = ctx.slots[jl_slot_number(var)-1];
         if (vi.usedUndef) {
             // create a new uninitialized variable
@@ -5080,7 +5201,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     }
     else if (head == jl_pop_exception_sym) {
         jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0));
-        assert(excstack_state.V && excstack_state.V->getType() == getSizeTy(ctx.builder.getContext()));
+        assert(excstack_state.V && excstack_state.V->getType() == ctx.types().T_size);
         ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V);
         return;
     }
@@ -5103,10 +5224,10 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     for (size_t i = 0; i < jl_svec_len(argt_typ->parameters); ++i) {
         jl_svecset(sig_args, 1+i, jl_svecref(argt_typ->parameters, i));
     }
-    sigtype = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
+    sigtype = jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
 
     jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
-    jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred(mi, ctx.world, ctx.world);
+    jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.world, ctx.world);
 
     if (ci == NULL || (jl_value_t*)ci == jl_nothing) {
         JL_GC_POP();
@@ -5119,7 +5240,7 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     }
     ++EmittedOpaqueClosureFunctions;
 
-    ir = jl_uncompress_ir(closure_method, ci, (jl_array_t*)inferred);
+    ir = jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred);
 
     // TODO: Emit this inline and outline it late using LLVM's coroutine support.
     orc::ThreadSafeModule closure_m = jl_create_ts_module(
@@ -5141,7 +5262,7 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
         F = Function::Create(get_func_sig(ctx.builder.getContext()),
                              Function::ExternalLinkage,
                              fname, jl_Module);
-        jl_init_function(F);
+        jl_init_function(F, ctx.emission_context.TargetTriple);
         F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()}));
     }
     Function *specF = NULL;
@@ -5151,9 +5272,9 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
         //emission context holds context lock so can get module
         specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
         if (specF) {
-            jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module,
+            jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL,
                 closure_decls.specFunctionObject, sigtype, rettype, true);
-            specF = returninfo.decl;
+            specF = cast<Function>(returninfo.decl.getCallee());
         }
     }
     ctx.oc_modules.push_back(std::move(closure_m));
@@ -5169,7 +5290,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         jl_sym_t *sym = (jl_sym_t*)expr;
         return emit_globalref(ctx, ctx.module, sym, AtomicOrdering::Unordered);
     }
-    if (jl_is_slot(expr) || jl_is_argument(expr)) {
+    if (jl_is_slotnumber(expr) || jl_is_argument(expr)) {
         return emit_local(ctx, expr);
     }
     if (jl_is_ssavalue(expr)) {
@@ -5257,7 +5378,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         jl_cgval_t res = emit_call(ctx, ex, expr_t, is_promotable);
         // some intrinsics (e.g. typeassert) can return a wider type
         // than what's actually possible
-        if (is_promotable && res.promotion_point) {
+        if (is_promotable && res.promotion_point && res.promotion_ssa == -1) {
             res.promotion_ssa = ssaidx_0based;
         }
         res = update_julia_type(ctx, res, expr_t);
@@ -5286,7 +5407,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
     else if (head == jl_method_sym) {
         if (nargs == 1) {
             jl_value_t *mn = args[0];
-            assert(jl_is_symbol(mn) || jl_is_slot(mn));
+            assert(jl_is_symbol(mn) || jl_is_slotnumber(mn));
 
             Value *bp = NULL, *name;
             jl_binding_t *bnd = NULL;
@@ -5314,7 +5435,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                 bp = julia_binding_gv(ctx, bnd);
                 bp = julia_binding_pvalue(ctx, bp);
             }
-            else if (jl_is_slot(mn) || jl_is_argument(mn)) {
+            else if (jl_is_slotnumber(mn) || jl_is_argument(mn)) {
                 // XXX: eval_methoddef does not have this code branch
                 int sl = jl_slot_number(mn)-1;
                 jl_varinfo_t &vi = ctx.slots[sl];
@@ -5371,7 +5492,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             is_promotable = ctx.ssavalue_usecount.at(ssaidx_0based) == 1;
         }
         assert(nargs > 0);
-        jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+        SmallVector<jl_cgval_t> argv(nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
         }
@@ -5380,12 +5501,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                 jl_is_datatype(jl_tparam0(ty)) &&
                 jl_is_concrete_type(jl_tparam0(ty))) {
             assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1);
-            jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1], is_promotable);
+            jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, argv.data() + 1, is_promotable);
             if (is_promotable && res.promotion_point && res.promotion_ssa==-1)
                 res.promotion_ssa = ssaidx_0based;
             return res;
         }
-        Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv, nargs, julia_call);
+        Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv.data(), nargs, julia_call);
         // temporarily mark as `Any`, expecting `emit_ssaval_assign` to update
         // it to the inferred type.
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
@@ -5415,7 +5536,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         if (source.constant == NULL) {
             // For now, we require non-constant source to be handled by using
             // eval. This should probably be a verifier error and an abort here.
-            emit_error(ctx, "(internal error) invalid IR: opaque closure source be constant");
+            emit_error(ctx, "(internal error) invalid IR: opaque closure source must be constant");
             return jl_cgval_t();
         }
         bool can_optimize = argt.constant != NULL && lb.constant != NULL && ub.constant != NULL &&
@@ -5426,33 +5547,33 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
 
         if (can_optimize) {
             jl_value_t *closure_t = NULL;
-            jl_tupletype_t *env_t = NULL;
+            jl_value_t *env_t = NULL;
             JL_GC_PUSH2(&closure_t, &env_t);
 
-            jl_value_t **env_component_ts = (jl_value_t**)alloca(sizeof(jl_value_t*) * (nargs-4));
+            SmallVector<jl_value_t *> env_component_ts(nargs-4);
             for (size_t i = 0; i < nargs - 4; ++i) {
                 env_component_ts[i] = argv[4+i].typ;
             }
 
-            env_t = jl_apply_tuple_type_v(env_component_ts, nargs-4);
+            env_t = jl_apply_tuple_type_v(env_component_ts.data(), nargs-4);
             // we need to know the full env type to look up the right specialization
-            if (jl_is_concrete_type((jl_value_t*)env_t)) {
+            if (jl_is_concrete_type(env_t)) {
                 jl_tupletype_t *argt_typ = (jl_tupletype_t*)argt.constant;
                 Function *F, *specF;
-                std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, env_t, argt_typ, ub.constant);
+                std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, (jl_datatype_t*)env_t, argt_typ, ub.constant);
                 if (F) {
                     jl_cgval_t jlcall_ptr = mark_julia_type(ctx, F, false, jl_voidpointer_type);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
-                    Instruction *I = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), get_last_age_field(ctx), Align(sizeof(size_t)));
+                    Instruction *I = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_last_age_field(ctx), ctx.types().alignof_ptr);
                     jl_cgval_t world_age = mark_julia_type(ctx, ai.decorateInst(I), false, jl_long_type);
                     jl_cgval_t fptr;
                     if (specF)
                         fptr = mark_julia_type(ctx, specF, false, jl_voidpointer_type);
                     else
-                        fptr = mark_julia_type(ctx, (llvm::Value*)Constant::getNullValue(getSizeTy(ctx.builder.getContext())), false, jl_voidpointer_type);
+                        fptr = mark_julia_type(ctx, (llvm::Value*)Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type);
 
                     // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC
-                    jl_cgval_t env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-4, &argv.data()[4]);
+                    jl_cgval_t env = emit_new_struct(ctx, env_t, nargs-4, &argv.data()[4]);
 
                     jl_cgval_t closure_fields[5] = {
                         env,
@@ -5516,7 +5637,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         return mark_julia_const(ctx, bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false);
     }
     else if (head == jl_gc_preserve_begin_sym) {
-        jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+        SmallVector<jl_cgval_t> argv(nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
         }
@@ -5584,13 +5705,13 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal
 
 static Value *get_current_task(jl_codectx_t &ctx)
 {
-    return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack);
+    return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack);
 }
 
 // Get PTLS through current task.
 static Value *get_current_ptls(jl_codectx_t &ctx)
 {
-    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
+    return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
 }
 
 // Get the address of the world age of the current task
@@ -5598,9 +5719,9 @@ static Value *get_last_age_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
     return ctx.builder.CreateInBoundsGEP(
-            getSizeTy(ctx.builder.getContext()),
-            ctx.builder.CreateBitCast(ct, getSizePtrTy(ctx.builder.getContext())),
-            ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_task_t, world_age) / sizeof(size_t)),
+            ctx.types().T_size,
+            ctx.builder.CreateBitCast(ct, ctx.types().T_size->getPointerTo()),
+            ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr),
             "world_age");
 }
 
@@ -5613,7 +5734,7 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     Function *f = Function::Create(ctx.types().T_jlfunc,
             GlobalVariable::InternalLinkage,
             name, M);
-    jl_init_function(f);
+    jl_init_function(f, params.TargetTriple);
     //f->setAlwaysInline();
     ctx.f = f; // for jl_Module
     BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f);
@@ -5621,14 +5742,7 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     Function *theFunc;
     Value *theFarg;
     auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
-
     bool cache_valid = params.cache;
-    if (params.external_linkage) {
-        if (jl_object_in_image((jl_value_t*)codeinst)) {
-            // Target is present in another pkgimage
-            cache_valid = true;
-        }
-    }
 
     if (cache_valid && invoke != NULL) {
         StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
@@ -5658,7 +5772,7 @@ static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) {
 
 static void emit_cfunc_invalidate(
         Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
-        jl_value_t *calltype, jl_value_t *rettype,
+        jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
         size_t nargs,
         jl_codegen_params_t &params,
         Function *target)
@@ -5674,15 +5788,26 @@ static void emit_cfunc_invalidate(
     allocate_gc_frame(ctx, b0);
 
     Function::arg_iterator AI = gf_thunk->arg_begin();
-    jl_cgval_t *myargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    SmallVector<jl_cgval_t> myargs(nargs);
     if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
         ++AI;
     if (return_roots)
         ++AI;
     for (size_t i = 0; i < nargs; i++) {
         jl_value_t *jt = jl_nth_slot_type(calltype, i);
-        bool isboxed = deserves_argbox(jt);
-        Type *et = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
+        // n.b. specTypes is required to be a datatype by construction for specsig
+        bool isboxed = false;
+        Type *et;
+        if (i == 0 && is_for_opaque_closure) {
+            et = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
+        }
+        else if (deserves_argbox(jt)) {
+            et = ctx.types().T_prjlvalue;
+            isboxed = true;
+        }
+        else {
+            et = julia_type_to_llvm(ctx, jt);
+        }
         if (is_uniquerep_Type(jt)) {
             myargs[i] = mark_julia_const(ctx, jl_tparam0(jt));
         }
@@ -5694,7 +5819,7 @@ static void emit_cfunc_invalidate(
             Value *arg_v = &*AI;
             ++AI;
             Type *at = arg_v->getType();
-            if (!isboxed && et->isAggregateType()) {
+            if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) {
                 myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const);
             }
             else {
@@ -5705,7 +5830,7 @@ static void emit_cfunc_invalidate(
         }
     }
     assert(AI == gf_thunk->arg_end());
-    Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs, nargs, julia_call);
+    Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs.data(), nargs, julia_call);
     jl_cgval_t gf_retbox = mark_julia_type(ctx, gf_ret, true, jl_any_type);
     if (cc != jl_returninfo_t::Boxed) {
         emit_typecheck(ctx, gf_retbox, rettype, "cfunction");
@@ -5741,7 +5866,7 @@ static void emit_cfunc_invalidate(
     case jl_returninfo_t::Union: {
         Type *retty = gf_thunk->getReturnType();
         Value *gf_retval = UndefValue::get(retty);
-        Value *tindex = compute_box_tindex(ctx, emit_typeof_boxed(ctx, gf_retbox), (jl_value_t*)jl_any_type, rettype);
+        Value *tindex = compute_box_tindex(ctx, emit_typeof(ctx, gf_retbox, false, true), (jl_value_t*)jl_any_type, rettype);
         tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
         gf_retval = ctx.builder.CreateInsertValue(gf_retval, gf_ret, 0);
         gf_retval = ctx.builder.CreateInsertValue(gf_retval, tindex, 1);
@@ -5758,14 +5883,15 @@ static void emit_cfunc_invalidate(
 
 static void emit_cfunc_invalidate(
         Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
-        jl_value_t *calltype, jl_value_t *rettype,
+        jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
         size_t nargs,
         jl_codegen_params_t &params)
 {
-    emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, nargs, params,
+    emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params,
         prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func));
 }
 
+#include <iostream>
 static Function* gen_cfun_wrapper(
     Module *into, jl_codegen_params_t &params,
     const function_sig_t &sig, jl_value_t *ff, const char *aliasname,
@@ -5790,9 +5916,15 @@ static Function* gen_cfun_wrapper(
     if (lam && params.cache) {
         // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
         codeinst = jl_compile_method_internal(lam, world);
-        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
         auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
         assert(invoke);
+        if (fptr) {
+            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                jl_cpu_pause();
+            }
+            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        }
         // WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
         if (invoke == jl_fptr_args_addr) {
             callptr = fptr;
@@ -5803,7 +5935,7 @@ static Function* gen_cfun_wrapper(
             callptr = (void*)codeinst->rettype_const;
             calltype = 2;
         }
-        else if (codeinst->isspecsig) {
+        else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
             callptr = fptr;
             calltype = 3;
         }
@@ -5834,13 +5966,8 @@ static Function* gen_cfun_wrapper(
         // we are adding the extra nest parameter after sret arg.
         std::vector<std::pair<unsigned, AttributeSet>> newAttributes;
         newAttributes.reserve(attributes.getNumAttrSets() + 1);
-#if JL_LLVM_VERSION >= 140000
         auto it = *attributes.indexes().begin();
         const auto it_end = *attributes.indexes().end();
-#else
-        auto it = attributes.index_begin();
-        const auto it_end = attributes.index_end();
-#endif
 
         // Skip past FunctionIndex
         if (it == AttributeList::AttrIndex::FunctionIndex) {
@@ -5855,11 +5982,7 @@ static Function* gen_cfun_wrapper(
         }
 
         // Add the new nest attribute
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder attrBuilder(M->getContext());
-#else
-        AttrBuilder attrBuilder;
-#endif
         attrBuilder.addAttribute(Attribute::Nest);
         newAttributes.emplace_back(it, AttributeSet::get(M->getContext(), attrBuilder));
 
@@ -5888,7 +6011,7 @@ static Function* gen_cfun_wrapper(
     Function *cw = Function::Create(functype,
             GlobalVariable::ExternalLinkage,
             funcName, M);
-    jl_init_function(cw);
+    jl_init_function(cw, params.TargetTriple);
     cw->setAttributes(AttributeList::get(M->getContext(), {attributes, cw->getAttributes()}));
 
     jl_codectx_t ctx(M->getContext(), params);
@@ -5906,21 +6029,21 @@ static Function* gen_cfun_wrapper(
     Value *world_age_field = get_last_age_field(ctx);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
     Value *last_age = ai.decorateInst(
-            ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), world_age_field, Align(sizeof(size_t))));
+            ctx.builder.CreateAlignedLoad(ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
 
-    Value *world_v = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()),
-        prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
+    Value *world_v = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
+        prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
     cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Acquire);
 
     Value *age_ok = NULL;
     if (calltype) {
         LoadInst *lam_max = ctx.builder.CreateAlignedLoad(
-                getSizeTy(ctx.builder.getContext()),
+                ctx.types().T_size,
                 ctx.builder.CreateConstInBoundsGEP1_32(
-                    getSizeTy(ctx.builder.getContext()),
-                    emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), getSizePtrTy(ctx.builder.getContext())),
-                    offsetof(jl_code_instance_t, max_world) / sizeof(size_t)),
-                Align(sizeof(size_t)));
+                    ctx.types().T_size,
+                    emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), ctx.types().T_size->getPointerTo()),
+                    offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr),
+                ctx.types().alignof_ptr);
         age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v);
     }
     ctx.builder.CreateStore(world_v, world_age_field);
@@ -5929,7 +6052,7 @@ static Function* gen_cfun_wrapper(
     Function::arg_iterator AI = cw->arg_begin();
     Value *sretPtr = sig.sret ? &*AI++ : NULL;
     Value *nestPtr = nest ? &*AI++ : NULL;
-    jl_cgval_t *inputargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * (nargs + 1));
+    SmallVector<jl_cgval_t> inputargs(nargs + 1);
     if (ff) {
         // we need to pass the function object even if (even though) it is a singleton
         inputargs[0] = mark_julia_const(ctx, ff);
@@ -6008,7 +6131,7 @@ static Function* gen_cfun_wrapper(
                 BasicBlock *unboxedBB = BasicBlock::Create(ctx.builder.getContext(), "maybe-unboxed", cw);
                 BasicBlock *isanyBB = BasicBlock::Create(ctx.builder.getContext(), "any", cw);
                 BasicBlock *afterBB = BasicBlock::Create(ctx.builder.getContext(), "after", cw);
-                Value *isrtboxed = ctx.builder.CreateIsNull(val);
+                Value *isrtboxed = ctx.builder.CreateIsNull(val); // XXX: this is the wrong condition and should be inspecting runtime_dt intead
                 ctx.builder.CreateCondBr(isrtboxed, boxedBB, loadBB);
                 ctx.builder.SetInsertPoint(boxedBB);
                 Value *p1 = ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue);
@@ -6097,7 +6220,7 @@ static Function* gen_cfun_wrapper(
             if (!theFptr) {
                 theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage,
                                            fname, jl_Module);
-                jl_init_function(theFptr);
+                jl_init_function(theFptr, ctx.emission_context.TargetTriple);
                 addRetAttr(theFptr, Attribute::NonNull);
             }
             else {
@@ -6120,7 +6243,7 @@ static Function* gen_cfun_wrapper(
             ctx.builder.CreateBr(b_after);
             ctx.builder.SetInsertPoint(b_generic);
         }
-        Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs, nargs + 1, julia_call);
+        Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs.data(), nargs + 1, julia_call);
         if (age_ok) {
             ctx.builder.CreateBr(b_after);
             ctx.builder.SetInsertPoint(b_after);
@@ -6136,8 +6259,8 @@ static Function* gen_cfun_wrapper(
         assert(calltype == 3);
         // emit a specsig call
         StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
-        jl_returninfo_t returninfo = get_specsig_function(ctx, M, protoname, lam->specTypes, astrt, is_opaque_closure);
-        FunctionType *cft = returninfo.decl->getFunctionType();
+        jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure);
+        FunctionType *cft = returninfo.decl.getFunctionType();
         jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
 
         // TODO: Can use use emit_call_specfun_other here?
@@ -6150,7 +6273,7 @@ static Function* gen_cfun_wrapper(
             }
             else {
                 if (jlfunc_sret) {
-                    result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType());
+                    result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
                     assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
                 } else {
                     result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes));
@@ -6169,6 +6292,7 @@ static Function* gen_cfun_wrapper(
             Value *arg;
             jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type :
                 jl_nth_slot_type(lam->specTypes, i);
+            // n.b. specTypes is required to be a datatype by construction for specsig
             bool isboxed = deserves_argbox(spect);
             Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect);
             if (is_uniquerep_Type(spect)) {
@@ -6194,25 +6318,25 @@ static Function* gen_cfun_wrapper(
             // add to argument list
             args.push_back(arg);
         }
-        Value *theFptr = returninfo.decl;
+        Value *theFptr = returninfo.decl.getCallee();
         assert(theFptr);
         if (age_ok) {
             funcName += "_gfthunk";
-            Function *gf_thunk = Function::Create(returninfo.decl->getFunctionType(),
+            Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(),
                     GlobalVariable::InternalLinkage, funcName, M);
-            jl_init_function(gf_thunk);
-            gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.decl->getAttributes(), gf_thunk->getAttributes()}));
+            jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
+            gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()}));
             // build a  specsig -> jl_apply_generic converter thunk
             // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer),
             // but which has the signature of a specsig
-            emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, nargs + 1, ctx.emission_context);
+            emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context);
             theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
         }
-        assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl->getFunctionType()));
+        assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType()));
         CallInst *call = ctx.builder.CreateCall(
-            cast<FunctionType>(returninfo.decl->getFunctionType()),
+            returninfo.decl.getFunctionType(),
             theFptr, ArrayRef<Value*>(args));
-        call->setAttributes(returninfo.decl->getAttributes());
+        call->setAttributes(returninfo.attrs);
         switch (returninfo.cc) {
             case jl_returninfo_t::Boxed:
                 retval = mark_julia_type(ctx, call, true, astrt);
@@ -6263,8 +6387,6 @@ static Function* gen_cfun_wrapper(
     }
     else if (!type_is_ghost(sig.lrt)) {
         Type *prt = sig.prt;
-        if (sig.sret)
-            prt = sig.fargt_sig[0]->getContainedType(0); // sret is a PointerType
         bool issigned = jl_signed_type && jl_subtype(declrt, (jl_value_t*)jl_signed_type);
         Value *v = emit_unbox(ctx, sig.lrt, retval, retval.typ);
         r = llvm_type_rewrite(ctx, v, prt, issigned);
@@ -6294,7 +6416,7 @@ static Function* gen_cfun_wrapper(
                 FunctionType::get(getInt8PtrTy(ctx.builder.getContext()), { getInt8PtrTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false),
                 GlobalVariable::ExternalLinkage,
                 funcName, M);
-        jl_init_function(cw_make);
+        jl_init_function(cw_make, ctx.emission_context.TargetTriple);
         BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw_make);
         IRBuilder<> cwbuilder(b0);
         Function::arg_iterator AI = cw_make->arg_begin();
@@ -6399,19 +6521,19 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         sigt = NULL;
     }
     else {
-        sigt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)sigt);
+        sigt = jl_apply_tuple_type((jl_svec_t*)sigt);
     }
     if (sigt && !(unionall_env && jl_has_typevar_from_unionall(rt, unionall_env))) {
         unionall_env = NULL;
     }
 
     bool nest = (!fexpr_rt.constant || unionall_env);
-#if defined(_CPU_AARCH64_) || defined(_CPU_ARM_) || defined(_CPU_PPC64_)
-    if (nest) {
-        emit_error(ctx, "cfunction: closures are not supported on this platform");
-        return jl_cgval_t();
+    if (ctx.emission_context.TargetTriple.isAArch64() || ctx.emission_context.TargetTriple.isARM() || ctx.emission_context.TargetTriple.isPPC64()) {
+        if (nest) {
+            emit_error(ctx, "cfunction: closures are not supported on this platform");
+            return jl_cgval_t();
+        }
     }
-#endif
     size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
@@ -6437,7 +6559,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
             }
             fill = (jl_svec_t*)jl_ensure_rooted(ctx, (jl_value_t*)fill);
         }
-        Type *T_htable = ArrayType::get(getSizeTy(ctx.builder.getContext()), sizeof(htable_t) / sizeof(void*));
+        Type *T_htable = ArrayType::get(ctx.types().T_size, sizeof(htable_t) / sizeof(void*));
         Value *cache = new GlobalVariable(*jl_Module, T_htable, false,
                                GlobalVariable::PrivateLinkage,
                                ConstantAggregateZero::get(T_htable));
@@ -6453,23 +6575,22 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
         outboxed = true;
     }
     else {
-        F = ctx.builder.CreatePtrToInt(F, getSizeTy(ctx.builder.getContext()));
+        F = ctx.builder.CreatePtrToInt(F, ctx.types().T_size);
         outboxed = (output_type != (jl_value_t*)jl_voidpointer_type);
         if (outboxed) {
             assert(jl_datatype_size(output_type) == sizeof(void*) * 4);
-            Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type),
-                                         literal_pointer_val(ctx, (jl_value_t*)output_type));
-            Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), getSizePtrTy(ctx.builder.getContext()));
+            Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type);
+            Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo());
             MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
             ai.decorateInst(ctx.builder.CreateStore(F, derived_strct));
             ai.decorateInst(ctx.builder.CreateStore(
-                ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), getSizeTy(ctx.builder.getContext())),
-                ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 1)));
-            ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(getSizeTy(ctx.builder.getContext())),
-                    ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 2)));
-            ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(getSizeTy(ctx.builder.getContext())),
-                    ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 3)));
+                ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), ctx.types().T_size),
+                ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 1)));
+            ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(ctx.types().T_size),
+                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 2)));
+            ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(ctx.types().T_size),
+                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 3)));
             F = strct;
         }
     }
@@ -6536,7 +6657,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
 {
     ++GeneratedInvokeWrappers;
     Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M);
-    jl_init_function(w);
+    jl_init_function(w, params.TargetTriple);
     w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()}));
     Function::arg_iterator AI = w->arg_begin();
     Value *funcArg = &*AI++;
@@ -6558,9 +6679,9 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     allocate_gc_frame(ctx, b0);
 
     // TODO: replace this with emit_call_specfun_other?
-    FunctionType *ftype = f.decl->getFunctionType();
+    FunctionType *ftype = const_cast<llvm::FunctionCallee&>(f.decl).getFunctionType();
     size_t nfargs = ftype->getNumParams();
-    Value **args = (Value**) alloca(nfargs * sizeof(Value*));
+    SmallVector<Value *> args(nfargs);
     unsigned idx = 0;
     AllocaInst *result = NULL;
     switch (f.cc) {
@@ -6569,8 +6690,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     case jl_returninfo_t::Ghosts:
         break;
     case jl_returninfo_t::SRet:
-        assert(cast<PointerType>(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
-        result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType());
+        assert(cast<PointerType>(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()));
+        result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType());
         args[idx] = result;
         idx++;
         break;
@@ -6592,13 +6713,19 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
         jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
             jl_nth_slot_type(lam->specTypes, i);
+        // n.b. specTypes is required to be a datatype by construction for specsig
         bool isboxed = deserves_argbox(ty);
         Type *lty = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty);
         if (type_is_ghost(lty) || is_uniquerep_Type(ty))
             continue;
         Value *theArg;
         if (i == 0) {
-            theArg = funcArg;
+            // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers
+            // come in as ::Tracked, but specsig expected ::Derived.
+            if (is_opaque_closure)
+                theArg = decay_derived(ctx, funcArg);
+            else
+                theArg = funcArg;
         }
         else {
             Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
@@ -6617,8 +6744,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
         args[idx] = theArg;
         idx++;
     }
-    CallInst *call = ctx.builder.CreateCall(f.decl, ArrayRef<Value*>(&args[0], nfargs));
-    call->setAttributes(f.decl->getAttributes());
+    CallInst *call = ctx.builder.CreateCall(f.decl, args);
+    call->setAttributes(f.attrs);
 
     jl_cgval_t retval;
     if (retarg != -1) {
@@ -6661,7 +6788,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     return w;
 }
 
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure)
+static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure)
 {
     jl_returninfo_t props = {};
     SmallVector<Type*, 8> fsig;
@@ -6720,11 +6847,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
     SmallVector<AttributeSet, 8> attrs; // function declaration attributes
     if (props.cc == jl_returninfo_t::SRet) {
         assert(srt);
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext());
-#else
-        AttrBuilder param;
-#endif
         param.addStructRetAttr(srt);
         param.addAttribute(Attribute::NoAlias);
         param.addAttribute(Attribute::NoCapture);
@@ -6733,11 +6856,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
         assert(fsig.size() == 1);
     }
     if (props.cc == jl_returninfo_t::Union) {
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext());
-#else
-        AttrBuilder param;
-#endif
         param.addAttribute(Attribute::NoAlias);
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
@@ -6746,11 +6865,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
     }
 
     if (props.return_roots) {
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext());
-#else
-        AttrBuilder param;
-#endif
         param.addAttribute(Attribute::NoAlias);
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
@@ -6760,20 +6875,21 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
 
     for (size_t i = 0; i < jl_nparams(sig); i++) {
         jl_value_t *jt = jl_tparam(sig, i);
+        bool isboxed = false;
+        Type *ty = NULL;
         if (i == 0 && is_opaque_closure) {
-            jt = (jl_value_t*)jl_any_type;
+            ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
+            isboxed = true; // true-ish anyway - we might not have the type tag
+        }
+        else {
+            if (is_uniquerep_Type(jt))
+                continue;
+            isboxed = deserves_argbox(jt);
+            ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
         }
-        if (is_uniquerep_Type(jt))
-            continue;
-        bool isboxed = deserves_argbox(jt);
-        Type *ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
         if (type_is_ghost(ty))
             continue;
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext());
-#else
-        AttrBuilder param;
-#endif
         if (ty->isAggregateType()) { // aggregate types are passed by pointer
             param.addAttribute(Attribute::NoCapture);
             param.addAttribute(Attribute::ReadOnly);
@@ -6798,17 +6914,29 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
     else if (rt == ctx.types().T_prjlvalue)
         RetAttrs = RetAttrs.addAttribute(ctx.builder.getContext(), Attribute::NonNull);
     AttributeList attributes = AttributeList::get(ctx.builder.getContext(), FnAttrs, RetAttrs, attrs);
+
     FunctionType *ftype = FunctionType::get(rt, fsig, false);
-    Function *f = M ? cast_or_null<Function>(M->getNamedValue(name)) : NULL;
-    if (f == NULL) {
-        f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M);
-        jl_init_function(f);
-        f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()}));
+    if (fval == NULL) {
+        Function *f = M ? cast_or_null<Function>(M->getNamedValue(name)) : NULL;
+        if (f == NULL) {
+            f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M);
+            jl_init_function(f, ctx.emission_context.TargetTriple);
+            f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()}));
+        }
+        else {
+            assert(f->getFunctionType() == ftype);
+        }
+        fval = f;
     }
     else {
-        assert(f->getFunctionType() == ftype);
+        if (fval->getType()->isIntegerTy())
+            fval = emit_inttoptr(ctx, fval, ftype->getPointerTo());
+        else
+            fval = emit_bitcast(ctx, fval, ftype->getPointerTo());
     }
-    props.decl = f;
+
+    props.decl = FunctionCallee(ftype, fval);
+    props.attrs = attributes;
     return props;
 }
 
@@ -6841,6 +6969,7 @@ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq)
     JL_GC_PUSH1(&tupargs);
     for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
         jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
+        // n.b. specTypes is required to be a datatype by construction for specsig
         if (is_uniquerep_Type(argType))
             argType = jl_typeof(jl_tparam0(argType));
         else if (jl_has_intersect_type_not_kind(argType)) {
@@ -6849,11 +6978,29 @@ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq)
         }
         jl_svecset(tupargs, i-nreq, argType);
     }
-    jl_datatype_t *typ = jl_apply_tuple_type(tupargs);
+    jl_value_t *typ = jl_apply_tuple_type(tupargs);
     JL_GC_POP();
-    return typ;
+    return (jl_datatype_t*)typ;
 }
 
+static std::string get_function_name(bool specsig, bool needsparams, const char *unadorned_name, const Triple &TargetTriple)
+{
+    std::string _funcName;
+    raw_string_ostream funcName(_funcName);
+    // try to avoid conflicts in the global symbol table
+    if (specsig)
+        funcName << "julia_"; // api 5
+    else if (needsparams)
+        funcName << "japi3_";
+    else
+        funcName << "japi1_";
+    if (TargetTriple.isOSLinux()) {
+        if (unadorned_name[0] == '@')
+            unadorned_name++;
+    }
+    funcName << unadorned_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+    return funcName.str();
+}
 
 // Compile to LLVM IR, using a specialized signature if applicable.
 static jl_llvm_functions_t
@@ -6961,6 +7108,8 @@ static jl_llvm_functions_t
         if (argname == jl_unused_sym)
             continue;
         jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i);
+        // TODO: jl_nth_slot_type should call jl_rewrap_unionall
+        //  specTypes is required to be a datatype by construction for specsig, but maybe not otherwise
         // OpaqueClosure implicitly loads the env
         if (i == 0 && ctx.is_opaque_closure) {
             if (jl_is_array(src->slottypes)) {
@@ -7000,22 +7149,8 @@ static jl_llvm_functions_t
     if (!specsig)
         ctx.nReqArgs--;  // function not part of argArray in jlcall
 
-    std::string _funcName;
-    raw_string_ostream funcName(_funcName);
-    // try to avoid conflicts in the global symbol table
-    if (specsig)
-        funcName << "julia_"; // api 5
-    else if (needsparams)
-        funcName << "japi3_";
-    else
-        funcName << "japi1_";
-    const char* unadorned_name = ctx.name;
-#if defined(_OS_LINUX_)
-    if (unadorned_name[0] == '@')
-        unadorned_name++;
-#endif
-    funcName << unadorned_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
-    declarations.specFunctionObject = funcName.str();
+    std::string _funcName = get_function_name(specsig, needsparams, ctx.name, ctx.emission_context.TargetTriple);
+    declarations.specFunctionObject = _funcName;
 
     // allocate Function declarations and wrapper objects
     //Safe because params holds ctx lock
@@ -7026,10 +7161,10 @@ static jl_llvm_functions_t
     Function *f = NULL;
     bool has_sret = false;
     if (specsig) { // assumes !va and !needsparams
-        returninfo = get_specsig_function(ctx, M, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure);
-        f = returninfo.decl;
+        returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure);
+        f = cast<Function>(returninfo.decl.getCallee());
         has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
-        jl_init_function(f);
+        jl_init_function(f, ctx.emission_context.TargetTriple);
 
         // common pattern: see if all return statements are an argument in that
         // case the apply-generic call can re-use the original box for the return
@@ -7056,7 +7191,7 @@ static jl_llvm_functions_t
         }();
 
         std::string wrapName;
-        raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
+        raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1);
         declarations.functionObject = wrapName;
         (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context);
         // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType)
@@ -7067,22 +7202,14 @@ static jl_llvm_functions_t
         f = Function::Create(needsparams ? ctx.types().T_jlfuncparams : ctx.types().T_jlfunc,
                              GlobalVariable::ExternalLinkage,
                              declarations.specFunctionObject, M);
-        jl_init_function(f);
+        jl_init_function(f, ctx.emission_context.TargetTriple);
         f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()}));
         returninfo.decl = f;
         declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args";
     }
 
-#if JL_LLVM_VERSION >= 140000
     AttrBuilder FnAttrs(ctx.builder.getContext(), f->getAttributes().getFnAttrs());
-#else
-    AttrBuilder FnAttrs(f->getAttributes().getFnAttributes());
-#endif
-#if JL_LLVM_VERSION >= 140000
     AttrBuilder RetAttrs(ctx.builder.getContext(), f->getAttributes().getRetAttrs());
-#else
-    AttrBuilder RetAttrs(f->getAttributes().getRetAttributes());
-#endif
 
     if (jlrettype == (jl_value_t*)jl_bottom_type)
         FnAttrs.addAttribute(Attribute::NoReturn);
@@ -7115,47 +7242,26 @@ static jl_llvm_functions_t
     ctx.f = f;
 
     // Step 4b. determine debug info signature and other type info for locals
-    DIBuilder dbuilder(*M);
+    DICompileUnit::DebugEmissionKind emissionKind = (DICompileUnit::DebugEmissionKind) ctx.params->debug_info_kind;
+    DICompileUnit::DebugNameTableKind tableKind;
+    if (JL_FEAT_TEST(ctx, gnu_pubnames))
+        tableKind = DICompileUnit::DebugNameTableKind::GNU;
+    else
+        tableKind = DICompileUnit::DebugNameTableKind::None;
+    DIBuilder dbuilder(*M, true, ctx.debug_enabled ? getOrCreateJuliaCU(*M, emissionKind, tableKind) : NULL);
     DIFile *topfile = NULL;
     DISubprogram *SP = NULL;
     DebugLoc noDbg, topdebugloc;
     if (ctx.debug_enabled) {
-        DICompileUnit::DebugEmissionKind emissionKind = (DICompileUnit::DebugEmissionKind) ctx.params->debug_info_kind;
-        DICompileUnit::DebugNameTableKind tableKind;
-
-        if (JL_FEAT_TEST(ctx, gnu_pubnames)) {
-            tableKind = DICompileUnit::DebugNameTableKind::GNU;
-        }
-        else {
-            tableKind = DICompileUnit::DebugNameTableKind::None;
-        }
         topfile = dbuilder.createFile(ctx.file, ".");
-        DICompileUnit *CU =
-            dbuilder.createCompileUnit(llvm::dwarf::DW_LANG_Julia
-                                       ,topfile      // File
-                                       ,"julia"      // Producer
-                                       ,true         // isOptimized
-                                       ,""           // Flags
-                                       ,0            // RuntimeVersion
-                                       ,""           // SplitName
-                                       ,emissionKind // Kind
-                                       ,0            // DWOId
-                                       ,true         // SplitDebugInlining
-                                       ,false        // DebugInfoForProfiling
-                                       ,tableKind    // NameTableKind
-                                       );
-
         DISubroutineType *subrty;
-        if (jl_options.debug_level <= 1) {
+        if (jl_options.debug_level <= 1)
             subrty = debuginfo.jl_di_func_null_sig;
-        }
-        else if (!specsig) {
+        else if (!specsig)
             subrty = debuginfo.jl_di_func_sig;
-        }
-        else {
+        else
             subrty = get_specsig_di(ctx, debuginfo, jlrettype, lam->specTypes, dbuilder);
-        }
-        SP = dbuilder.createFunction(CU
+        SP = dbuilder.createFunction(nullptr
                                      ,dbgFuncName      // Name
                                      ,f->getName()     // LinkageName
                                      ,topfile          // File
@@ -7248,7 +7354,7 @@ static jl_llvm_functions_t
     if (toplevel || ctx.is_opaque_closure) {
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
         last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-            getSizeTy(ctx.builder.getContext()), world_age_field, Align(sizeof(size_t))));
+            ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
     }
 
     // step 7. allocate local variables slots
@@ -7396,11 +7502,7 @@ static jl_llvm_functions_t
         }
         Argument *Arg = &*AI;
         ++AI;
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
-#else
-        AttrBuilder param(f->getAttributes().getParamAttributes(Arg->getArgNo()));
-#endif
         jl_cgval_t theArg;
         if (llvmArgType->isAggregateType()) {
             maybe_mark_argument_dereferenceable(param, argType);
@@ -7420,11 +7522,7 @@ static jl_llvm_functions_t
     if (has_sret) {
         Argument *Arg = &*AI;
         ++AI;
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
-#else
-        AttrBuilder param(f->getAttributes().getParamAttributes(Arg->getArgNo()));
-#endif
         if (returninfo.cc == jl_returninfo_t::Union) {
             param.addAttribute(Attribute::NonNull);
             // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
@@ -7446,11 +7544,7 @@ static jl_llvm_functions_t
     if (returninfo.return_roots) {
         Argument *Arg = &*AI;
         ++AI;
-#if JL_LLVM_VERSION >= 140000
         AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
-#else
-        AttrBuilder param(f->getAttributes().getParamAttributes(Arg->getArgNo()));
-#endif
         param.addAttribute(Attribute::NonNull);
         // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
         size_t size = returninfo.return_roots * sizeof(jl_value_t*);
@@ -7460,10 +7554,19 @@ static jl_llvm_functions_t
     }
     for (i = 0; i < nreq; i++) {
         jl_sym_t *s = slot_symbol(ctx, i);
-        jl_value_t *argType = (i == 0 && ctx.is_opaque_closure) ? (jl_value_t*)jl_any_type :
-            jl_nth_slot_type(lam->specTypes, i);
+        jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
+        // TODO: jl_nth_slot_type should call jl_rewrap_unionall?
+        //  specTypes is required to be a datatype by construction for specsig, but maybe not otherwise
         bool isboxed = deserves_argbox(argType);
-        Type *llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
+        Type *llvmArgType = NULL;
+        if (i == 0 && ctx.is_opaque_closure) {
+            isboxed = true;
+            llvmArgType = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
+            argType = (jl_value_t*)jl_any_type;
+        }
+        else {
+            llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
+        }
         if (s == jl_unused_sym) {
             if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
                 ++AI;
@@ -7477,14 +7580,36 @@ static jl_llvm_functions_t
                 ++AI;
         }
         else {
-            if (specsig) {
+            // If this is an opaque closure, implicitly load the env and switch
+            // the world age.
+            if (i == 0 && ctx.is_opaque_closure) {
+                // Load closure world
+                Value *oc_this = decay_derived(ctx, &*AI++);
+                Value *argaddr = emit_bitcast(ctx, oc_this, getInt8PtrTy(ctx.builder.getContext()));
+                Value *worldaddr = ctx.builder.CreateInBoundsGEP(
+                        getInt8Ty(ctx.builder.getContext()), argaddr,
+                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world)));
+
+                jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
+                    nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value());
+                emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr.value());
+
+                // Load closure env
+                Value *envaddr = ctx.builder.CreateInBoundsGEP(
+                        getInt8Ty(ctx.builder.getContext()), argaddr,
+                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures)));
+
+                jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
+                    nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
+                theArg = update_julia_type(ctx, closure_env, vi.value.typ);
+            }
+            else if (specsig) {
                 theArg = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             else {
                 if (i == 0) {
                     // first (function) arg is separate in jlcall
-                    theArg = mark_julia_type(ctx, fArg, true, ctx.is_opaque_closure ?
-                        argType : vi.value.typ);
+                    theArg = mark_julia_type(ctx, fArg, true, vi.value.typ);
                 }
                 else {
                     Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
@@ -7507,28 +7632,6 @@ static jl_llvm_functions_t
                 }
             }
 
-            // If this is an opaque closure, implicitly load the env and switch
-            // the world age.
-            if (i == 0 && ctx.is_opaque_closure) {
-                // Load closure world
-                Value *argaddr = emit_bitcast(ctx, data_pointer(ctx, theArg), getInt8PtrTy(ctx.builder.getContext()));
-                Value *worldaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_opaque_closure_t, world)));
-
-                jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
-                    theArg.tbaa, nullptr, false, AtomicOrdering::NotAtomic, false, sizeof(size_t));
-                emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, sizeof(size_t));
-
-                // Load closure env
-                Value *envaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_opaque_closure_t, captures)));
-
-                jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
-                    theArg.tbaa, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
-                theArg = convert_julia_type(ctx, closure_env, vi.value.typ);
-            }
 
             if (vi.boxroot == NULL) {
                 assert(vi.value.V == NULL && "unexpected variable slot created for argument");
@@ -7567,20 +7670,21 @@ static jl_llvm_functions_t
         }
         else if (specsig) {
             ctx.nvargs = jl_nparams(lam->specTypes) - nreq;
-            jl_cgval_t *vargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * ctx.nvargs);
+            SmallVector<jl_cgval_t> vargs(ctx.nvargs);
             for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
                 jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
+                // n.b. specTypes is required to be a datatype by construction for specsig
                 bool isboxed = deserves_argbox(argType);
                 Type *llvmArgType = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
                 vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             if (jl_is_concrete_type(vi.value.typ)) {
-                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs);
+                jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs.data());
                 emit_varinfo_assign(ctx, vi, tuple);
             }
             else {
                 restTuple = emit_jlcall(ctx, jltuple_func, Constant::getNullValue(ctx.types().T_prjlvalue),
-                    vargs, ctx.nvargs, julia_call);
+                    vargs.data(), ctx.nvargs, julia_call);
                 jl_cgval_t tuple = mark_julia_type(ctx, restTuple, true, vi.value.typ);
                 emit_varinfo_assign(ctx, vi, tuple);
             }
@@ -7592,7 +7696,7 @@ static jl_llvm_functions_t
                 ctx.builder.CreateCall(F,
                         { Constant::getNullValue(ctx.types().T_prjlvalue),
                           ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray,
-                                  ConstantInt::get(getSizeTy(ctx.builder.getContext()), nreq - 1)),
+                                  ConstantInt::get(ctx.types().T_size, nreq - 1)),
                           ctx.builder.CreateSub(argCount,
                                   ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) });
             restTuple->setAttributes(F->getAttributes());
@@ -7644,7 +7748,7 @@ static jl_llvm_functions_t
             // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int32, inlined_at::Int32)
             jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, i);
             DebugLineTable &info = linetable[i + 1];
-            assert(jl_typeis(locinfo, jl_lineinfonode_type));
+            assert(jl_typetagis(locinfo, jl_lineinfonode_type));
             jl_module_t *module = (jl_module_t*)jl_fieldref_noalloc(locinfo, 0);
             jl_value_t *method = jl_fieldref_noalloc(locinfo, 1);
             jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 2);
@@ -7732,11 +7836,19 @@ static jl_llvm_functions_t
 
     Instruction &prologue_end = ctx.builder.GetInsertBlock()->back();
 
-    // step 11a. Emit the entry safepoint
+    // step 11a. For top-level code, load the world age
+    if (toplevel && !ctx.is_opaque_closure) {
+        LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size,
+            prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr);
+        world->setOrdering(AtomicOrdering::Acquire);
+        ctx.builder.CreateAlignedStore(world, world_age_field, ctx.types().alignof_ptr);
+    }
+
+    // step 11b. Emit the entry safepoint
     if (JL_FEAT_TEST(ctx, safepoint_on_entry))
-        emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
+        emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
 
-    // step 11b. Do codegen in control flow order
+    // step 11c. Do codegen in control flow order
     std::vector<int> workstack;
     std::map<int, BasicBlock*> BB;
     std::map<size_t, BasicBlock*> come_from_bb;
@@ -7909,6 +8021,11 @@ static jl_llvm_functions_t
             // this is basically a copy of emit_assignment,
             // but where the assignment slot is the retval
             jl_cgval_t retvalinfo = emit_expr(ctx, retexpr);
+
+            if (ctx.is_opaque_closure) {
+                emit_typecheck(ctx, retvalinfo, jlrettype, "OpaqueClosure");
+            }
+
             retvalinfo = convert_julia_type(ctx, retvalinfo, jlrettype);
             if (retvalinfo.typ == jl_bottom_type) {
                 CreateTrap(ctx.builder, false);
@@ -8059,13 +8176,6 @@ static jl_llvm_functions_t
             ctx.builder.SetInsertPoint(tryblk);
         }
         else {
-            if (!jl_is_method(ctx.linfo->def.method) && !ctx.is_opaque_closure) {
-                // TODO: inference is invalid if this has any effect (which it often does)
-                LoadInst *world = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()),
-                    prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
-                world->setOrdering(AtomicOrdering::Acquire);
-                ctx.builder.CreateAlignedStore(world, world_age_field, Align(sizeof(size_t)));
-            }
             emit_stmtpos(ctx, stmt, cursor);
             mallocVisitStmt(debuginfoloc, nullptr);
         }
@@ -8291,12 +8401,12 @@ static jl_llvm_functions_t
     }
 
     // step 12. Perform any delayed instantiations
-    if (ctx.debug_enabled) {
-        bool in_prologue = true;
-        for (auto &BB : *ctx.f) {
-            for (auto &I : BB) {
-                CallBase *call = dyn_cast<CallBase>(&I);
-                if (call && !I.getDebugLoc()) {
+    bool in_prologue = true;
+    for (auto &BB : *ctx.f) {
+        for (auto &I : BB) {
+            CallBase *call = dyn_cast<CallBase>(&I);
+            if (call) {
+                if (ctx.debug_enabled && !I.getDebugLoc()) {
                     // LLVM Verifier: inlinable function call in a function with debug info must have a !dbg location
                     // make sure that anything we attempt to call has some inlining info, just in case optimization messed up
                     // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram)
@@ -8305,12 +8415,24 @@ static jl_llvm_functions_t
                         I.setDebugLoc(topdebugloc);
                     }
                 }
-                if (&I == &prologue_end)
-                    in_prologue = false;
+                if (toplevel && !ctx.is_opaque_closure && !in_prologue) {
+                    // we're at toplevel; insert an atomic barrier between every instruction
+                    // TODO: inference is invalid if this has any effect (which it often does)
+                    LoadInst *world = new LoadInst(ctx.types().T_size,
+                        prepare_global_in(jl_Module, jlgetworld_global), Twine(),
+                        /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I);
+                    world->setOrdering(AtomicOrdering::Acquire);
+                    StoreInst *store_world = new StoreInst(world, world_age_field,
+                        /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I);
+                    (void)store_world;
+                }
             }
+            if (&I == &prologue_end)
+                in_prologue = false;
         }
-        dbuilder.finalize();
     }
+    if (ctx.debug_enabled)
+        dbuilder.finalize();
 
     if (ctx.vaSlot > 0) {
         // remove VA allocation if we never referenced it
@@ -8394,7 +8516,8 @@ jl_llvm_functions_t jl_emit_code(
         jl_value_t *jlrettype,
         jl_codegen_params_t &params)
 {
-    JL_TIMING(CODEGEN);
+    JL_TIMING(CODEGEN, CODEGEN_LLVM);
+    jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_CURRENT_BLOCK);
     // caller must hold codegen_lock
     jl_llvm_functions_t decls = {};
     assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache ||
@@ -8417,11 +8540,11 @@ jl_llvm_functions_t jl_emit_code(
     JL_CATCH {
         // Something failed! This is very, very bad.
         // Try to pretend that it isn't and attempt to recover.
-        const char *mname = m.getModuleUnlocked()->getModuleIdentifier().data();
+        std::string mname = m.getModuleUnlocked()->getModuleIdentifier();
         m = orc::ThreadSafeModule();
         decls.functionObject = "";
         decls.specFunctionObject = "";
-        jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname);
+        jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname.c_str());
         jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         jlbacktrace(); // written to STDERR_FILENO
@@ -8430,19 +8553,45 @@ jl_llvm_functions_t jl_emit_code(
     return decls;
 }
 
+static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t &params, jl_method_instance_t *mi, jl_value_t *rettype)
+{
+    Module *M = m.getModuleUnlocked();
+    jl_codectx_t ctx(M->getContext(), params);
+    ctx.name = M->getModuleIdentifier().data();
+    std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple);
+    jl_llvm_functions_t declarations;
+    declarations.functionObject = "jl_f_opaque_closure_call";
+    if (uses_specsig(mi->specTypes, false, true, rettype, true)) {
+        jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, 1);
+        Function *gf_thunk = cast<Function>(returninfo.decl.getCallee());
+        jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
+        size_t nrealargs = jl_nparams(mi->specTypes);
+        emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context);
+        declarations.specFunctionObject = funcName;
+    }
+    return declarations;
+}
+
 jl_llvm_functions_t jl_emit_codeinst(
         orc::ThreadSafeModule &m,
         jl_code_instance_t *codeinst,
         jl_code_info_t *src,
         jl_codegen_params_t &params)
 {
-    JL_TIMING(CODEGEN);
+    JL_TIMING(CODEGEN, CODEGEN_Codeinst);
+    jl_timing_show_method_instance(codeinst->def, JL_TIMING_CURRENT_BLOCK);
     JL_GC_PUSH1(&src);
     if (!src) {
         src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
         jl_method_t *def = codeinst->def->def.method;
+        // Check if this is the generic method for opaque closure wrappers -
+        // if so, generate the specsig -> invoke converter.
+        if (def == jl_opaque_closure_method) {
+            JL_GC_POP();
+            return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype);
+        }
         if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def))
-            src = jl_uncompress_ir(def, codeinst, (jl_array_t*)src);
+            src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src);
         if (!src || !jl_is_code_info(src)) {
             JL_GC_POP();
             m = orc::ThreadSafeModule();
@@ -8471,7 +8620,7 @@ jl_llvm_functions_t jl_emit_codeinst(
         }
 
         if (params.world) {// don't alter `inferred` when the code is not directly being used
-            auto inferred = jl_atomic_load_relaxed(&codeinst->inferred);
+            jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
             // don't change inferred state
             if (inferred) {
                 jl_method_t *def = codeinst->def->def.method;
@@ -8483,8 +8632,8 @@ jl_llvm_functions_t jl_emit_codeinst(
                     if (inferred != (jl_value_t*)src) {
                         if (jl_is_method(def)) {
                             src = (jl_code_info_t*)jl_compress_ir(def, src);
-                            assert(jl_typeis(src, jl_array_uint8_type));
-                            codeinst->relocatability = ((uint8_t*)jl_array_data(src))[jl_array_len(src)-1];
+                            assert(jl_is_string(src));
+                            codeinst->relocatability = jl_string_data(src)[jl_string_len(src)-1];
                         }
                         jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src);
                         jl_gc_wb(codeinst, src);
@@ -8495,7 +8644,7 @@ jl_llvm_functions_t jl_emit_codeinst(
                             inferred != jl_nothing &&
                             // don't delete inlineable code, unless it is constant
                             (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr ||
-                                (jl_ir_inlining_cost((jl_array_t*)inferred) == UINT16_MAX)) &&
+                                (jl_ir_inlining_cost(inferred) == UINT16_MAX)) &&
                             // don't delete code when generating a precompile file
                             !(params.imaging || jl_options.incremental)) {
                         // if not inlineable, code won't be needed again
@@ -8515,7 +8664,7 @@ void jl_compile_workqueue(
     Module &original,
     jl_codegen_params_t &params, CompilationPolicy policy)
 {
-    JL_TIMING(CODEGEN);
+    JL_TIMING(CODEGEN, CODEGEN_Workqueue);
     jl_code_info_t *src = NULL;
     JL_GC_PUSH1(&src);
     while (!params.workqueue.empty()) {
@@ -8533,18 +8682,22 @@ void jl_compile_workqueue(
             "invalid world for code-instance");
         StringRef preal_decl = "";
         bool preal_specsig = false;
-        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
         bool cache_valid = params.cache;
-        if (params.external_linkage) {
-            cache_valid = jl_object_in_image((jl_value_t*)codeinst);
-        }
         // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
         if (cache_valid && invoke != NULL) {
             auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+            if (fptr) {
+                while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                    jl_cpu_pause();
+                }
+                // in case we are racing with another thread that is emitting this function
+                invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+            }
             if (invoke == jl_fptr_args_addr) {
                 preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
             }
-            else if (codeinst->isspecsig) {
+            else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
                 preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                 preal_specsig = true;
             }
@@ -8603,10 +8756,10 @@ void jl_compile_workqueue(
                 Function *preal = emit_tojlinvoke(codeinst, mod, params);
                 protodecl->setLinkage(GlobalVariable::InternalLinkage);
                 //protodecl->setAlwaysInline();
-                jl_init_function(protodecl);
+                jl_init_function(protodecl, params.TargetTriple);
                 size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed
                 // TODO: maybe this can be cached in codeinst->specfptr?
-                emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, nrealargs, params, preal);
+                emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal);
                 preal_decl = ""; // no need to fixup the name
             }
             else {
@@ -8650,21 +8803,76 @@ static JuliaVariable *julia_const_gv(jl_value_t *val)
     return nullptr;
 }
 
+// Handle FLOAT16 ABI v2
+#if JULIA_FLOAT16_ABI == 2
+static void makeCastCall(Module &M, StringRef wrapperName, StringRef calledName, FunctionType *FTwrapper, FunctionType *FTcalled, bool external)
+{
+    Function *calledFun = M.getFunction(calledName);
+    if (!calledFun) {
+        calledFun = Function::Create(FTcalled, Function::ExternalLinkage, calledName, M);
+    }
+    auto linkage = external ? Function::ExternalLinkage : Function::InternalLinkage;
+    auto wrapperFun = Function::Create(FTwrapper, linkage, wrapperName, M);
+    wrapperFun->addFnAttr(Attribute::AlwaysInline);
+    llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", wrapperFun));
+    SmallVector<Value *, 4> CallArgs;
+    if (wrapperFun->arg_size() != calledFun->arg_size()){
+        llvm::errs() << "FATAL ERROR: Can't match wrapper to called function";
+        abort();
+    }
+    for (auto wrapperArg = wrapperFun->arg_begin(), calledArg = calledFun->arg_begin();
+            wrapperArg != wrapperFun->arg_end() && calledArg != calledFun->arg_end(); ++wrapperArg, ++calledArg)
+    {
+        CallArgs.push_back(builder.CreateBitCast(wrapperArg, calledArg->getType()));
+    }
+    auto val = builder.CreateCall(calledFun, CallArgs);
+    auto retval = builder.CreateBitCast(val,wrapperFun->getReturnType());
+    builder.CreateRet(retval);
+}
+
+void emitFloat16Wrappers(Module &M, bool external)
+{
+    auto &ctx = M.getContext();
+    makeCastCall(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", FunctionType::get(Type::getFloatTy(ctx), { Type::getHalfTy(ctx) }, false),
+                FunctionType::get(Type::getFloatTy(ctx), { Type::getInt16Ty(ctx) }, false), external);
+    makeCastCall(M, "__extendhfsf2", "julia__gnu_h2f_ieee", FunctionType::get(Type::getFloatTy(ctx), { Type::getHalfTy(ctx) }, false),
+                FunctionType::get(Type::getFloatTy(ctx), { Type::getInt16Ty(ctx) }, false), external);
+    makeCastCall(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", FunctionType::get(Type::getHalfTy(ctx), { Type::getFloatTy(ctx) }, false),
+                FunctionType::get(Type::getInt16Ty(ctx), { Type::getFloatTy(ctx) }, false), external);
+    makeCastCall(M, "__truncsfhf2", "julia__gnu_f2h_ieee", FunctionType::get(Type::getHalfTy(ctx), { Type::getFloatTy(ctx) }, false),
+                FunctionType::get(Type::getInt16Ty(ctx), { Type::getFloatTy(ctx) }, false), external);
+    makeCastCall(M, "__truncdfhf2", "julia__truncdfhf2", FunctionType::get(Type::getHalfTy(ctx), { Type::getDoubleTy(ctx) }, false),
+                FunctionType::get(Type::getInt16Ty(ctx), { Type::getDoubleTy(ctx) }, false), external);
+}
+
+static void init_f16_funcs(void)
+{
+    auto ctx = jl_ExecutionEngine->acquireContext();
+    auto TSM =  jl_create_ts_module("F16Wrappers", ctx, imaging_default());
+    auto aliasM = TSM.getModuleUnlocked();
+    emitFloat16Wrappers(*aliasM, true);
+    jl_ExecutionEngine->addModule(std::move(TSM));
+}
+#endif
+
 static void init_jit_functions(void)
 {
+    add_named_global(jlsmall_typeof_var, &small_typeof);
     add_named_global(jlstack_chk_guard_var, &__stack_chk_guard);
     add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle);
-#ifdef _OS_WINDOWS_
     add_named_global(jlexe_var, &jl_exe_handle);
     add_named_global(jldll_var, &jl_libjulia_handle);
     add_named_global(jldlli_var, &jl_libjulia_internal_handle);
-#endif
-    global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, get_pjlvalue}, &jl_true);
-    global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, get_pjlvalue}, &jl_false);
-    global_jlvalue_to_llvm(new JuliaVariable{"jl_emptysvec", true, get_pjlvalue}, (jl_value_t**)&jl_emptysvec);
-    global_jlvalue_to_llvm(new JuliaVariable{"jl_emptytuple", true, get_pjlvalue}, &jl_emptytuple);
-    global_jlvalue_to_llvm(new JuliaVariable{"jl_diverror_exception", true, get_pjlvalue}, &jl_diverror_exception);
-    global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, get_pjlvalue}, &jl_undefref_exception);
+    auto size2pjlvalue = [](Type *T_size) -> Type * {
+        return get_pjlvalue(T_size->getContext());
+    };
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, size2pjlvalue}, &jl_true);
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, size2pjlvalue}, &jl_false);
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_nothing", true, size2pjlvalue}, &jl_nothing);
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_emptysvec", true, size2pjlvalue}, (jl_value_t**)&jl_emptysvec);
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_emptytuple", true, size2pjlvalue}, &jl_emptytuple);
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_diverror_exception", true, size2pjlvalue}, &jl_diverror_exception);
+    global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, size2pjlvalue}, &jl_undefref_exception);
     add_named_global(jlgetworld_global, &jl_world_counter);
     add_named_global("__stack_chk_fail", &__stack_chk_fail);
     add_named_global(jlpgcstack_func, (void*)NULL);
@@ -8711,7 +8919,6 @@ static void init_jit_functions(void)
     add_named_global(jl_loopinfo_marker_func, (void*)NULL);
     add_named_global(jl_typeof_func, (void*)NULL);
     add_named_global(jl_write_barrier_func, (void*)NULL);
-    add_named_global(jl_write_barrier_binding_func, (void*)NULL);
     add_named_global(jldlsym_func, &jl_load_and_lookup);
     add_named_global("jl_adopt_thread", &jl_adopt_thread);
     add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline);
@@ -8804,12 +9011,29 @@ extern "C" void jl_init_llvm(void)
     clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
     if (clopt->getNumOccurrences() == 0)
         cl::ProvidePositionalOption(clopt, "0", 1);
+#ifdef JL_USE_NEW_PM
+    // For parity with LoopUnswitch
+    clopt = llvmopts.lookup("unswitch-threshold");
+    if (clopt->getNumOccurrences() == 0)
+        cl::ProvidePositionalOption(clopt, "100", 1);
+#endif
     // if the patch adding this option has been applied, lower its limit to provide
     // better DAGCombiner performance.
     clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
     if (clopt && clopt->getNumOccurrences() == 0)
         cl::ProvidePositionalOption(clopt, "4", 1);
 
+#if JL_LLVM_VERSION >= 150000
+    clopt = llvmopts.lookup("opaque-pointers");
+    if (clopt && clopt->getNumOccurrences() == 0) {
+#ifdef JL_LLVM_OPAQUE_POINTERS
+        cl::ProvidePositionalOption(clopt, "true", 1);
+#else
+        cl::ProvidePositionalOption(clopt, "false", 1);
+#endif
+    }
+#endif
+
     jl_ExecutionEngine = new JuliaOJIT();
 
     bool jl_using_gdb_jitevents = false;
@@ -8872,17 +9096,22 @@ extern "C" void jl_init_llvm(void)
     cl::PrintOptionValues();
 }
 
-extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void)
+extern "C" JL_DLLEXPORT_CODEGEN void jl_init_codegen_impl(void)
 {
     jl_init_llvm();
     // Now that the execution engine exists, initialize all modules
     init_jit_functions();
+#if JULIA_FLOAT16_ABI == 2
+    init_f16_funcs();
+#endif
 }
 
-extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
+extern "C" JL_DLLEXPORT_CODEGEN void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
 {
     // output LLVM timings and statistics
-    reportAndResetTimings();
+    // Guard against exits before we have initialized the ExecutionEngine
+    if (jl_ExecutionEngine)
+        jl_ExecutionEngine->printTimers();
     PrintStatistics();
 }
 
@@ -8954,17 +9183,21 @@ extern void jl_write_bitcode_module(void *M, char *fname) {
 
 #include <llvm-c/Core.h>
 
-extern "C" JL_DLLEXPORT jl_value_t *jl_get_libllvm_impl(void) JL_NOTSAFEPOINT
+extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_get_libllvm_impl(void) JL_NOTSAFEPOINT
 {
 #if defined(_OS_WINDOWS_)
     HMODULE mod;
     if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCSTR)&llvm::DebugFlag, &mod))
         return jl_nothing;
-
-    char path[MAX_PATH];
-    if (!GetModuleFileNameA(mod, path, sizeof(path)))
+    wchar_t path16[MAX_PATH];
+    DWORD n16 = GetModuleFileNameW(mod, path16, MAX_PATH);
+    if (n16 <= 0)
+        return jl_nothing;
+    path16[n16++] = 0;
+    char path8[MAX_PATH * 3];
+    if (!WideCharToMultiByte(CP_UTF8, 0, path16, n16, path8, MAX_PATH * 3, NULL, NULL))
         return jl_nothing;
-    return (jl_value_t*) jl_symbol(path);
+    return (jl_value_t*) jl_symbol(path8);
 #else
     Dl_info dli;
     if (!dladdr((void*)LLVMContextCreate, &dli))
diff --git a/src/coverage.cpp b/src/coverage.cpp
index 2be064726b1fe..95924f326524b 100644
--- a/src/coverage.cpp
+++ b/src/coverage.cpp
@@ -192,7 +192,7 @@ static void write_lcov_data(logdata_t &logData, const std::string &outfile)
     outf.close();
 }
 
-extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output)
+extern "C" void jl_write_coverage_data(const char *output)
 {
     if (output) {
         StringRef output_pattern(output);
@@ -206,7 +206,7 @@ extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output)
     }
 }
 
-extern "C" JL_DLLEXPORT void jl_write_malloc_log(void)
+extern "C" void jl_write_malloc_log(void)
 {
     std::string stm;
     raw_string_ostream(stm) << "." << uv_os_getpid() << ".mem";
diff --git a/src/datatype.c b/src/datatype.c
index 6e71c6573c91f..95c3b11c9abdc 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -53,7 +53,7 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo
     jl_atomic_store_relaxed(&mt->cache, jl_nothing);
     jl_atomic_store_relaxed(&mt->max_args, 0);
     mt->backedges = NULL;
-    JL_MUTEX_INIT(&mt->writelock);
+    JL_MUTEX_INIT(&mt->writelock, "methodtable->writelock");
     mt->offs = 0;
     mt->frozen = 0;
     return mt;
@@ -68,7 +68,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
     tn->name = name;
     tn->module = module;
     tn->wrapper = NULL;
-    jl_atomic_store_release(&tn->Typeofwrapper, NULL);
+    jl_atomic_store_relaxed(&tn->Typeofwrapper, NULL);
     jl_atomic_store_relaxed(&tn->cache, jl_emptysvec);
     jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec);
     tn->names = NULL;
@@ -96,6 +96,7 @@ jl_datatype_t *jl_new_uninitialized_datatype(void)
 {
     jl_task_t *ct = jl_current_task;
     jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type);
+    jl_set_typetagof(t, jl_datatype_tag, 0);
     t->hash = 0;
     t->hasfreetypevars = 0;
     t->isdispatchtuple = 0;
@@ -103,9 +104,10 @@ jl_datatype_t *jl_new_uninitialized_datatype(void)
     t->isprimitivetype = 0;
     t->zeroinit = 0;
     t->has_concrete_subtype = 1;
-    t->cached_by_hash = 0;
+    t->maybe_subtype_of_cache = 1;
     t->ismutationfree = 0;
     t->isidentityfree = 0;
+    t->smalltag = 0;
     t->name = NULL;
     t->super = NULL;
     t->parameters = NULL;
@@ -963,6 +965,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
     if (bt == jl_uint16_type)  return jl_box_uint16(*(uint16_t*)data);
     if (bt == jl_char_type)    return jl_box_char(*(uint32_t*)data);
 
+    assert(!bt->smalltag);
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
     memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);
@@ -988,6 +991,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
     if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data));
     if (bt == jl_char_type)    return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data));
 
+    assert(!bt->smalltag);
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
     // data is aligned to the power of two,
@@ -1055,6 +1059,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl
     if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src));
     if (bt == jl_char_type)    return jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
 
+    assert(!bt->smalltag);
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt);
     if (nb == 1)
@@ -1223,30 +1228,29 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t
 }
 
 // used by boot.jl
-JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt)
+JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_datatype_t *bt)
 {
     uint64_t data = 0xffffffffffffffffULL;
     jl_task_t *ct = jl_current_task;
     jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(size_t), bt);
+    if (bt->smalltag)
+        jl_set_typetagof(v, bt->smalltag, 0);
     memcpy(v, &data, sizeof(size_t));
     return v;
 }
 
-#define PERMBOXN_FUNC(nb,nw)                                            \
-    jl_value_t *jl_permbox##nb(jl_datatype_t *t, int##nb##_t x)         \
+#define PERMBOXN_FUNC(nb)                                               \
+    jl_value_t *jl_permbox##nb(jl_datatype_t *t, uintptr_t tag, uint##nb##_t x) \
     {   /* n.b. t must be a concrete isbits datatype of the right size */ \
-        jl_value_t *v = jl_gc_permobj(nw * sizeof(void*), t);           \
-        *(int##nb##_t*)jl_data_ptr(v) = x;                              \
+        jl_value_t *v = jl_gc_permobj(LLT_ALIGN(nb, sizeof(void*)), t); \
+        if (tag) jl_set_typetagof(v, tag, GC_OLD_MARKED);               \
+        *(uint##nb##_t*)jl_data_ptr(v) = x;                             \
         return v;                                                       \
     }
-PERMBOXN_FUNC(8,  1)
-PERMBOXN_FUNC(16, 1)
-PERMBOXN_FUNC(32, 1)
-#ifdef _P64
-PERMBOXN_FUNC(64, 1)
-#else
-PERMBOXN_FUNC(64, 2)
-#endif
+PERMBOXN_FUNC(8)
+PERMBOXN_FUNC(16)
+PERMBOXN_FUNC(32)
+PERMBOXN_FUNC(64)
 
 #define UNBOX_FUNC(j_type,c_type)                                       \
     JL_DLLEXPORT c_type jl_unbox_##j_type(jl_value_t *v)                \
@@ -1269,27 +1273,27 @@ UNBOX_FUNC(float64, double)
 UNBOX_FUNC(voidpointer, void*)
 UNBOX_FUNC(uint8pointer, uint8_t*)
 
-#define BOX_FUNC(typ,c_type,pfx,nw)                                     \
+#define BOX_FUNC(typ,c_type,pfx)                                        \
     JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x)                      \
     {                                                                   \
         jl_task_t *ct = jl_current_task;                                \
-        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, LLT_ALIGN(sizeof(x), sizeof(void*)), \
                                     jl_##typ##_type);                   \
         *(c_type*)jl_data_ptr(v) = x;                                   \
         return v;                                                       \
     }
-BOX_FUNC(float32, float,  jl_box, 1)
-BOX_FUNC(voidpointer, void*,  jl_box, 1)
-BOX_FUNC(uint8pointer, uint8_t*,  jl_box, 1)
-#ifdef _P64
-BOX_FUNC(float64, double, jl_box, 1)
-#else
-BOX_FUNC(float64, double, jl_box, 2)
-#endif
+BOX_FUNC(float32, float,  jl_box)
+BOX_FUNC(float64, double, jl_box)
+BOX_FUNC(voidpointer, void*,  jl_box)
+BOX_FUNC(uint8pointer, uint8_t*,  jl_box)
 
 #define NBOX_C 1024
 
-#define SIBOX_FUNC(typ,c_type,nw)\
+// some shims to support UIBOX_FUNC definition
+#define jl_ssavalue_tag (((uintptr_t)jl_ssavalue_type) >> 4)
+#define jl_slotnumber_tag (((uintptr_t)jl_slotnumber_type) >> 4)
+
+#define SIBOX_FUNC(typ,c_type)                                          \
     static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
     JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)                     \
     {                                                                   \
@@ -1297,36 +1301,33 @@ BOX_FUNC(float64, double, jl_box, 2)
         c_type idx = x+NBOX_C/2;                                        \
         if ((u##c_type)idx < (u##c_type)NBOX_C)                         \
             return boxed_##typ##_cache[idx];                            \
-        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, LLT_ALIGN(sizeof(x), sizeof(void*)), \
                                     jl_##typ##_type);                   \
+        jl_set_typetagof(v, jl_##typ##_tag, 0);                         \
         *(c_type*)jl_data_ptr(v) = x;                                   \
         return v;                                                       \
     }
-#define UIBOX_FUNC(typ,c_type,nw)                                       \
+#define UIBOX_FUNC(typ,c_type)                                          \
     static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
     JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)                     \
     {                                                                   \
         jl_task_t *ct = jl_current_task;                                \
         if (x < NBOX_C)                                                 \
             return boxed_##typ##_cache[x];                              \
-        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, LLT_ALIGN(sizeof(x), sizeof(void*)), \
                                     jl_##typ##_type);                   \
+        jl_set_typetagof(v, jl_##typ##_tag, 0);                         \
         *(c_type*)jl_data_ptr(v) = x;                                   \
         return v;                                                       \
     }
-SIBOX_FUNC(int16,  int16_t, 1)
-SIBOX_FUNC(int32,  int32_t, 1)
-UIBOX_FUNC(uint16, uint16_t, 1)
-UIBOX_FUNC(uint32, uint32_t, 1)
-UIBOX_FUNC(ssavalue, size_t, 1)
-UIBOX_FUNC(slotnumber, size_t, 1)
-#ifdef _P64
-SIBOX_FUNC(int64,  int64_t, 1)
-UIBOX_FUNC(uint64, uint64_t, 1)
-#else
-SIBOX_FUNC(int64,  int64_t, 2)
-UIBOX_FUNC(uint64, uint64_t, 2)
-#endif
+SIBOX_FUNC(int16,  int16_t)
+SIBOX_FUNC(int32,  int32_t)
+UIBOX_FUNC(uint16, uint16_t)
+UIBOX_FUNC(uint32, uint32_t)
+UIBOX_FUNC(ssavalue, size_t)
+UIBOX_FUNC(slotnumber, size_t)
+SIBOX_FUNC(int64,  int64_t)
+UIBOX_FUNC(uint64, uint64_t)
 
 static jl_value_t *boxed_char_cache[128];
 JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x)
@@ -1336,6 +1337,7 @@ JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x)
     if (u < 128)
         return boxed_char_cache[(uint8_t)u];
     jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(void*), jl_char_type);
+    jl_set_typetagof(v, jl_char_tag, 0);
     *(uint32_t*)jl_data_ptr(v) = x;
     return v;
 }
@@ -1355,35 +1357,35 @@ void jl_init_int32_int64_cache(void)
 {
     int64_t i;
     for(i=0; i < NBOX_C; i++) {
-        boxed_int32_cache[i]  = jl_permbox32(jl_int32_type, i-NBOX_C/2);
-        boxed_int64_cache[i]  = jl_permbox64(jl_int64_type, i-NBOX_C/2);
+        boxed_int32_cache[i]  = jl_permbox32(jl_int32_type, jl_int32_tag, i-NBOX_C/2);
+        boxed_int64_cache[i]  = jl_permbox64(jl_int64_type, jl_int64_tag, i-NBOX_C/2);
 #ifdef _P64
-        boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, i);
-        boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, i);
+        boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, 0, i);
+        boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, 0, i);
 #else
-        boxed_ssavalue_cache[i] = jl_permbox32(jl_ssavalue_type, i);
-        boxed_slotnumber_cache[i] = jl_permbox32(jl_slotnumber_type, i);
+        boxed_ssavalue_cache[i] = jl_permbox32(jl_ssavalue_type, 0, i);
+        boxed_slotnumber_cache[i] = jl_permbox32(jl_slotnumber_type, 0, i);
 #endif
     }
     for(i=0; i < 256; i++) {
-        jl_boxed_uint8_cache[i] = jl_permbox8(jl_uint8_type, i);
+        jl_boxed_uint8_cache[i] = jl_permbox8(jl_uint8_type, jl_uint8_tag, i);
     }
 }
 
 void jl_init_box_caches(void)
 {
-    int64_t i;
-    for(i=0; i < 128; i++) {
-        boxed_char_cache[i] = jl_permbox32(jl_char_type, i << 24);
+    uint32_t i;
+    for (i = 0; i < 128; i++) {
+        boxed_char_cache[i] = jl_permbox32(jl_char_type, jl_char_tag, i << 24);
     }
-    for(i=0; i < 256; i++) {
-        jl_boxed_int8_cache[i] = jl_permbox8(jl_int8_type, i);
+    for (i = 0; i < 256; i++) {
+        jl_boxed_int8_cache[i] = jl_permbox8(jl_int8_type, jl_int8_tag, i);
     }
-    for(i=0; i < NBOX_C; i++) {
-        boxed_int16_cache[i]  = jl_permbox16(jl_int16_type, i-NBOX_C/2);
-        boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, i);
-        boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, i);
-        boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, i);
+    for (i = 0; i < NBOX_C; i++) {
+        boxed_int16_cache[i]  = jl_permbox16(jl_int16_type, jl_int16_tag, i-NBOX_C/2);
+        boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, jl_uint16_tag, i);
+        boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, jl_uint32_tag, i);
+        boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, jl_uint64_tag, i);
     }
 }
 
@@ -1400,10 +1402,15 @@ JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...)
 {
     jl_task_t *ct = jl_current_task;
     if (type->instance != NULL) return type->instance;
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) {
+        jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
+    }
     va_list args;
     size_t i, nf = jl_datatype_nfields(type);
     va_start(args, type);
     jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+    if (type->smalltag) // TODO: move to callers?
+        jl_set_typetagof(jv, type->smalltag, 0);
     if (nf > 0 && jl_field_offset(type, 0) != 0) {
         memset(jv, 0, jl_field_offset(type, 0));
     }
@@ -1417,7 +1424,7 @@ JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...)
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na)
 {
     jl_task_t *ct = jl_current_task;
-    if (!jl_is_datatype(type) || type->layout == NULL) {
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL) {
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
     size_t nf = jl_datatype_nfields(type);
@@ -1431,6 +1438,8 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args,
     if (type->instance != NULL)
         return type->instance;
     jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+    if (type->smalltag) // TODO: do we need this?
+        jl_set_typetagof(jv, type->smalltag, 0);
     if (jl_datatype_nfields(type) > 0) {
         if (jl_field_offset(type, 0) != 0) {
             memset(jl_data_ptr(jv), 0, jl_field_offset(type, 0));
@@ -1454,7 +1463,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
     jl_task_t *ct = jl_current_task;
     if (!jl_is_tuple(tup))
         jl_type_error("new", (jl_value_t*)jl_tuple_type, tup);
-    if (!jl_is_datatype(type) || type->layout == NULL)
+    if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL)
         jl_type_error("new", (jl_value_t *)jl_datatype_type, (jl_value_t *)type);
     size_t nargs = jl_nfields(tup);
     size_t nf = jl_datatype_nfields(type);
@@ -1472,6 +1481,8 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
     }
     size_t size = jl_datatype_size(type);
     jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
+    if (type->smalltag) // TODO: do we need this?
+        jl_set_typetagof(jv, type->smalltag, 0);
     if (nf == 0)
         return jv;
     jl_value_t *fi = NULL;
@@ -1500,8 +1511,13 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 {
     jl_task_t *ct = jl_current_task;
     if (type->instance != NULL) return type->instance;
+    if (!jl_is_datatype(type) || type->layout == NULL) {
+        jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
+    }
     size_t size = jl_datatype_size(type);
     jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
+    if (type->smalltag) // TODO: do we need this?
+        jl_set_typetagof(jv, type->smalltag, 0);
     if (size > 0)
         memset(jl_data_ptr(jv), 0, size);
     return jv;
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 2d087178afef1..35e41fe657045 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -41,10 +41,10 @@ using namespace llvm;
 #include "julia_assert.h"
 #include "debug-registry.h"
 
-static JITDebugInfoRegistry DebugRegistry;
+static JITDebugInfoRegistry *DebugRegistry = new JITDebugInfoRegistry;
 
 static JITDebugInfoRegistry &getJITDebugRegistry() JL_NOTSAFEPOINT {
-    return DebugRegistry;
+    return *DebugRegistry;
 }
 
 struct debug_link_info {
@@ -503,7 +503,7 @@ static int lookup_pointer(
                 std::size_t semi_pos = func_name.find(';');
                 if (semi_pos != std::string::npos) {
                     func_name = func_name.substr(0, semi_pos);
-                    frame->linfo = NULL; // TODO: if (new_frames[n_frames - 1].linfo) frame->linfo = lookup(func_name in linfo)?
+                    frame->linfo = NULL; // Looked up on Julia side
                 }
             }
         }
@@ -687,7 +687,7 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) JL_NOTSAFEPO
             std::move(error_splitobj.get()),
             std::move(SplitFile.get()));
 }
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_register_fptrs_impl(uint64_t image_base, const jl_image_fptrs_t *fptrs,
     jl_method_instance_t **linfos, size_t n)
 {
@@ -1217,7 +1217,7 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
 }
 
 // Set *name and *filename to either NULL or malloc'd string
-extern "C" JL_DLLEXPORT int jl_getFunctionInfo_impl(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT
+extern "C" JL_DLLEXPORT_CODEGEN int jl_getFunctionInfo_impl(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables if noInline
     // since it can be called from an unmanaged thread on OSX.
@@ -1607,7 +1607,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size)
 
 #endif
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr)
 {
     // Might be called from unmanaged thread
diff --git a/src/disasm.cpp b/src/disasm.cpp
index e693fe7427570..96595d4381987 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -92,11 +92,7 @@
 #include <llvm/Support/MemoryBuffer.h>
 #include <llvm/Support/NativeFormatting.h>
 #include <llvm/Support/SourceMgr.h>
-#if JL_LLVM_VERSION >= 140000
 #include <llvm/MC/TargetRegistry.h>
-#else
-#include <llvm/Support/TargetRegistry.h>
-#endif
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Support/raw_ostream.h>
 
@@ -489,7 +485,7 @@ void jl_strip_llvm_addrspaces(Module *m) JL_NOTSAFEPOINT
 
 // print an llvm IR acquired from jl_get_llvmf
 // warning: this takes ownership of, and destroys, dump->TSM
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo)
 {
     std::string code;
@@ -578,7 +574,7 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
 }
 
 // print a native disassembly for the function starting at fptr
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
 {
     assert(fptr != 0);
@@ -883,16 +879,10 @@ static void jl_dump_asm_internal(
       TheTarget->createMCSubtargetInfo(TheTriple.str(), cpu, features));
     assert(STI && "Unable to create subtarget info!");
 
-#if JL_LLVM_VERSION >= 130000
     MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
     std::unique_ptr<MCObjectFileInfo> MOFI(
       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false, /*LargeCodeModel=*/ false));
     Ctx.setObjectFileInfo(MOFI.get());
-#else
-    std::unique_ptr<MCObjectFileInfo> MOFI(new MCObjectFileInfo());
-    MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr);
-    MOFI->InitMCObjectFileInfo(TheTriple, /* PIC */ false, Ctx);
-#endif
 
     std::unique_ptr<MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI, Ctx));
     if (!DisAsm) {
@@ -1212,7 +1202,7 @@ class LineNumberPrinterHandler : public AsmPrinterHandler {
 };
 
 // get a native assembly for llvm::Function
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
 {
     // precise printing via IR assembler
@@ -1286,7 +1276,7 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const
     return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 LLVMDisasmContextRef jl_LLVMCreateDisasm_impl(
         const char *TripleName, void *DisInfo, int TagType,
         LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp)
@@ -1294,8 +1284,8 @@ LLVMDisasmContextRef jl_LLVMCreateDisasm_impl(
     return LLVMCreateDisasm(TripleName, DisInfo, TagType, GetOpInfo, SymbolLookUp);
 }
 
-extern "C" JL_DLLEXPORT
-JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_impl(
+extern "C" JL_DLLEXPORT_CODEGEN
+size_t jl_LLVMDisasmInstruction_impl(
         LLVMDisasmContextRef DC, uint8_t *Bytes, uint64_t BytesSize,
         uint64_t PC, char *OutString, size_t OutStringSize)
 {
diff --git a/src/dlload.c b/src/dlload.c
index 9f4e8be29952d..3fb5a08ba2438 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -189,6 +189,7 @@ JL_DLLEXPORT JL_NO_SANITIZE void *jl_dlopen(const char *filename, unsigned flags
         if (!dlopen)
             return NULL;
         void *libdl_handle = dlopen("libdl.so", RTLD_NOW | RTLD_NOLOAD);
+        assert(libdl_handle);
         dlopen = (dlopen_prototype*)dlsym(libdl_handle, "dlopen");
         dlclose(libdl_handle);
         assert(dlopen);
@@ -239,6 +240,25 @@ JL_DLLEXPORT int jl_dlclose(void *handle) JL_NOTSAFEPOINT
 #endif
 }
 
+void *jl_find_dynamic_library_by_addr(void *symbol) {
+    void *handle;
+#ifdef _OS_WINDOWS_
+    if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                            (LPCWSTR)symbol,
+                            (HMODULE*)&handle)) {
+        jl_error("could not load base module");
+    }
+#else
+    Dl_info info;
+    if (!dladdr(symbol, &info) || !info.dli_fname) {
+        jl_error("could not load base module");
+    }
+    handle = dlopen(info.dli_fname, RTLD_NOW | RTLD_NOLOAD | RTLD_LOCAL);
+    dlclose(handle); // Undo ref count increment from `dlopen`
+#endif
+    return handle;
+}
+
 JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err)
 {
     char path[PATHBUF], relocated[PATHBUF];
@@ -255,29 +275,17 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     int n_extensions = endswith_extension(modname) ? 1 : N_EXTENSIONS;
     int ret;
 
-    /*
-      this branch returns handle of libjulia-internal
-    */
-    if (modname == NULL) {
-#ifdef _OS_WINDOWS_
-        if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
-                                (LPCWSTR)(uintptr_t)(&jl_load_dynamic_library),
-                                (HMODULE*)&handle)) {
-            jl_error("could not load base module");
-        }
-#else
-        Dl_info info;
-        if (!dladdr((void*)(uintptr_t)&jl_load_dynamic_library, &info) || !info.dli_fname) {
-            jl_error("could not load base module");
-        }
-        handle = dlopen(info.dli_fname, RTLD_NOW);
-#endif
-        goto done;
-    }
+    // modname == NULL is a sentinel value requesting the handle of libjulia-internal
+    if (modname == NULL)
+        return jl_find_dynamic_library_by_addr(&jl_load_dynamic_library);
 
     abspath = jl_isabspath(modname);
     is_atpath = 0;
 
+    JL_TIMING(DL_OPEN, DL_OPEN);
+    if (!(flags & JL_RTLD_NOLOAD))
+        jl_timing_puts(JL_TIMING_CURRENT_BLOCK, modname);
+
     // Detect if our `modname` is something like `@rpath/libfoo.dylib`
 #ifdef _OS_DARWIN_
     size_t nameLen = strlen(modname);
@@ -333,8 +341,10 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
                     if (i == 0) { // LoadLibrary already tested the extensions, we just need to check the `stat` result
 #endif
                         handle = jl_dlopen(path, flags);
+                        if (handle && !(flags & JL_RTLD_NOLOAD))
+                            jl_timing_puts(JL_TIMING_CURRENT_BLOCK, jl_pathname_for_handle(handle));
                         if (handle)
-                            goto done;
+                            return handle;
 #ifdef _OS_WINDOWS_
                         err = GetLastError();
                     }
@@ -353,8 +363,10 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
         path[0] = '\0';
         snprintf(path, PATHBUF, "%s%s", modname, ext);
         handle = jl_dlopen(path, flags);
+        if (handle && !(flags & JL_RTLD_NOLOAD))
+            jl_timing_puts(JL_TIMING_CURRENT_BLOCK, jl_pathname_for_handle(handle));
         if (handle)
-            goto done;
+            return handle;
 #ifdef _OS_WINDOWS_
         err = GetLastError();
         break; // LoadLibrary already tested the rest
@@ -377,7 +389,6 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     }
     handle = NULL;
 
-done:
     return handle;
 }
 
@@ -421,9 +432,8 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
     return symbol_found;
 }
 
-#ifdef _OS_WINDOWS_
-//Look for symbols in win32 libraries
-JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name)
+// Look for symbols in internal libraries
+JL_DLLEXPORT const char *jl_dlfind(const char *f_name)
 {
     void * dummy;
     if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0))
@@ -432,6 +442,7 @@ JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name)
         return JL_LIBJULIA_INTERNAL_DL_LIBNAME;
     if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0))
         return JL_LIBJULIA_DL_LIBNAME;
+#ifdef _OS_WINDOWS_
     if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0))
         return "kernel32";
     if (jl_dlsym(jl_crtdll_handle, f_name, &dummy, 0)) // Prefer crtdll over ntdll
@@ -440,6 +451,7 @@ JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name)
         return "ntdll";
     if (jl_dlsym(jl_winsock_handle, f_name, &dummy, 0))
         return "ws2_32";
+#endif
     // additional common libraries (libc?) could be added here, but in general,
     // it is better to specify the library explicitly in the code. This exists
     // mainly to ease compatibility with linux, and for libraries that don't
@@ -451,7 +463,6 @@ JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name)
     // which defaults to jl_libjulia_internal_handle, where we won't find it, and
     // will throw the appropriate error.
 }
-#endif
 
 #ifdef __cplusplus
 }
diff --git a/src/features_x86.h b/src/features_x86.h
index acacaa68751d3..08f979df546b7 100644
--- a/src/features_x86.h
+++ b/src/features_x86.h
@@ -74,11 +74,13 @@ JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 0)
 // EAX=7,ECX=0: EDX
 // JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????)
 // JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????)
+JL_FEATURE_DEF(uintr, 32 * 4 + 5, 140000)
 JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
 JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
 JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
 JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
 JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
+JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000)
 JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
 JL_FEATURE_DEF_NAME(amx_int8, 32 * 4 + 25, 110000, "amx-int8")
 
diff --git a/src/flisp/Makefile b/src/flisp/Makefile
index 7a363b0ec13d7..c2bf30300b041 100644
--- a/src/flisp/Makefile
+++ b/src/flisp/Makefile
@@ -49,7 +49,7 @@ endif
 
 FLAGS := -I$(LLTSRCDIR) $(JCFLAGS) $(HFILEDIRS:%=-I%) \
         -I$(LIBUV_INC) -I$(UTF8PROC_INC) -I$(build_includedir) $(LIBDIRS:%=-L%) \
-        -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS
+        -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS
 ifneq ($(OS), emscripten)
 FLAGS += -DUSE_COMPUTED_GOTO
 endif
diff --git a/src/flisp/julia_charmap.h b/src/flisp/julia_charmap.h
index 3c54eaf98f484..8471d1e3b3b91 100644
--- a/src/flisp/julia_charmap.h
+++ b/src/flisp/julia_charmap.h
@@ -10,4 +10,5 @@ static const uint32_t charmap[][2] = {
     { 0x00B7, 0x22C5 }, // middot char -> dot operator (#25098)
     { 0x0387, 0x22C5 }, // Greek interpunct -> dot operator (#25098)
     { 0x2212, 0x002D }, // minus -> hyphen-minus (#26193)
+    { 0x210F, 0x0127 }, // hbar -> small letter h with stroke (#48870)
 };
diff --git a/src/gc-common.c b/src/gc-common.c
index 8abee999ec48a..cfb83c08a7a6b 100644
--- a/src/gc-common.c
+++ b/src/gc-common.c
@@ -6,6 +6,8 @@ jl_gc_num_t gc_num = {0};
 size_t last_long_collect_interval;
 int gc_n_threads;
 jl_ptls_t* gc_all_tls_states;
+// `tid` of first GC thread
+int gc_first_tid;
 
 int64_t live_bytes = 0;
 
@@ -46,7 +48,9 @@ memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024;
 
 // finalizers
 // ---
-uint64_t finalizer_rngState[4];
+uint64_t finalizer_rngState[JL_RNG_SIZE];
+
+void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
 {
@@ -126,6 +130,10 @@ JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
     }
 }
 
+JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
+{
+    return jl_current_task->ptls->in_finalizer;
+}
 
 // allocation
 // ---
@@ -244,10 +252,16 @@ void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
     }
 }
 
+static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
+{
+    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
+    return live_bytes += inc;
+}
+
 void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
 {
     combine_thread_gc_counts(&gc_num);
-    live_bytes += (gc_num.deferred_alloc + gc_num.allocd);
+    inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
     gc_num.allocd = 0;
     gc_num.deferred_alloc = 0;
     reset_thread_gc_counts();
@@ -379,6 +393,11 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem)
         max_total_memory = max_mem;
 }
 
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
+{
+    return max_total_memory;
+}
+
 // callback for passing OOM errors from gmp
 JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
 {
@@ -484,7 +503,7 @@ void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
     // TODO: not needed? gc_cache.*?
     if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz;
-        live_bytes += allocsz - oldsz;
+        inc_live_bytes(allocsz - oldsz);
     }
     else if (allocsz < oldsz)
         jl_atomic_store_relaxed(&ptls->gc_num.freed,
diff --git a/src/gc-debug.c b/src/gc-debug.c
index c5ab21a3fb3c1..df2e3487506fa 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -200,12 +200,21 @@ static void restore(void)
 
 static void gc_verify_track(jl_ptls_t ptls)
 {
+    // `gc_verify_track` is limited to single-threaded GC
+    if (jl_n_gcthreads != 0)
+        return;
     do {
         jl_gc_markqueue_t mq;
-        mq.current = mq.start = ptls->mark_queue.start;
-        mq.end = ptls->mark_queue.end;
-        mq.current_chunk = mq.chunk_start = ptls->mark_queue.chunk_start;
-        mq.chunk_end = ptls->mark_queue.chunk_end;
+        jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+        ws_queue_t *cq = &mq.chunk_queue;
+        ws_queue_t *q = &mq.ptr_queue;
+        jl_atomic_store_relaxed(&cq->top, 0);
+        jl_atomic_store_relaxed(&cq->bottom, 0);
+        jl_atomic_store_relaxed(&cq->array, jl_atomic_load_relaxed(&mq2->chunk_queue.array));
+        jl_atomic_store_relaxed(&q->top, 0);
+        jl_atomic_store_relaxed(&q->bottom, 0);
+        jl_atomic_store_relaxed(&q->array, jl_atomic_load_relaxed(&mq2->ptr_queue.array));
+        arraylist_new(&mq.reclaim_set, 32);
         arraylist_push(&lostval_parents_done, lostval);
         jl_safe_printf("Now looking for %p =======\n", lostval);
         clear_mark(GC_CLEAN);
@@ -216,7 +225,7 @@ static void gc_verify_track(jl_ptls_t ptls)
             gc_mark_finlist(&mq, &ptls2->finalizers, 0);
         }
         gc_mark_finlist(&mq, &finalizer_list_marked, 0);
-        gc_mark_loop_(ptls, &mq);
+        gc_mark_loop_serial_(ptls, &mq);
         if (lostval_parents.len == 0) {
             jl_safe_printf("Could not find the missing link. We missed a toplevel root. This is odd.\n");
             break;
@@ -250,11 +259,22 @@ static void gc_verify_track(jl_ptls_t ptls)
 
 void gc_verify(jl_ptls_t ptls)
 {
+    // `gc_verify` is limited to single-threaded GC
+    if (jl_n_gcthreads != 0) {
+        jl_safe_printf("Warn. GC verify disabled in multi-threaded GC\n");
+        return;
+    }
     jl_gc_markqueue_t mq;
-    mq.current = mq.start = ptls->mark_queue.start;
-    mq.end = ptls->mark_queue.end;
-    mq.current_chunk = mq.chunk_start = ptls->mark_queue.chunk_start;
-    mq.chunk_end = ptls->mark_queue.chunk_end;
+    jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+    ws_queue_t *cq = &mq.chunk_queue;
+    ws_queue_t *q = &mq.ptr_queue;
+    jl_atomic_store_relaxed(&cq->top, 0);
+    jl_atomic_store_relaxed(&cq->bottom, 0);
+    jl_atomic_store_relaxed(&cq->array, jl_atomic_load_relaxed(&mq2->chunk_queue.array));
+    jl_atomic_store_relaxed(&q->top, 0);
+    jl_atomic_store_relaxed(&q->bottom, 0);
+    jl_atomic_store_relaxed(&q->array, jl_atomic_load_relaxed(&mq2->ptr_queue.array));
+    arraylist_new(&mq.reclaim_set, 32);
     lostval = NULL;
     lostval_parents.len = 0;
     lostval_parents_done.len = 0;
@@ -267,7 +287,7 @@ void gc_verify(jl_ptls_t ptls)
         gc_mark_finlist(&mq, &ptls2->finalizers, 0);
     }
     gc_mark_finlist(&mq, &finalizer_list_marked, 0);
-    gc_mark_loop_(ptls, &mq);
+    gc_mark_loop_serial_(ptls, &mq);
     int clean_len = bits_save[GC_CLEAN].len;
     for(int i = 0; i < clean_len + bits_save[GC_OLD].len; i++) {
         jl_taggedvalue_t *v = (jl_taggedvalue_t*)bits_save[i >= clean_len ? GC_OLD : GC_CLEAN].items[i >= clean_len ? i - clean_len : i];
@@ -351,10 +371,10 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
         if (!in_freelist) {
             jl_value_t *dt = jl_typeof(jl_valueof(v));
             if (dt != (jl_value_t*)jl_buff_tag &&
-                    // the following are used by the deserializer to invalidate objects
-                    v->header != 0x10 && v->header != 0x20 &&
-                    v->header != 0x30 && v->header != 0x40 &&
-                    v->header != 0x50 && v->header != 0x60) {
+                    // the following may be use (by the deserializer) to invalidate objects
+                    v->header != 0xf10 && v->header != 0xf20 &&
+                    v->header != 0xf30 && v->header != 0xf40 &&
+                    v->header != 0xf50 && v->header != 0xf60) {
                 assert(jl_typeof(dt) == (jl_value_t*)jl_datatype_type);
             }
         }
@@ -565,11 +585,11 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high)
         // Find the age bit
         char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
         int obj_id = (((char*)tag) - page_begin) / osize;
-        uint8_t *ages = pg->ages + obj_id / 8;
+        uint32_t *ages = pg->ages + obj_id / 32;
         // Force this to be a young object to save some memory
         // (especially on 32bit where it's more likely to have pointer-like
         //  bit patterns)
-        *ages &= ~(1 << (obj_id % 8));
+        *ages &= ~(1 << (obj_id % 32));
         memset(tag, 0xff, osize);
         // set mark to GC_MARKED (young and marked)
         tag->bits.gc = GC_MARKED;
@@ -627,91 +647,6 @@ void jl_gc_debug_print_status(void)
 }
 #endif
 
-#ifdef OBJPROFILE
-static htable_t obj_counts[3];
-static htable_t obj_sizes[3];
-void objprofile_count(void *ty, int old, int sz)
-{
-    if (gc_verifying) return;
-    if ((intptr_t)ty <= 0x10) {
-        ty = (void*)jl_buff_tag;
-    }
-    else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag &&
-             jl_typeof(ty) == (jl_value_t*)jl_datatype_type &&
-             ((jl_datatype_t*)ty)->instance) {
-        ty = jl_singleton_tag;
-    }
-    void **bp = ptrhash_bp(&obj_counts[old], ty);
-    if (*bp == HT_NOTFOUND)
-        *bp = (void*)2;
-    else
-        (*((intptr_t*)bp))++;
-    bp = ptrhash_bp(&obj_sizes[old], ty);
-    if (*bp == HT_NOTFOUND)
-        *bp = (void*)(intptr_t)(1 + sz);
-    else
-        *((intptr_t*)bp) += sz;
-}
-
-void objprofile_reset(void)
-{
-    for (int g = 0; g < 3; g++) {
-        htable_reset(&obj_counts[g], 0);
-        htable_reset(&obj_sizes[g], 0);
-    }
-}
-
-static void objprofile_print(htable_t nums, htable_t sizes)
-{
-    for(int i=0; i < nums.size; i+=2) {
-        if (nums.table[i+1] != HT_NOTFOUND) {
-            void *ty = nums.table[i];
-            int num = (intptr_t)nums.table[i + 1] - 1;
-            size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1;
-            static const int ptr_hex_width = 2 * sizeof(void*);
-            if (sz > 2e9) {
-                jl_safe_printf(" %6d : %*.1f GB of (%*p) ",
-                               num, 6, ((double)sz) / 1024 / 1024 / 1024,
-                               ptr_hex_width, ty);
-            }
-            else if (sz > 2e6) {
-                jl_safe_printf(" %6d : %*.1f MB of (%*p) ",
-                               num, 6, ((double)sz) / 1024 / 1024,
-                               ptr_hex_width, ty);
-            }
-            else if (sz > 2e3) {
-                jl_safe_printf(" %6d : %*.1f kB of (%*p) ",
-                               num, 6, ((double)sz) / 1024,
-                               ptr_hex_width, ty);
-            }
-            else {
-                jl_safe_printf(" %6d : %*d  B of (%*p) ",
-                          num, 6, (int)sz, ptr_hex_width, ty);
-            }
-            if (ty == (void*)jl_buff_tag)
-                jl_safe_printf("#<buffer>");
-            else if (ty == jl_malloc_tag)
-                jl_safe_printf("#<malloc>");
-            else if (ty == jl_singleton_tag)
-                jl_safe_printf("#<singletons>");
-            else
-                jl_static_show(JL_STDERR, (jl_value_t*)ty);
-            jl_safe_printf("\n");
-        }
-    }
-}
-
-void objprofile_printall(void)
-{
-    jl_safe_printf("Transient mark :\n");
-    objprofile_print(obj_counts[0], obj_sizes[0]);
-    jl_safe_printf("Perm mark :\n");
-    objprofile_print(obj_counts[1], obj_sizes[1]);
-    jl_safe_printf("Remset :\n");
-    objprofile_print(obj_counts[2], obj_sizes[2]);
-}
-#endif
-
 #if defined(GC_TIME) || defined(GC_FINAL_STATS)
 STATIC_INLINE double jl_ns2ms(int64_t t)
 {
@@ -978,7 +913,7 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
                    "(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n",
                    jl_ns2ms(sweep_pause), live_bytes / 1024,
                    gc_num.freed / 1024, estimate_freed / 1024,
-                   gc_num.freed - estimate_freed, pct, gc_num.since_sweep / 1024,
+                   gc_num.freed - estimate_freed, pct, gc_num.allocd / 1024,
                    jl_ns2ms(gc_postmark_end - gc_premark_end),
                    sweep_full ? "full" : "quick", -gc_num.allocd / 1024);
 }
@@ -1237,25 +1172,68 @@ void gc_count_pool(void)
 // `offset` will be added to `mq->current` for convenience in the debugger.
 NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset)
 {
-    jl_jmp_buf *old_buf = jl_get_safe_restore();
-    jl_jmp_buf buf;
-    jl_set_safe_restore(&buf);
-    if (jl_setjmp(buf, 0) != 0) {
-        jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n");
-        jl_set_safe_restore(old_buf);
-        return;
-    }
-    jl_value_t **start = mq->start;
-    jl_value_t **end = mq->current + offset;
-    for (; start < end; start++) {
-        jl_value_t *obj = *start;
-        jl_taggedvalue_t *o = jl_astaggedvalue(obj);
-        jl_safe_printf("Queued object: %p :: (tag: %zu) (bits: %zu)\n", obj,
-                       (uintptr_t)o->header, ((uintptr_t)o->header & 3));
-        jl_((void*)(jl_datatype_t *)(o->header & ~(uintptr_t)0xf));
-    }
-    jl_set_safe_restore(old_buf);
-}
+    // jl_jmp_buf *old_buf = jl_get_safe_restore();
+    // jl_jmp_buf buf;
+    // jl_set_safe_restore(&buf);
+    // if (jl_setjmp(buf, 0) != 0) {
+    //     jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n");
+    //     jl_set_safe_restore(old_buf);
+    //     return;
+    // }
+    // jl_value_t **start = mq->start;
+    // jl_value_t **end = mq->current + offset;
+    // for (; start < end; start++) {
+    //     jl_value_t *obj = *start;
+    //     jl_taggedvalue_t *o = jl_astaggedvalue(obj);
+    //     jl_safe_printf("Queued object: %p :: (tag: %zu) (bits: %zu)\n", obj,
+    //                    (uintptr_t)o->header, ((uintptr_t)o->header & 3));
+    //     jl_((void*)(jl_datatype_t *)(o->header & ~(uintptr_t)0xf));
+    // }
+    // jl_set_safe_restore(old_buf);
+}
+
+// int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
+// {
+//     int nf = (int)jl_datatype_nfields(vt);
+//     for (int i = 1; i < nf; i++) {
+//         if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
+//             return i - 1;
+//     }
+//     return nf - 1;
+// }
+
+// int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
+// {
+//     char *slot = (char*)_slot;
+//     jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
+//     char *start = NULL;
+//     size_t len = 0;
+//     size_t elsize = sizeof(void*);
+//     if (vt == jl_module_type) {
+//         jl_module_t *m = (jl_module_t*)obj;
+//         start = (char*)m->usings.items;
+//         len = m->usings.len;
+//     }
+//     else if (vt == jl_simplevector_type) {
+//         start = (char*)jl_svec_data(obj);
+//         len = jl_svec_len(obj);
+//     }
+//     else if (vt->name == jl_array_typename) {
+//         jl_array_t *a = (jl_array_t*)obj;
+//         start = (char*)a->data;
+//         len = jl_array_len(a);
+//         elsize = a->elsize;
+//     }
+//     if (slot < start || slot >= start + elsize * len)
+//         return -1;
+//     return (slot - start) / elsize;
+// }
+
+static int gc_logging_enabled = 0;
+
+// JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
+//     gc_logging_enabled = enable;
+// }
 
 void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT {
     if (!gc_logging_enabled) {
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index 40292cf472037..b35c1722c82ff 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -165,9 +165,11 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
         ssize = LLT_ALIGN(ssize, jl_page_size);
     }
     if (stk == NULL) {
-        if (jl_atomic_load_relaxed(&num_stack_mappings) >= MAX_STACK_MAPPINGS)
+        if (jl_atomic_load_relaxed(&num_stack_mappings) >= MAX_STACK_MAPPINGS) {
             // we accept that this can go over by as much as nthreads since it's not a CAS
+            errno = ENOMEM;
             return NULL;
+        }
         // TODO: allocate blocks of stacks? but need to mprotect individually anyways
         stk = malloc_stack(ssize);
         if (stk == MAP_FAILED)
diff --git a/src/gc.c b/src/gc.c
index 0f6c13777e265..ce80597a937f1 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -13,6 +13,14 @@
 extern "C" {
 #endif
 
+// Number of threads currently running the GC mark-loop
+_Atomic(int) gc_n_threads_marking;
+// `tid` of mutator thread that triggered GC
+_Atomic(int) gc_master_tid;
+// Mutex/cond used to synchronize sleep/wakeup of GC threads
+uv_mutex_t gc_threads_lock;
+uv_cond_t gc_threads_cond;
+
 // Linked list of callback functions
 
 typedef void (*jl_gc_cb_func_t)(void);
@@ -293,8 +301,8 @@ static _Atomic(int) support_conservative_marking = 0;
  * Note about GC synchronization:
  *
  * When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
- * `0` to `1` to make sure that only one thread can be running the GC. Other
- * threads that enters `jl_gc_collect()` at the same time (or later calling
+ * `0` to `1` to make sure that only one thread can be running `_jl_gc_collect`. Other
+ * mutator threads that enters `jl_gc_collect()` at the same time (or later calling
  * from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
  *
  * Before starting the mark phase the GC thread calls `jl_safepoint_start_gc()`
@@ -304,7 +312,7 @@ static _Atomic(int) support_conservative_marking = 0;
  * GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no
  * other thread will access them when the GC is running.
  *
- * During the mark and sweep phase of the GC, the threads that are not running
+ * During the mark and sweep phase of the GC, the mutator threads that are not running
  * the GC should either be running unmanaged code (or code section that does
  * not have a GC critical region mainly including storing to the stack or
  * another object) or paused at a safepoint and wait for the GC to finish.
@@ -335,6 +343,7 @@ bigval_t *big_objects_marked = NULL;
 arraylist_t finalizer_list_marked;
 arraylist_t to_finalize;
 
+
 NOINLINE uintptr_t gc_get_stack_ptr(void)
 {
     return (uintptr_t)jl_get_frame_addr();
@@ -344,6 +353,11 @@ NOINLINE uintptr_t gc_get_stack_ptr(void)
 
 void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
 {
+    JL_TIMING(GC, GC_Stop);
+#ifdef USE_TRACY
+    TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
+    TracyCZoneColor(ctx, 0x696969);
+#endif
     assert(gc_n_threads);
     if (gc_n_threads > 1)
         jl_wake_libuv();
@@ -362,7 +376,6 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
     }
 }
 
-
 // malloc wrappers, aligned allocation
 
 #if defined(_OS_WINDOWS_)
@@ -530,7 +543,7 @@ static void run_finalizers(jl_task_t *ct)
     jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
     arraylist_new(&to_finalize, 0);
 
-    uint64_t save_rngState[4];
+    uint64_t save_rngState[JL_RNG_SIZE];
     memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
     jl_rng_split(ct->rngState, finalizer_rngState);
 
@@ -628,7 +641,7 @@ JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTS
 
 JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
 {
-    if (__unlikely(jl_typeis(f, jl_voidpointer_type))) {
+    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
         jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
     }
     else {
@@ -892,8 +905,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
             page->has_young = 1;
             char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET;
             int obj_id = (((char*)o) - page_begin) / page->osize;
-            uint8_t *ages = page->ages + obj_id / 8;
-            jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8)));
+            uint32_t *ages = page->ages + obj_id / 32;
+            jl_atomic_fetch_and_relaxed((_Atomic(uint32_t)*)ages, ~(1 << (obj_id % 32)));
         }
     }
     objprofile_count(jl_typeof(jl_valueof(o)),
@@ -1224,7 +1237,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
     jl_ptls_t ptls = jl_current_task->ptls;
     jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
-    pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1);
+    pg->ages = (uint32_t*)malloc_s(LLT_ALIGN(GC_PAGE_SZ / 8 / p->osize + 1, sizeof(uint32_t)));
     pg->thread_n = ptls->tid;
     jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL);
     p->newpages = fl;
@@ -1298,7 +1311,7 @@ int64_t lazy_freed_pages = 0;
 static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
 {
     char *data = pg->data;
-    uint8_t *ages = pg->ages;
+    uint32_t *ages = pg->ages;
     jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
     char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize;
     size_t old_nfree = pg->nfree;
@@ -1349,18 +1362,25 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
         int16_t prev_nold = 0;
         int pg_nfree = 0;
         jl_taggedvalue_t **pfl_begin = NULL;
-        uint8_t msk = 1; // mask for the age bit in the current age byte
+        uint32_t msk = 1; // mask for the age bit in the current age byte
+        uint32_t age = *ages;
         while ((char*)v <= lim) {
+            if (!msk) {
+                msk = 1;
+                *ages = age;
+                ages++;
+                age = *ages;
+            }
             int bits = v->bits.gc;
             if (!gc_marked(bits)) {
                 *pfl = v;
                 pfl = &v->next;
                 pfl_begin = pfl_begin ? pfl_begin : pfl;
                 pg_nfree++;
-                *ages &= ~msk;
+                age &= ~msk;
             }
             else { // marked young or old
-                if (*ages & msk || bits == GC_OLD_MARKED) { // old enough
+                if (age & msk || bits == GC_OLD_MARKED) { // old enough
                     // `!age && bits == GC_OLD_MARKED` is possible for
                     // non-first-class objects like array buffers
                     // (they may get promoted by jl_gc_wb_buf for example,
@@ -1376,17 +1396,13 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
                     has_young = 1;
                 }
                 has_marked |= gc_marked(bits);
-                *ages |= msk;
+                age |= msk;
                 freedall = 0;
             }
             v = (jl_taggedvalue_t*)((char*)v + osize);
             msk <<= 1;
-            if (!msk) {
-                msk = 1;
-                ages++;
-            }
         }
-
+        *ages = age;
         assert(!freedall);
         pg->has_marked = has_marked;
         pg->has_young = has_young;
@@ -1678,7 +1694,6 @@ JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t
     jl_gc_debug_print_status();
     jl_(vt);
     jl_gc_debug_critical_error();
-    gc_mark_loop_unwind(ptls, mq, 0);
     abort();
 }
 
@@ -1701,65 +1716,53 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
     }
 }
 
-// Double the mark queue
-static NOINLINE void gc_markqueue_resize(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
-{
-    jl_value_t **old_start = mq->start;
-    size_t old_queue_size = (mq->end - mq->start);
-    size_t offset = (mq->current - old_start);
-    mq->start = (jl_value_t **)realloc_s(old_start, 2 * old_queue_size * sizeof(jl_value_t *));
-    mq->current = (mq->start + offset);
-    mq->end = (mq->start + 2 * old_queue_size);
-}
-
 // Push a work item to the queue
-STATIC_INLINE void gc_markqueue_push(jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT
+STATIC_INLINE void gc_ptr_queue_push(jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT
 {
-    if (__unlikely(mq->current == mq->end))
-        gc_markqueue_resize(mq);
-    *mq->current = obj;
-    mq->current++;
+    ws_array_t *old_a = ws_queue_push(&mq->ptr_queue, &obj, sizeof(jl_value_t*));
+    // Put `old_a` in `reclaim_set` to be freed after the mark phase
+    if (__unlikely(old_a != NULL))
+        arraylist_push(&mq->reclaim_set, old_a);
 }
 
 // Pop from the mark queue
-STATIC_INLINE jl_value_t *gc_markqueue_pop(jl_gc_markqueue_t *mq)
+STATIC_INLINE jl_value_t *gc_ptr_queue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
 {
-    jl_value_t *obj = NULL;
-    if (mq->current != mq->start) {
-        mq->current--;
-        obj = *mq->current;
-    }
-    return obj;
+    jl_value_t *v = NULL;
+    ws_queue_pop(&mq->ptr_queue, &v, sizeof(jl_value_t*));
+    return v;
 }
 
-// Double the chunk queue
-static NOINLINE void gc_chunkqueue_resize(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
+// Steal from `mq2`
+STATIC_INLINE jl_value_t *gc_ptr_queue_steal_from(jl_gc_markqueue_t *mq2) JL_NOTSAFEPOINT
 {
-    jl_gc_chunk_t *old_start = mq->chunk_start;
-    size_t old_queue_size = (mq->chunk_end - mq->chunk_start);
-    size_t offset = (mq->current_chunk - old_start);
-    mq->chunk_start = (jl_gc_chunk_t *)realloc_s(old_start, 2 * old_queue_size * sizeof(jl_gc_chunk_t));
-    mq->current_chunk = (mq->chunk_start + offset);
-    mq->chunk_end = (mq->chunk_start + 2 * old_queue_size);
+    jl_value_t *v = NULL;
+    ws_queue_steal_from(&mq2->ptr_queue, &v, sizeof(jl_value_t*));
+    return v;
 }
 
 // Push chunk `*c` into chunk queue
 STATIC_INLINE void gc_chunkqueue_push(jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT
 {
-    if (__unlikely(mq->current_chunk == mq->chunk_end))
-        gc_chunkqueue_resize(mq);
-    *mq->current_chunk = *c;
-    mq->current_chunk++;
+    ws_array_t *old_a = ws_queue_push(&mq->chunk_queue, c, sizeof(jl_gc_chunk_t));
+    // Put `old_a` in `reclaim_set` to be freed after the mark phase
+    if (__unlikely(old_a != NULL))
+        arraylist_push(&mq->reclaim_set, old_a);
 }
 
 // Pop chunk from chunk queue
 STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
 {
     jl_gc_chunk_t c = {.cid = GC_empty_chunk};
-    if (mq->current_chunk != mq->chunk_start) {
-        mq->current_chunk--;
-        c = *mq->current_chunk;
-    }
+    ws_queue_pop(&mq->chunk_queue, &c, sizeof(jl_gc_chunk_t));
+    return c;
+}
+
+// Steal chunk from `mq2`
+STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_steal_from(jl_gc_markqueue_t *mq2) JL_NOTSAFEPOINT
+{
+    jl_gc_chunk_t c = {.cid = GC_empty_chunk};
+    ws_queue_steal_from(&mq2->chunk_queue, &c, sizeof(jl_gc_chunk_t));
     return c;
 }
 
@@ -1774,7 +1777,7 @@ STATIC_INLINE void gc_try_claim_and_push(jl_gc_markqueue_t *mq, void *_obj,
     if (!gc_old(o->header) && nptr)
         *nptr |= 1;
     if (gc_try_setmark_tag(o, GC_MARKED))
-        gc_markqueue_push(mq, obj);
+        gc_ptr_queue_push(mq, obj);
 }
 
 // Mark object with 8bit field descriptors
@@ -1822,7 +1825,6 @@ STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint
         if (new_obj != NULL) {
             verify_parent2("object", obj16_parent, slot, "field(%d)",
                             gc_slot_to_fieldidx(obj16_parent, slot, (jl_datatype_t*)jl_typeof(obj16_parent)));
-            gc_try_claim_and_push(mq, new_obj, &nptr);
             if (obj16_begin + 1 != obj16_end) {
                 gc_try_claim_and_push(mq, new_obj, &nptr);
             }
@@ -1867,6 +1869,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint
             gc_heap_snapshot_record_object_edge((jl_value_t*)obj32_parent, slot);
         }
     }
+    gc_mark_push_remset(ptls, (jl_value_t *)obj32_parent, nptr);
     return new_obj;
 }
 
@@ -1878,24 +1881,59 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v
     jl_value_t *new_obj;
     // Decide whether need to chunk objary
     (void)jl_assume(step > 0);
-    size_t nobjs = (obj_end - obj_begin) / step;
-    if (nobjs > MAX_REFS_AT_ONCE) {
-        jl_gc_chunk_t c = {GC_objary_chunk, obj_parent, obj_begin + step * MAX_REFS_AT_ONCE,
-                           obj_end,      NULL,       NULL,
-                           step,         nptr};
-        gc_chunkqueue_push(mq, &c);
-        obj_end = obj_begin + step * MAX_REFS_AT_ONCE;
+    if ((nptr & 0x2) == 0x2) {
+        // pre-scan this object: most of this object should be old, so look for
+        // the first young object before starting this chunk
+        // (this also would be valid for young objects, but probably less beneficial)
+        for (; obj_begin < obj_end; obj_begin += step) {
+            new_obj = *obj_begin;
+            if (new_obj != NULL) {
+                verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)",
+                               gc_slot_to_arrayidx(obj_parent, obj_begin));
+                jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
+                if (!gc_old(o->header))
+                    nptr |= 1;
+                if (!gc_marked(o->header))
+                    break;
+                gc_heap_snapshot_record_array_edge(obj_parent, &new_obj);
+            }
+        }
     }
-    for (; obj_begin < obj_end; obj_begin += step) {
+    size_t too_big = (obj_end - obj_begin) / GC_CHUNK_BATCH_SIZE > step; // use this order of operations to avoid idiv
+    jl_value_t **scan_end = obj_end;
+    int pushed_chunk = 0;
+    if (too_big) {
+        scan_end = obj_begin + step * GC_CHUNK_BATCH_SIZE;
+        // case 1: array owner is young, so we won't need to scan through all its elements
+        // to know that we will never need to push it to the remset. it's fine
+        // to create a chunk with "incorrect" `nptr` and push it to the chunk-queue
+        // ASAP in order to expose as much parallelism as possible
+        // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after
+        // scanning the array elements
+        if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) {
+            jl_gc_chunk_t c = {GC_objary_chunk, obj_parent, scan_end, obj_end, NULL, NULL, step, nptr};
+            gc_chunkqueue_push(mq, &c);
+            pushed_chunk = 1;
+        }
+    }
+    for (; obj_begin < scan_end; obj_begin += step) {
         new_obj = *obj_begin;
         if (new_obj != NULL) {
             verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)",
-                           gc_slot_to_arrayidx(obj_parent, obj_begin));
+                        gc_slot_to_arrayidx(obj_parent, obj_begin));
             gc_try_claim_and_push(mq, new_obj, &nptr);
             gc_heap_snapshot_record_array_edge(obj_parent, &new_obj);
         }
     }
-    gc_mark_push_remset(ptls, obj_parent, nptr);
+    if (too_big) {
+        if (!pushed_chunk) {
+            jl_gc_chunk_t c = {GC_objary_chunk, obj_parent, scan_end, obj_end, NULL, NULL, step, nptr};
+            gc_chunkqueue_push(mq, &c);
+        }
+    }
+    else {
+        gc_mark_push_remset(ptls, obj_parent, nptr);
+    }
 }
 
 // Mark array with 8bit field descriptors
@@ -1906,14 +1944,49 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_value_t *new_obj;
     size_t elsize = ((jl_array_t *)ary8_parent)->elsize / sizeof(jl_value_t *);
-    // Decide whether need to chunk ary8
-    size_t nrefs = (ary8_end - ary8_begin) / elsize;
-    if (nrefs > MAX_REFS_AT_ONCE) {
-        jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, ary8_begin + elsize * MAX_REFS_AT_ONCE,
-                           ary8_end,   elem_begin,  elem_end,
-                           0,          nptr};
-        gc_chunkqueue_push(mq, &c);
-        ary8_end = ary8_begin + elsize * MAX_REFS_AT_ONCE;
+    assert(elsize > 0);
+    // Decide whether need to chunk objary
+    if ((nptr & 0x2) == 0x2) {
+        // pre-scan this object: most of this object should be old, so look for
+        // the first young object before starting this chunk
+        // (this also would be valid for young objects, but probably less beneficial)
+        for (; ary8_begin < ary8_end; ary8_begin += elsize) {
+            int early_end = 0;
+            for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
+                new_obj = ary8_begin[*pindex];
+                if (new_obj != NULL) {
+                    verify_parent2("array", ary8_parent, &new_obj, "elem(%d)",
+                                gc_slot_to_arrayidx(ary8_parent, ary8_begin));
+                    jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
+                    if (!gc_old(o->header))
+                        nptr |= 1;
+                    if (!gc_marked(o->header)){
+                        early_end = 1;
+                        break;
+                    }
+                    gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj);
+                }
+            }
+            if (early_end)
+                break;
+        }
+    }
+    size_t too_big = (ary8_end - ary8_begin) / GC_CHUNK_BATCH_SIZE > elsize; // use this order of operations to avoid idiv
+    jl_value_t **scan_end = ary8_end;
+    int pushed_chunk = 0;
+    if (too_big) {
+        scan_end = ary8_begin + elsize * GC_CHUNK_BATCH_SIZE;
+        // case 1: array owner is young, so we won't need to scan through all its elements
+        // to know that we will never need to push it to the remset. it's fine
+        // to create a chunk with "incorrect" `nptr` and push it to the chunk-queue
+        // ASAP in order to expose as much parallelism as possible
+        // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after
+        // scanning the array elements
+        if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) {
+            jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, 0, nptr};
+            gc_chunkqueue_push(mq, &c);
+            pushed_chunk = 1;
+        }
     }
     for (; ary8_begin < ary8_end; ary8_begin += elsize) {
         for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) {
@@ -1926,7 +1999,15 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va
             }
         }
     }
-    gc_mark_push_remset(ptls, ary8_parent, nptr);
+    if (too_big) {
+        if (!pushed_chunk) {
+            jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, 0, nptr};
+            gc_chunkqueue_push(mq, &c);
+        }
+    }
+    else {
+        gc_mark_push_remset(ptls, ary8_parent, nptr);
+    }
 }
 
 // Mark array with 16bit field descriptors
@@ -1937,16 +2018,51 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_value_t *new_obj;
     size_t elsize = ((jl_array_t *)ary16_parent)->elsize / sizeof(jl_value_t *);
-    // Decide whether need to chunk ary16
-    size_t nrefs = (ary16_end - ary16_begin) / elsize;
-    if (nrefs > MAX_REFS_AT_ONCE) {
-        jl_gc_chunk_t c = {GC_ary16_chunk, ary16_parent, ary16_begin + elsize * MAX_REFS_AT_ONCE,
-                           ary16_end,   elem_begin,   elem_end,
-                           0,           nptr};
-        gc_chunkqueue_push(mq, &c);
-        ary16_end = ary16_begin + elsize * MAX_REFS_AT_ONCE;
-    }
-    for (; ary16_begin < ary16_end; ary16_begin += elsize) {
+    assert(elsize > 0);
+    // Decide whether need to chunk objary
+    if ((nptr & 0x2) == 0x2) {
+        // pre-scan this object: most of this object should be old, so look for
+        // the first young object before starting this chunk
+        // (this also would be valid for young objects, but probably less beneficial)
+        for (; ary16_begin < ary16_end; ary16_begin += elsize) {
+            int early_end = 0;
+            for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) {
+                new_obj = ary16_begin[*pindex];
+                if (new_obj != NULL) {
+                    verify_parent2("array", ary16_parent, &new_obj, "elem(%d)",
+                                gc_slot_to_arrayidx(ary16_parent, ary16_begin));
+                    jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
+                    if (!gc_old(o->header))
+                        nptr |= 1;
+                    if (!gc_marked(o->header)){
+                        early_end = 1;
+                        break;
+                    }
+                    gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj);
+                }
+            }
+            if (early_end)
+                break;
+        }
+    }
+    size_t too_big = (ary16_end - ary16_begin) / GC_CHUNK_BATCH_SIZE > elsize; // use this order of operations to avoid idiv
+    jl_value_t **scan_end = ary16_end;
+    int pushed_chunk = 0;
+    if (too_big) {
+        scan_end = ary16_begin + elsize * GC_CHUNK_BATCH_SIZE;
+        // case 1: array owner is young, so we won't need to scan through all its elements
+        // to know that we will never need to push it to the remset. it's fine
+        // to create a chunk with "incorrect" `nptr` and push it to the chunk-queue
+        // ASAP in order to expose as much parallelism as possible
+        // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after
+        // scanning the array elements
+        if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) {
+            jl_gc_chunk_t c = {GC_ary16_chunk, ary16_parent, scan_end, ary16_end, elem_begin, elem_end, elsize, nptr};
+            gc_chunkqueue_push(mq, &c);
+            pushed_chunk = 1;
+        }
+    }
+    for (; ary16_begin < scan_end; ary16_begin += elsize) {
         for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) {
             new_obj = ary16_begin[*pindex];
             if (new_obj != NULL) {
@@ -1957,7 +2073,15 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_
             }
         }
     }
-    gc_mark_push_remset(ptls, ary16_parent, nptr);
+    if (too_big) {
+        if (!pushed_chunk) {
+            jl_gc_chunk_t c = {GC_ary16_chunk, ary16_parent, scan_end, ary16_end, elem_begin, elem_end, elsize, nptr};
+            gc_chunkqueue_push(mq, &c);
+        }
+    }
+    else {
+        gc_mark_push_remset(ptls, ary16_parent, nptr);
+    }
 }
 
 // Mark chunk of large array
@@ -2023,6 +2147,8 @@ STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroot
             if (nroots & 1) {
                 void **slot = (void **)gc_read_stack(&rts[i], offset, lb, ub);
                 new_obj = (jl_value_t *)gc_read_stack(slot, offset, lb, ub);
+                if (new_obj == NULL)
+                    continue;
             }
             else {
                 new_obj = (jl_value_t *)gc_read_stack(&rts[i], offset, lb, ub);
@@ -2034,11 +2160,13 @@ STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroot
                 }
                 if (gc_ptr_tag(new_obj, 2))
                     continue;
+                // conservatively check for the presence of any smalltag type, instead of just NULL
+                // in the very unlikely event that codegen decides to root the result of julia.typeof
+                if (new_obj < (jl_value_t*)((uintptr_t)jl_max_tags << 4))
+                    continue;
             }
-            if (new_obj != NULL) {
-                gc_try_claim_and_push(mq, new_obj, NULL);
-                gc_heap_snapshot_record_frame_to_object_edge(s, new_obj);
-            }
+            gc_try_claim_and_push(mq, new_obj, NULL);
+            gc_heap_snapshot_record_frame_to_object_edge(s, new_obj);
         }
         jl_gcframe_t *sprev = (jl_gcframe_t *)gc_read_stack(&s->prev, offset, lb, ub);
         if (sprev == NULL)
@@ -2121,10 +2249,10 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
     jl_value_t *new_obj;
     // Decide whether need to chunk finlist
     size_t nrefs = (fl_end - fl_begin);
-    if (nrefs > MAX_REFS_AT_ONCE) {
-        jl_gc_chunk_t c = {GC_finlist_chunk, NULL, fl_begin + MAX_REFS_AT_ONCE, fl_end, 0, 0, 0, 0};
+    if (nrefs > GC_CHUNK_BATCH_SIZE) {
+        jl_gc_chunk_t c = {GC_finlist_chunk, NULL, fl_begin + GC_CHUNK_BATCH_SIZE, fl_end, 0, 0, 0, 0};
         gc_chunkqueue_push(mq, &c);
-        fl_end = fl_begin + MAX_REFS_AT_ONCE;
+        fl_end = fl_begin + GC_CHUNK_BATCH_SIZE;
     }
     for (; fl_begin < fl_end; fl_begin++) {
         new_obj = *fl_begin;
@@ -2156,14 +2284,14 @@ JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
 {
     int may_claim = gc_try_setmark_tag(jl_astaggedvalue(obj), GC_MARKED);
     if (may_claim)
-        gc_markqueue_push(&ptls->mark_queue, obj);
+        gc_ptr_queue_push(&ptls->mark_queue, obj);
     return may_claim;
 }
 
 JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
                                             jl_value_t **objs, size_t nobjs)
 {
-    uintptr_t nptr = (nobjs << 2) & (jl_astaggedvalue(parent)->bits.gc & 3);
+    uintptr_t nptr = (nobjs << 2) | (jl_astaggedvalue(parent)->bits.gc & 2);
     gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr);
 }
 
@@ -2180,32 +2308,150 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
             jl_raise_debugger();
     #endif
         jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
-        jl_datatype_t *vt = (jl_datatype_t *)(o->header & ~(uintptr_t)0xf);
+        uintptr_t vtag = o->header & ~(uintptr_t)0xf;
         uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED;
         int update_meta = __likely(!meta_updated && !gc_verifying);
         int foreign_alloc = 0;
-        if (update_meta && jl_object_in_image(new_obj)) {
+        // directly point at eyt_obj_in_img to encourage inlining
+        if (update_meta && o->bits.in_image) {
             foreign_alloc = 1;
             update_meta = 0;
         }
         // Symbols are always marked
-        assert(vt != jl_symbol_type);
-        if (vt == jl_simplevector_type) {
-            size_t l = jl_svec_len(new_obj);
-            jl_value_t **data = jl_svec_data(new_obj);
-            size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t);
-            if (update_meta)
-                gc_setmark(ptls, o, bits, dtsz);
-            else if (foreign_alloc)
-                objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
-            jl_value_t *objary_parent = new_obj;
-            jl_value_t **objary_begin = data;
-            jl_value_t **objary_end = data + l;
-            uint32_t step = 1;
-            uintptr_t nptr = (l << 2) | (bits & GC_OLD);
-            gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr);
-        }
-        else if (vt->name == jl_array_typename) {
+        assert(vtag != (uintptr_t)jl_symbol_type && vtag != jl_symbol_tag << 4);
+        if (vtag == (jl_datatype_tag << 4) ||
+            vtag == (jl_unionall_tag << 4) ||
+            vtag == (jl_uniontype_tag << 4) ||
+            vtag == (jl_tvar_tag << 4) ||
+            vtag == (jl_vararg_tag << 4)) {
+            // these objects have pointers in them, but no other special handling
+            // so we want these to fall through to the end
+            vtag = (uintptr_t)small_typeof[vtag / sizeof(*small_typeof)];
+        }
+        else if (vtag < jl_max_tags << 4) {
+            // these objects either have specialing handling
+            if (vtag == jl_simplevector_tag << 4) {
+                size_t l = jl_svec_len(new_obj);
+                jl_value_t **data = jl_svec_data(new_obj);
+                size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t);
+                if (update_meta)
+                    gc_setmark(ptls, o, bits, dtsz);
+                else if (foreign_alloc)
+                    objprofile_count(jl_simplevector_type, bits == GC_OLD_MARKED, dtsz);
+                jl_value_t *objary_parent = new_obj;
+                jl_value_t **objary_begin = data;
+                jl_value_t **objary_end = data + l;
+                uint32_t step = 1;
+                uintptr_t nptr = (l << 2) | (bits & GC_OLD);
+                gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr);
+            }
+            else if (vtag == jl_module_tag << 4) {
+                if (update_meta)
+                    gc_setmark(ptls, o, bits, sizeof(jl_module_t));
+                else if (foreign_alloc)
+                    objprofile_count(jl_module_type, bits == GC_OLD_MARKED, sizeof(jl_module_t));
+                jl_module_t *mb_parent = (jl_module_t *)new_obj;
+                jl_svec_t *bindings = jl_atomic_load_relaxed(&mb_parent->bindings);
+                jl_binding_t **table = (jl_binding_t**)jl_svec_data(bindings);
+                size_t bsize = jl_svec_len(bindings);
+                uintptr_t nptr = ((bsize + mb_parent->usings.len + 1) << 2) | (bits & GC_OLD);
+                jl_binding_t **mb_begin = table + 1;
+                jl_binding_t **mb_end = table + bsize;
+                gc_mark_module_binding(ptls, mb_parent, mb_begin, mb_end, nptr, bits);
+            }
+            else if (vtag == jl_task_tag << 4) {
+                if (update_meta)
+                    gc_setmark(ptls, o, bits, sizeof(jl_task_t));
+                else if (foreign_alloc)
+                    objprofile_count(jl_task_type, bits == GC_OLD_MARKED, sizeof(jl_task_t));
+                jl_task_t *ta = (jl_task_t *)new_obj;
+                gc_scrub_record_task(ta);
+                if (gc_cblist_task_scanner) {
+                    int16_t tid = jl_atomic_load_relaxed(&ta->tid);
+                    gc_invoke_callbacks(jl_gc_cb_task_scanner_t, gc_cblist_task_scanner,
+                                        (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task));
+                }
+        #ifdef COPY_STACKS
+                void *stkbuf = ta->stkbuf;
+                if (stkbuf && ta->copy_stack) {
+                    gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
+                    // For gc_heap_snapshot_record:
+                    // TODO: attribute size of stack
+                    // TODO: edge to stack data
+                    // TODO: synthetic node for stack data (how big is it?)
+                }
+        #endif
+                jl_gcframe_t *s = ta->gcstack;
+                size_t nroots;
+                uintptr_t offset = 0;
+                uintptr_t lb = 0;
+                uintptr_t ub = (uintptr_t)-1;
+        #ifdef COPY_STACKS
+                if (stkbuf && ta->copy_stack && !ta->ptls) {
+                    int16_t tid = jl_atomic_load_relaxed(&ta->tid);
+                    assert(tid >= 0);
+                    jl_ptls_t ptls2 = gc_all_tls_states[tid];
+                    ub = (uintptr_t)ptls2->stackbase;
+                    lb = ub - ta->copy_stack;
+                    offset = (uintptr_t)stkbuf - lb;
+                }
+        #endif
+                if (s != NULL) {
+                    nroots = gc_read_stack(&s->nroots, offset, lb, ub);
+                    gc_heap_snapshot_record_task_to_frame_edge(ta, s);
+                    assert(nroots <= UINT32_MAX);
+                    gc_mark_stack(ptls, s, (uint32_t)nroots, offset, lb, ub);
+                }
+                if (ta->excstack) {
+                    jl_excstack_t *excstack = ta->excstack;
+                    gc_heap_snapshot_record_task_to_frame_edge(ta, excstack);
+                    size_t itr = ta->excstack->top;
+                    gc_setmark_buf_(ptls, excstack, bits,
+                                    sizeof(jl_excstack_t) +
+                                        sizeof(uintptr_t) * excstack->reserved_size);
+                    gc_mark_excstack(ptls, excstack, itr);
+                }
+                const jl_datatype_layout_t *layout = jl_task_type->layout;
+                assert(layout->fielddesc_type == 0);
+                assert(layout->nfields > 0);
+                uint32_t npointers = layout->npointers;
+                char *obj8_parent = (char *)ta;
+                uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout);
+                uint8_t *obj8_end = obj8_begin + npointers;
+                // assume tasks always reference young objects: set lowest bit
+                uintptr_t nptr = (npointers << 2) | 1 | bits;
+                new_obj = gc_mark_obj8(ptls, obj8_parent, obj8_begin, obj8_end, nptr);
+                if (new_obj != NULL) {
+                    if (!meta_updated)
+                        goto mark_obj;
+                    else
+                        gc_ptr_queue_push(mq, new_obj);
+                }
+            }
+            else if (vtag == jl_string_tag << 4) {
+                size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1;
+                if (update_meta)
+                    gc_setmark(ptls, o, bits, dtsz);
+                else if (foreign_alloc)
+                    objprofile_count(jl_string_type, bits == GC_OLD_MARKED, dtsz);
+            }
+            else {
+                jl_datatype_t *vt = small_typeof[vtag / sizeof(*small_typeof)];
+                size_t dtsz = jl_datatype_size(vt);
+                if (update_meta)
+                    gc_setmark(ptls, o, bits, dtsz);
+                else if (foreign_alloc)
+                    objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
+            }
+            return;
+        }
+        else {
+            jl_datatype_t *vt = (jl_datatype_t *)vtag;
+            if (__unlikely(!jl_is_datatype(vt) || vt->smalltag))
+                gc_assert_datatype_fail(ptls, vt, mq);
+        }
+        jl_datatype_t *vt = (jl_datatype_t *)vtag;
+        if (vt->name == jl_array_typename) {
             jl_array_t *a = (jl_array_t *)new_obj;
             jl_array_flags_t flags = a->flags;
             if (update_meta) {
@@ -2294,175 +2540,81 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                     assert(0 && "unimplemented");
                 }
             }
+            return;
         }
-        else if (vt == jl_module_type) {
-            if (update_meta)
-                gc_setmark(ptls, o, bits, sizeof(jl_module_t));
-            else if (foreign_alloc)
-                objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_module_t));
-            jl_module_t *mb_parent = (jl_module_t *)new_obj;
-            jl_svec_t *bindings = jl_atomic_load_relaxed(&mb_parent->bindings);
-            jl_binding_t **table = (jl_binding_t**)jl_svec_data(bindings);
-            size_t bsize = jl_svec_len(bindings);
-            uintptr_t nptr = ((bsize + mb_parent->usings.len + 1) << 2) | (bits & GC_OLD);
-            jl_binding_t **mb_begin = table + 1;
-            jl_binding_t **mb_end = table + bsize;
-            gc_mark_module_binding(ptls, mb_parent, mb_begin, mb_end, nptr, bits);
-        }
-        else if (vt == jl_task_type) {
-            if (update_meta)
-                gc_setmark(ptls, o, bits, sizeof(jl_task_t));
-            else if (foreign_alloc)
-                objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
-            jl_task_t *ta = (jl_task_t *)new_obj;
-            gc_scrub_record_task(ta);
-            if (gc_cblist_task_scanner) {
-                int16_t tid = jl_atomic_load_relaxed(&ta->tid);
-                gc_invoke_callbacks(jl_gc_cb_task_scanner_t, gc_cblist_task_scanner,
-                                    (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task));
-            }
-    #ifdef COPY_STACKS
-            void *stkbuf = ta->stkbuf;
-            if (stkbuf && ta->copy_stack) {
-                gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
-                // For gc_heap_snapshot_record:
-                // TODO: attribute size of stack
-                // TODO: edge to stack data
-                // TODO: synthetic node for stack data (how big is it?)
-            }
-    #endif
-            jl_gcframe_t *s = ta->gcstack;
-            size_t nroots;
-            uintptr_t offset = 0;
-            uintptr_t lb = 0;
-            uintptr_t ub = (uintptr_t)-1;
-    #ifdef COPY_STACKS
-            if (stkbuf && ta->copy_stack && !ta->ptls) {
-                int16_t tid = jl_atomic_load_relaxed(&ta->tid);
-                assert(tid >= 0);
-                jl_ptls_t ptls2 = gc_all_tls_states[tid];
-                ub = (uintptr_t)ptls2->stackbase;
-                lb = ub - ta->copy_stack;
-                offset = (uintptr_t)stkbuf - lb;
-            }
-    #endif
-            if (s != NULL) {
-                nroots = gc_read_stack(&s->nroots, offset, lb, ub);
-                gc_heap_snapshot_record_task_to_frame_edge(ta, s);
-                assert(nroots <= UINT32_MAX);
-                gc_mark_stack(ptls, s, (uint32_t)nroots, offset, lb, ub);
-            }
-            if (ta->excstack) {
-                jl_excstack_t *excstack = ta->excstack;
-                gc_heap_snapshot_record_task_to_frame_edge(ta, excstack);
-                size_t itr = ta->excstack->top;
-                gc_setmark_buf_(ptls, excstack, bits,
-                                sizeof(jl_excstack_t) +
-                                    sizeof(uintptr_t) * excstack->reserved_size);
-                gc_mark_excstack(ptls, excstack, itr);
-            }
-            const jl_datatype_layout_t *layout = jl_task_type->layout;
-            assert(layout->fielddesc_type == 0);
-            assert(layout->nfields > 0);
-            uint32_t npointers = layout->npointers;
-            char *obj8_parent = (char *)ta;
+        size_t dtsz = jl_datatype_size(vt);
+        if (update_meta)
+            gc_setmark(ptls, o, bits, dtsz);
+        else if (foreign_alloc)
+            objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
+        if (vt == jl_weakref_type)
+            return;
+        const jl_datatype_layout_t *layout = vt->layout;
+        uint32_t npointers = layout->npointers;
+        if (npointers == 0)
+            return;
+        uintptr_t nptr = (npointers << 2 | (bits & GC_OLD));
+        assert((layout->nfields > 0 || layout->fielddesc_type == 3) &&
+               "opaque types should have been handled specially");
+        if (layout->fielddesc_type == 0) {
+            char *obj8_parent = (char *)new_obj;
             uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout);
             uint8_t *obj8_end = obj8_begin + npointers;
-            // assume tasks always reference young objects: set lowest bit
-            uintptr_t nptr = (npointers << 2) | 1 | bits;
+            assert(obj8_begin < obj8_end);
             new_obj = gc_mark_obj8(ptls, obj8_parent, obj8_begin, obj8_end, nptr);
             if (new_obj != NULL) {
                 if (!meta_updated)
                     goto mark_obj;
                 else
-                    gc_markqueue_push(mq, new_obj);
+                    gc_ptr_queue_push(mq, new_obj);
             }
         }
-        else if (vt == jl_string_type) {
-            size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1;
-            if (update_meta)
-                gc_setmark(ptls, o, bits, dtsz);
-            else if (foreign_alloc)
-                objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
-        }
-        else {
-            if (__unlikely(!jl_is_datatype(vt)))
-                gc_assert_datatype_fail(ptls, vt, mq);
-            size_t dtsz = jl_datatype_size(vt);
-            if (update_meta)
-                gc_setmark(ptls, o, bits, dtsz);
-            else if (foreign_alloc)
-                objprofile_count(vt, bits == GC_OLD_MARKED, dtsz);
-            if (vt == jl_weakref_type)
-                return;
-            const jl_datatype_layout_t *layout = vt->layout;
-            uint32_t npointers = layout->npointers;
-            if (npointers == 0)
-                return;
-            uintptr_t nptr = (npointers << 2 | (bits & GC_OLD));
-            assert((layout->nfields > 0 || layout->fielddesc_type == 3) &&
-                   "opaque types should have been handled specially");
-            if (layout->fielddesc_type == 0) {
-                char *obj8_parent = (char *)new_obj;
-                uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout);
-                uint8_t *obj8_end = obj8_begin + npointers;
-                assert(obj8_begin < obj8_end);
-                new_obj = gc_mark_obj8(ptls, obj8_parent, obj8_begin, obj8_end, nptr);
-                if (new_obj != NULL) {
-                    if (!meta_updated)
-                        goto mark_obj;
-                    else
-                        gc_markqueue_push(mq, new_obj);
-                }
-            }
-            else if (layout->fielddesc_type == 1) {
-                char *obj16_parent = (char *)new_obj;
-                uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout);
-                uint16_t *obj16_end = obj16_begin + npointers;
-                assert(obj16_begin < obj16_end);
-                new_obj = gc_mark_obj16(ptls, obj16_parent, obj16_begin, obj16_end, nptr);
-                if (new_obj != NULL) {
-                    if (!meta_updated)
-                        goto mark_obj;
-                    else
-                        gc_markqueue_push(mq, new_obj);
-                }
-            }
-            else if (layout->fielddesc_type == 2) {
-                // This is very uncommon
-                // Do not do store to load forwarding to save some code size
-                char *obj32_parent = (char *)new_obj;
-                uint32_t *obj32_begin = (uint32_t *)jl_dt_layout_ptrs(layout);
-                uint32_t *obj32_end = obj32_begin + npointers;
-                assert(obj32_begin < obj32_end);
-                new_obj = gc_mark_obj32(ptls, obj32_parent, obj32_begin, obj32_end, nptr);
-                if (new_obj != NULL) {
-                    if (!meta_updated)
-                        goto mark_obj;
-                    else
-                        gc_markqueue_push(mq, new_obj);
-                }
+        else if (layout->fielddesc_type == 1) {
+            char *obj16_parent = (char *)new_obj;
+            uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout);
+            uint16_t *obj16_end = obj16_begin + npointers;
+            assert(obj16_begin < obj16_end);
+            new_obj = gc_mark_obj16(ptls, obj16_parent, obj16_begin, obj16_end, nptr);
+            if (new_obj != NULL) {
+                if (!meta_updated)
+                    goto mark_obj;
+                else
+                    gc_ptr_queue_push(mq, new_obj);
             }
-            else {
-                assert(layout->fielddesc_type == 3);
-                jl_fielddescdyn_t *desc = (jl_fielddescdyn_t *)jl_dt_layout_fields(layout);
-                int old = jl_astaggedvalue(new_obj)->bits.gc & 2;
-                uintptr_t young = desc->markfunc(ptls, new_obj);
-                if (old && young)
-                    gc_mark_push_remset(ptls, new_obj, young * 4 + 3);
+        }
+        else if (layout->fielddesc_type == 2) {
+            // This is very uncommon
+            // Do not do store to load forwarding to save some code size
+            char *obj32_parent = (char *)new_obj;
+            uint32_t *obj32_begin = (uint32_t *)jl_dt_layout_ptrs(layout);
+            uint32_t *obj32_end = obj32_begin + npointers;
+            assert(obj32_begin < obj32_end);
+            new_obj = gc_mark_obj32(ptls, obj32_parent, obj32_begin, obj32_end, nptr);
+            if (new_obj != NULL) {
+                if (!meta_updated)
+                    goto mark_obj;
+                else
+                    gc_ptr_queue_push(mq, new_obj);
             }
         }
+        else {
+            assert(layout->fielddesc_type == 3);
+            jl_fielddescdyn_t *desc = (jl_fielddescdyn_t *)jl_dt_layout_fields(layout);
+            int old = jl_astaggedvalue(new_obj)->bits.gc & 2;
+            uintptr_t young = desc->markfunc(ptls, new_obj);
+            if (old && young)
+                gc_mark_push_remset(ptls, new_obj, young * 4 + 3);
+        }
     }
 }
 
 // Used in gc-debug
-void gc_mark_loop_(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
+void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
 {
     while (1) {
-        void *new_obj = (void *)gc_markqueue_pop(&ptls->mark_queue);
+        void *new_obj = (void *)gc_ptr_queue_pop(&ptls->mark_queue);
         // No more objects to mark
-        if (new_obj == NULL) {
-            // TODO: work-stealing comes here...
+        if (__unlikely(new_obj == NULL)) {
             return;
         }
         gc_mark_outrefs(ptls, mq, new_obj, 0);
@@ -2477,21 +2629,277 @@ void gc_drain_own_chunkqueue(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
         c = gc_chunkqueue_pop(mq);
         if (c.cid != GC_empty_chunk) {
             gc_mark_chunk(ptls, mq, &c);
-            gc_mark_loop_(ptls, mq);
+            gc_mark_loop_serial_(ptls, mq);
         }
     } while (c.cid != GC_empty_chunk);
 }
 
-// Main mark loop. Single stack (allocated on the heap) of `jl_value_t *`
+// Main mark loop. Stack (allocated on the heap) of `jl_value_t *`
 // is used to keep track of processed items. Maintaning this stack (instead of
 // native one) avoids stack overflow when marking deep objects and
 // makes it easier to implement parallel marking via work-stealing
-JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls)
+JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
 {
-    gc_mark_loop_(ptls, &ptls->mark_queue);
+    gc_mark_loop_serial_(ptls, &ptls->mark_queue);
     gc_drain_own_chunkqueue(ptls, &ptls->mark_queue);
 }
 
+extern int gc_first_tid;
+
+void gc_mark_and_steal(jl_ptls_t ptls)
+{
+    jl_gc_markqueue_t *mq = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq_master = NULL;
+    int master_tid = jl_atomic_load(&gc_master_tid);
+    if (master_tid != -1)
+        mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    void *new_obj;
+    jl_gc_chunk_t c;
+    pop : {
+        new_obj = gc_ptr_queue_pop(mq);
+        if (new_obj != NULL) {
+            goto mark;
+        }
+        c = gc_chunkqueue_pop(mq);
+        if (c.cid != GC_empty_chunk) {
+            gc_mark_chunk(ptls, mq, &c);
+            goto pop;
+        }
+        goto steal;
+    }
+    mark : {
+        gc_mark_outrefs(ptls, mq, new_obj, 0);
+        goto pop;
+    }
+    // Note that for the stealing heuristics, we try to
+    // steal chunks much more agressively than pointers,
+    // since we know chunks will likely expand into a lot
+    // of work for the mark loop
+    steal : {
+        // Try to steal chunk from random GC thread
+        for (int i = 0; i < 4 * jl_n_gcthreads; i++) {
+            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads;
+            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
+            c = gc_chunkqueue_steal_from(mq2);
+            if (c.cid != GC_empty_chunk) {
+                gc_mark_chunk(ptls, mq, &c);
+                goto pop;
+            }
+        }
+        // Sequentially walk GC threads to try to steal chunk
+        for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
+            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
+            c = gc_chunkqueue_steal_from(mq2);
+            if (c.cid != GC_empty_chunk) {
+                gc_mark_chunk(ptls, mq, &c);
+                goto pop;
+            }
+        }
+        // Try to steal chunk from master thread
+        if (mq_master != NULL) {
+            c = gc_chunkqueue_steal_from(mq_master);
+            if (c.cid != GC_empty_chunk) {
+                gc_mark_chunk(ptls, mq, &c);
+                goto pop;
+            }
+        }
+        // Try to steal pointer from random GC thread
+        for (int i = 0; i < 4 * jl_n_gcthreads; i++) {
+            uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads;
+            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue;
+            new_obj = gc_ptr_queue_steal_from(mq2);
+            if (new_obj != NULL)
+                goto mark;
+        }
+        // Sequentially walk GC threads to try to steal pointer
+        for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
+            jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue;
+            new_obj = gc_ptr_queue_steal_from(mq2);
+            if (new_obj != NULL)
+                goto mark;
+        }
+        // Try to steal pointer from master thread
+        if (mq_master != NULL) {
+            new_obj = gc_ptr_queue_steal_from(mq_master);
+            if (new_obj != NULL)
+                goto mark;
+        }
+    }
+}
+
+#define GC_BACKOFF_MIN 4
+#define GC_BACKOFF_MAX 12
+
+void gc_mark_backoff(int *i)
+{
+    if (*i < GC_BACKOFF_MAX) {
+        (*i)++;
+    }
+    for (int j = 0; j < (1 << *i); j++) {
+        jl_cpu_pause();
+    }
+}
+
+void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
+{
+    int backoff = GC_BACKOFF_MIN;
+    if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        // Wake threads up and try to do some work
+        uv_mutex_lock(&gc_threads_lock);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+        uv_cond_broadcast(&gc_threads_cond);
+        uv_mutex_unlock(&gc_threads_lock);
+        gc_mark_and_steal(ptls);
+        jl_atomic_fetch_add(&gc_n_threads_marking, -1);
+    }
+    while (jl_atomic_load(&gc_n_threads_marking) > 0) {
+        // Try to become a thief while other threads are marking
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+        if (jl_atomic_load(&gc_master_tid) != -1) {
+            gc_mark_and_steal(ptls);
+        }
+        jl_atomic_fetch_add(&gc_n_threads_marking, -1);
+        // Failed to steal
+        gc_mark_backoff(&backoff);
+    }
+}
+
+void gc_mark_loop(jl_ptls_t ptls)
+{
+    if (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled) {
+        gc_mark_loop_serial(ptls);
+    }
+    else {
+        gc_mark_loop_parallel(ptls, 1);
+    }
+}
+
+void gc_mark_loop_barrier(void)
+{
+    jl_atomic_store(&gc_master_tid, -1);
+    while (jl_atomic_load(&gc_n_threads_marking) != 0) {
+        jl_cpu_pause();
+    }
+}
+
+void gc_mark_clean_reclaim_sets(void)
+{
+    // Clean up `reclaim-sets` and reset `top/bottom` of queues
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set;
+        ws_array_t *a = NULL;
+        while ((a = (ws_array_t *)arraylist_pop(reclaim_set2)) != NULL) {
+            free(a->buffer);
+            free(a);
+        }
+    }
+}
+
+// void gc_premark(jl_ptls_t ptls2)
+// {
+//     arraylist_t *remset = ptls2->heap.remset;
+//     ptls2->heap.remset = ptls2->heap.last_remset;
+//     ptls2->heap.last_remset = remset;
+//     ptls2->heap.remset->len = 0;
+//     ptls2->heap.remset_nptr = 0;
+//     // avoid counting remembered objects
+//     // in `perm_scanned_bytes`
+//     size_t len = remset->len;
+//     void **items = remset->items;
+//     for (size_t i = 0; i < len; i++) {
+//         jl_value_t *item = (jl_value_t *)items[i];
+//         objprofile_count(jl_typeof(item), 2, 0);
+//         jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
+//     }
+// }
+
+#ifdef OBJPROFILE
+static htable_t obj_counts[3];
+static htable_t obj_sizes[3];
+void objprofile_count(void *ty, int old, int sz)
+{
+    if (gc_verifying) return;
+    if ((intptr_t)ty <= 0x10) {
+        ty = (void*)jl_buff_tag;
+    }
+    else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag &&
+             jl_typeof(ty) == (jl_value_t*)jl_datatype_type &&
+             ((jl_datatype_t*)ty)->instance) {
+        ty = jl_singleton_tag;
+    }
+    void **bp = ptrhash_bp(&obj_counts[old], ty);
+    if (*bp == HT_NOTFOUND)
+        *bp = (void*)2;
+    else
+        (*((intptr_t*)bp))++;
+    bp = ptrhash_bp(&obj_sizes[old], ty);
+    if (*bp == HT_NOTFOUND)
+        *bp = (void*)(intptr_t)(1 + sz);
+    else
+        *((intptr_t*)bp) += sz;
+}
+
+void objprofile_reset(void)
+{
+    for (int g = 0; g < 3; g++) {
+        htable_reset(&obj_counts[g], 0);
+        htable_reset(&obj_sizes[g], 0);
+    }
+}
+
+static void objprofile_print(htable_t nums, htable_t sizes)
+{
+    for(int i=0; i < nums.size; i+=2) {
+        if (nums.table[i+1] != HT_NOTFOUND) {
+            void *ty = nums.table[i];
+            int num = (intptr_t)nums.table[i + 1] - 1;
+            size_t sz = (uintptr_t)ptrhash_get(&sizes, ty) - 1;
+            static const int ptr_hex_width = 2 * sizeof(void*);
+            if (sz > 2e9) {
+                jl_safe_printf(" %6d : %*.1f GB of (%*p) ",
+                               num, 6, ((double)sz) / 1024 / 1024 / 1024,
+                               ptr_hex_width, ty);
+            }
+            else if (sz > 2e6) {
+                jl_safe_printf(" %6d : %*.1f MB of (%*p) ",
+                               num, 6, ((double)sz) / 1024 / 1024,
+                               ptr_hex_width, ty);
+            }
+            else if (sz > 2e3) {
+                jl_safe_printf(" %6d : %*.1f kB of (%*p) ",
+                               num, 6, ((double)sz) / 1024,
+                               ptr_hex_width, ty);
+            }
+            else {
+                jl_safe_printf(" %6d : %*d  B of (%*p) ",
+                          num, 6, (int)sz, ptr_hex_width, ty);
+            }
+            if (ty == (void*)jl_buff_tag)
+                jl_safe_printf("#<buffer>");
+            else if (ty == jl_malloc_tag)
+                jl_safe_printf("#<malloc>");
+            else if (ty == jl_singleton_tag)
+                jl_safe_printf("#<singletons>");
+            else
+                jl_static_show(JL_STDERR, (jl_value_t*)ty);
+            jl_safe_printf("\n");
+        }
+    }
+}
+
+void objprofile_printall(void)
+{
+    jl_safe_printf("Transient mark :\n");
+    objprofile_print(obj_counts[0], obj_sizes[0]);
+    jl_safe_printf("Perm mark :\n");
+    objprofile_print(obj_counts[1], obj_sizes[1]);
+    jl_safe_printf("Remset :\n");
+    objprofile_print(obj_counts[2], obj_sizes[2]);
+}
+#endif
+
 static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
 {
     jl_task_t *task;
@@ -2571,7 +2979,6 @@ static void gc_mark_roots(jl_gc_markqueue_t *mq)
     }
     gc_try_claim_and_push(mq, jl_all_methods, NULL);
     gc_try_claim_and_push(mq, _jl_debug_method_invalidation, NULL);
-    gc_try_claim_and_push(mq, jl_build_ids, NULL);
     // constants
     gc_try_claim_and_push(mq, jl_emptytuple_type, NULL);
     gc_try_claim_and_push(mq, cmpswap_names, NULL);
@@ -2631,48 +3038,104 @@ static void sweep_finalizer_list(arraylist_t *list)
 
 size_t jl_maxrss(void);
 
+extern void objprofile_printall(void);
+extern void objprofile_reset(void);
+
 // Only one thread should be running in this function
 static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 {
     combine_thread_gc_counts(&gc_num);
 
+    // We separate the update of the graph from the update of live_bytes here
+    // so that the sweep shows a downward trend in memory usage.
+    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd);
+
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
 
     uint64_t gc_start_time = jl_hrtime();
     int64_t last_perm_scanned_bytes = perm_scanned_bytes;
-    JL_PROBE_GC_MARK_BEGIN();
     uint64_t start_mark_time = jl_hrtime();
-
-    // 1. fix GC bits of objects in the remset.
-    assert(gc_n_threads);
-    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL)
-            gc_premark(ptls2);
-    }
-
-    assert(gc_n_threads);
-    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        if (ptls2 != NULL) {
-            // 2.1. mark every thread local root
-            gc_queue_thread_local(mq, ptls2);
-            // 2.2. mark any managed objects in the backtrace buffer
-            // TODO: treat these as roots for gc_heap_snapshot_record
-            gc_queue_bt_buf(mq, ptls2);
-            // 2.3. mark every object in the `last_remsets` and `rem_binding`
-            gc_queue_remset(ptls, ptls2);
+    JL_PROBE_GC_MARK_BEGIN();
+    {
+        JL_TIMING(GC, GC_Mark);
+
+        // 1. fix GC bits of objects in the remset.
+        assert(gc_n_threads);
+        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            if (ptls2 != NULL)
+                gc_premark(ptls2);
+        }
+
+        assert(gc_n_threads);
+        int single_threaded = (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled);
+        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+            jl_ptls_t ptls_dest = ptls;
+            jl_gc_markqueue_t *mq_dest = mq;
+            if (!single_threaded) {
+                ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads];
+                mq_dest = &ptls_dest->mark_queue;
+            }
+            if (ptls2 != NULL) {
+                // 2.1. mark every thread local root
+                gc_queue_thread_local(mq_dest, ptls2);
+                // 2.2. mark any managed objects in the backtrace buffer
+                // TODO: treat these as roots for gc_heap_snapshot_record
+                gc_queue_bt_buf(mq_dest, ptls2);
+                // 2.3. mark every object in the `last_remsets` and `rem_binding`
+                gc_queue_remset(ptls_dest, ptls2);
+            }
         }
-    }
 
-    // 3. walk roots
-    gc_mark_roots(mq);
-    if (gc_cblist_root_scanner) {
-        gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
-            gc_cblist_root_scanner, (collection));
+        // 3. walk roots
+        gc_mark_roots(mq);
+        if (gc_cblist_root_scanner) {
+            gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
+                gc_cblist_root_scanner, (collection));
+        }
+        gc_mark_loop(ptls);
+        gc_mark_loop_barrier();
+        gc_mark_clean_reclaim_sets();
+
+        // 4. check for objects to finalize
+        clear_weak_refs();
+        // Record the length of the marked list since we need to
+        // mark the object moved to the marked list from the
+        // `finalizer_list` by `sweep_finalizer_list`
+        size_t orig_marked_len = finalizer_list_marked.len;
+        assert(gc_n_threads);
+        for (int i = 0; i < gc_n_threads; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            if (ptls2 != NULL)
+                sweep_finalizer_list(&ptls2->finalizers);
+        }
+        if (prev_sweep_full) {
+            sweep_finalizer_list(&finalizer_list_marked);
+            orig_marked_len = 0;
+        }
+        assert(gc_n_threads);
+        for (int i = 0; i < gc_n_threads; i++) {
+            jl_ptls_t ptls2 = gc_all_tls_states[i];
+            if (ptls2 != NULL)
+                gc_mark_finlist(mq, &ptls2->finalizers, 0);
+        }
+        gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len);
+        // "Flush" the mark stack before flipping the reset_age bit
+        // so that the objects are not incorrectly reset.
+        gc_mark_loop_serial(ptls);
+        // Conservative marking relies on age to tell allocated objects
+        // and freelist entries apart.
+        mark_reset_age = !jl_gc_conservative_gc_support_enabled();
+        // Reset the age and old bit for any unmarked objects referenced by the
+        // `to_finalize` list. These objects are only reachable from this list
+        // and should not be referenced by any old objects so this won't break
+        // the GC invariant.
+        gc_mark_finlist(mq, &to_finalize, 0);
+        gc_mark_loop_serial(ptls);
+        mark_reset_age = 0;
     }
-    gc_mark_loop(ptls);
-    gc_num.since_sweep += gc_num.allocd;
+
     JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes);
     gc_settime_premark_end();
     gc_time_mark_pause(gc_start_time, scanned_bytes, perm_scanned_bytes);
@@ -2680,51 +3143,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     uint64_t mark_time = end_mark_time - start_mark_time;
     gc_num.mark_time = mark_time;
     gc_num.total_mark_time += mark_time;
-    int64_t actual_allocd = gc_num.since_sweep;
-    // marking is over
-
-    // 4. check for objects to finalize
-    clear_weak_refs();
-    // Record the length of the marked list since we need to
-    // mark the object moved to the marked list from the
-    // `finalizer_list` by `sweep_finalizer_list`
-    size_t orig_marked_len = finalizer_list_marked.len;
-    assert(gc_n_threads);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            sweep_finalizer_list(&ptls2->finalizers);
-    }
-    if (prev_sweep_full) {
-        sweep_finalizer_list(&finalizer_list_marked);
-        orig_marked_len = 0;
-    }
-    assert(gc_n_threads);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            gc_mark_finlist(mq, &ptls2->finalizers, 0);
-    }
-    gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len);
-    // "Flush" the mark stack before flipping the reset_age bit
-    // so that the objects are not incorrectly reset.
-    gc_mark_loop(ptls);
-    // Conservative marking relies on age to tell allocated objects
-    // and freelist entries apart.
-    mark_reset_age = !jl_gc_conservative_gc_support_enabled();
-    // Reset the age and old bit for any unmarked objects referenced by the
-    // `to_finalize` list. These objects are only reachable from this list
-    // and should not be referenced by any old objects so this won't break
-    // the GC invariant.
-    gc_mark_finlist(mq, &to_finalize, 0);
-    gc_mark_loop(ptls);
-    mark_reset_age = 0;
+    int64_t allocd = gc_num.allocd;
     gc_settime_postmark_end();
+    // marking is over
 
     // Flush everything in mark cache
     gc_sync_all_caches_nolock(ptls);
 
-    int64_t live_sz_ub = live_bytes + actual_allocd;
+    int64_t live_sz_ub = live_bytes + allocd;
     int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
     int64_t estimate_freed = live_sz_ub - live_sz_est;
 
@@ -2734,11 +3160,11 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_stats_big_obj();
     objprofile_printall();
     objprofile_reset();
-    gc_num.total_allocd += gc_num.since_sweep;
+    gc_num.total_allocd += gc_num.allocd;
     if (!prev_sweep_full)
         promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
     // 5. next collection decision
-    int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10));
+    int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(allocd/10));
     int nptr = 0;
     assert(gc_n_threads);
     for (int i = 0; i < gc_n_threads; i++) {
@@ -2754,16 +3180,18 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
     // update heuristics only if this GC was automatically triggered
     if (collection == JL_GC_AUTO) {
-        if (not_freed_enough) {
-            gc_num.interval = gc_num.interval * 2;
-        }
         if (large_frontier) {
             sweep_full = 1;
+            gc_num.interval = last_long_collect_interval;
+        }
+        if (not_freed_enough || large_frontier) {
+            gc_num.interval = gc_num.interval * 2;
         }
+
         size_t maxmem = 0;
 #ifdef _P64
         // on a big memory machine, increase max_collect_interval to totalmem / nthreads / 2
-        maxmem = total_mem / gc_n_threads / 2;
+        maxmem = total_mem / (gc_n_threads - jl_n_gcthreads) / 2;
 #endif
         if (maxmem < max_collect_interval)
             maxmem = max_collect_interval;
@@ -2773,7 +3201,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         }
     }
 
-
     // If the live data outgrows the suggested max_total_memory
     // we keep going with minimum intervals and full gcs until
     // we either free some space or get an OOM error.
@@ -2792,20 +3219,30 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         // on the first collection after sweep_full, and the current scan
         perm_scanned_bytes = 0;
         promoted_bytes = 0;
+        last_long_collect_interval = gc_num.interval;
     }
     scanned_bytes = 0;
     // 6. start sweeping
     uint64_t start_sweep_time = jl_hrtime();
     JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
-    sweep_weak_refs();
-    sweep_stack_pools();
-    gc_sweep_foreign_objs();
-    gc_sweep_other(ptls, sweep_full);
-    gc_scrub();
-    gc_verify_tags();
-    gc_sweep_pool(sweep_full);
-    if (sweep_full)
-        gc_sweep_perm_alloc();
+    {
+        JL_TIMING(GC, GC_Sweep);
+#ifdef USE_TRACY
+        if (sweep_full) {
+            TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
+            TracyCZoneColor(ctx, 0xFFA500);
+        }
+#endif
+        sweep_weak_refs();
+        sweep_stack_pools();
+        gc_sweep_foreign_objs();
+        gc_sweep_other(ptls, sweep_full);
+        gc_scrub();
+        gc_verify_tags();
+        gc_sweep_pool(sweep_full);
+        if (sweep_full)
+            gc_sweep_perm_alloc();
+    }
     JL_PROBE_GC_SWEEP_END();
 
     uint64_t gc_end_time = jl_hrtime();
@@ -2848,7 +3285,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     _report_gc_finished(pause, gc_num.freed, sweep_full, recollect);
 
     gc_final_pause_end(gc_start_time, gc_end_time);
-    gc_time_sweep_pause(gc_end_time, actual_allocd, live_bytes,
+    gc_time_sweep_pause(gc_end_time, allocd, live_bytes,
                         estimate_freed, sweep_full);
     gc_num.full_sweep += sweep_full;
     uint64_t max_memory = last_live_bytes + gc_num.allocd;
@@ -2856,14 +3293,28 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         gc_num.max_memory = max_memory;
     }
 
-    gc_num.allocd = 0;
     last_live_bytes = live_bytes;
-    live_bytes += -gc_num.freed + gc_num.since_sweep;
+    // Can't call inc_live_bytes here because we already added allocd
+    // to the graph earlier
+    live_bytes += -gc_num.freed + gc_num.allocd;
+    jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed);
 
     if (collection == JL_GC_AUTO) {
+        //If we aren't freeing enough or are seeing lots and lots of pointers let it increase faster
+        if (!not_freed_enough || large_frontier) {
+            int64_t tot = 2 * (live_bytes + gc_num.allocd) / 3;
+            if (gc_num.interval > tot) {
+                gc_num.interval = tot;
+                last_long_collect_interval = tot;
+            }
         // If the current interval is larger than half the live data decrease the interval
-        int64_t half = live_bytes/2;
-        if (gc_num.interval > half) gc_num.interval = half;
+        }
+        else {
+            int64_t half = (live_bytes / 2);
+            if (gc_num.interval > half)
+                gc_num.interval = half;
+        }
+
         // But never go below default
         if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval;
     }
@@ -2871,12 +3322,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     if (gc_num.interval + live_bytes > max_total_memory) {
         if (live_bytes < max_total_memory) {
             gc_num.interval = max_total_memory - live_bytes;
+            last_long_collect_interval = max_total_memory - live_bytes;
         }
         else {
             // We can't stay under our goal so let's go back to
             // the minimum interval and hope things get better
             gc_num.interval = default_collect_interval;
-       }
+        }
     }
 
     gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed,
@@ -2887,7 +3339,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     prev_sweep_full = sweep_full;
     gc_num.pause += !recollect;
     gc_num.total_time += pause;
-    gc_num.since_sweep = 0;
+    gc_num.allocd = 0;
     gc_num.freed = 0;
     if (pause > gc_num.max_pause) {
         gc_num.max_pause = pause;
@@ -2922,7 +3374,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
         jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
         return;
     }
-    JL_TIMING(GC);
+
+    JL_TIMING_SUSPEND(GC, ct);
+    JL_TIMING(GC, GC);
+
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
@@ -2947,6 +3402,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     if (duration > gc_num.max_time_to_safepoint)
         gc_num.max_time_to_safepoint = duration;
     gc_num.time_to_safepoint = duration;
+    gc_num.total_time_to_safepoint += duration;
 
     gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
         gc_cblist_pre_gc, (collection));
@@ -2974,6 +3430,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // Doing this on all threads is racy (it's impossible to check
     // or wait for finalizers on other threads without dead lock).
     if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
+        JL_TIMING(GC, GC_Finalizers);
         run_finalizers(ct);
     }
     JL_PROBE_GC_FINALIZER();
@@ -2991,7 +3448,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
     assert(gc_n_threads);
     for (size_t i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2)
+        if (ptls2 != NULL)
             gc_queue_thread_local(mq, ptls2);
     }
     gc_mark_roots(mq);
@@ -3027,14 +3484,18 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     gc_cache->nbig_obj = 0;
 
     // Initialize GC mark-queue
-    size_t init_size = (1 << 18);
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
-    mq->start = (jl_value_t **)malloc_s(init_size * sizeof(jl_value_t *));
-    mq->current = mq->start;
-    mq->end = mq->start + init_size;
-    size_t cq_init_size = (1 << 14);
-    mq->current_chunk = mq->chunk_start = (jl_gc_chunk_t *)malloc_s(cq_init_size * sizeof(jl_gc_chunk_t));
-    mq->chunk_end = mq->chunk_start + cq_init_size;
+    ws_queue_t *cq = &mq->chunk_queue;
+    ws_array_t *wsa = create_ws_array(GC_CHUNK_QUEUE_INIT_SIZE, sizeof(jl_gc_chunk_t));
+    jl_atomic_store_relaxed(&cq->top, 0);
+    jl_atomic_store_relaxed(&cq->bottom, 0);
+    jl_atomic_store_relaxed(&cq->array, wsa);
+    ws_queue_t *q = &mq->ptr_queue;
+    ws_array_t *wsa2 = create_ws_array(GC_PTR_QUEUE_INIT_SIZE, sizeof(jl_value_t *));
+    jl_atomic_store_relaxed(&q->top, 0);
+    jl_atomic_store_relaxed(&q->bottom, 0);
+    jl_atomic_store_relaxed(&q->array, wsa2);
+    arraylist_new(&mq->reclaim_set, 32);
 
     memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
     jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
@@ -3043,13 +3504,13 @@ void jl_init_thread_heap(jl_ptls_t ptls)
 // System-wide initializations
 void jl_gc_init(void)
 {
-    if (jl_options.heap_size_hint)
-        jl_gc_set_max_memory(jl_options.heap_size_hint);
 
-    JL_MUTEX_INIT(&heapsnapshot_lock);
-    JL_MUTEX_INIT(&finalizers_lock);
+    JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
+    JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
     uv_mutex_init(&gc_cache_lock);
     uv_mutex_init(&gc_perm_lock);
+    uv_mutex_init(&gc_threads_lock);
+    uv_cond_init(&gc_threads_cond);
 
     jl_gc_init_page();
     jl_gc_debug_init();
@@ -3068,14 +3529,16 @@ void jl_gc_init(void)
     uint64_t constrained_mem = uv_get_constrained_memory();
     if (constrained_mem > 0 && constrained_mem < total_mem)
         total_mem = constrained_mem;
+    double percent;
+    if (total_mem < 128e9)
+        percent = total_mem * 2.34375e-12 + 0.6; // 60% at 0 gigs and 90% at 128 to not
+    else                                         // overcommit too much on memory contrained devices
+        percent = 0.9;
+    max_total_memory = total_mem * percent;
 #endif
+    if (jl_options.heap_size_hint)
+        jl_gc_set_max_memory(jl_options.heap_size_hint);
 
-    // We allocate with abandon until we get close to the free memory on the machine.
-    uint64_t free_mem = uv_get_available_memory();
-    uint64_t high_water_mark = free_mem / 10 * 7;  // 70% high water mark
-
-    if (high_water_mark < max_total_memory)
-       max_total_memory = high_water_mark;
     t_start = jl_hrtime();
 }
 
@@ -3282,7 +3745,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
             goto valid_object;
         // We know now that the age bit reflects liveness status during
         // the last sweep and that the cell has not been reused since.
-        if (!(meta->ages[obj_id / 8] & (1 << (obj_id % 8)))) {
+        if (!(meta->ages[obj_id / 32] & (1 << (obj_id % 32)))) {
             return NULL;
         }
         // Not a freelist entry, therefore a valid object.
@@ -3298,6 +3761,31 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
     return NULL;
 }
 
+// gc thread function
+void jl_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        uv_mutex_lock(&gc_threads_lock);
+        while (jl_atomic_load(&gc_n_threads_marking) == 0) {
+            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
+        }
+        uv_mutex_unlock(&gc_threads_lock);
+        gc_mark_loop_parallel(ptls, 0);
+    }
+}
+
 // added for MMTk integration
 void enable_collection(void)
 {
@@ -3306,6 +3794,22 @@ void disable_collection(void)
 {
 }
 
+JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
+{
+}
+
+JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+}
+
+JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
+{
+}
+
+JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT
+{
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gc.h b/src/gc.h
index 1db0211eb6c68..701c2c769e1b4 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -10,6 +10,7 @@
 #ifndef JL_GC_H
 #define JL_GC_H
 
+#include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
@@ -41,12 +42,29 @@ extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o);
 extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value);
 extern jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz);
 extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize);
-extern void jl_rng_split(uint64_t to[4], uint64_t from[4]);
+extern void jl_rng_split(uint64_t to[JL_RNG_SIZE], uint64_t from[JL_RNG_SIZE]);
 extern void gc_premark(jl_ptls_t ptls2);
 extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
                                  int isaligned, jl_value_t *owner, int8_t can_collect);
 extern size_t jl_array_nbytes(jl_array_t *a);
-extern void objprofile_count(void *ty, int old, int sz);
+
+#ifdef OBJPROFILE
+void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT;
+void objprofile_printall(void);
+void objprofile_reset(void);
+#else
+static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT
+{
+}
+
+static inline void objprofile_printall(void)
+{
+}
+
+static inline void objprofile_reset(void)
+{
+}
+#endif
 
 #define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
 #define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
@@ -69,7 +87,7 @@ extern uint64_t finalizer_rngState[];
 extern int gc_n_threads;
 extern jl_ptls_t* gc_all_tls_states;
 
-// keep in sync with the Julia type of the same name in base/timing.jl
+// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
 typedef struct {
     int64_t     allocd;
     int64_t     deferred_alloc;
@@ -81,7 +99,6 @@ typedef struct {
     uint64_t    freecall;
     uint64_t    total_time;
     uint64_t    total_allocd;
-    uint64_t    since_sweep;
     size_t      interval;
     int         pause;
     int         full_sweep;
@@ -89,6 +106,7 @@ typedef struct {
     uint64_t    max_memory;
     uint64_t    time_to_safepoint;
     uint64_t    max_time_to_safepoint;
+    uint64_t    total_time_to_safepoint;
     uint64_t    sweep_time;
     uint64_t    mark_time;
     uint64_t    total_sweep_time;
@@ -216,26 +234,33 @@ typedef struct {
     jl_alloc_num_t print;
 } jl_gc_debug_env_t;
 
+// Array chunks (work items representing suffixes of
+// large arrays of pointers left to be marked)
+
 typedef enum {
-    GC_empty_chunk,
-    GC_objary_chunk,
-    GC_ary8_chunk,
-    GC_ary16_chunk,
-    GC_finlist_chunk,
+    GC_empty_chunk = 0, // for sentinel representing no items left in chunk queue
+    GC_objary_chunk,    // for chunk of object array
+    GC_ary8_chunk,      // for chunk of array with 8 bit field descriptors
+    GC_ary16_chunk,     // for chunk of array with 16 bit field descriptors
+    GC_finlist_chunk,   // for chunk of finalizer list
 } gc_chunk_id_t;
 
 typedef struct _jl_gc_chunk_t {
     gc_chunk_id_t cid;
-    struct _jl_value_t *parent;
-    struct _jl_value_t **begin;
-    struct _jl_value_t **end;
-    void *elem_begin;
-    void *elem_end;
-    uint32_t step;
-    uintptr_t nptr;
+    struct _jl_value_t *parent; // array owner
+    struct _jl_value_t **begin; // pointer to first element that needs scanning
+    struct _jl_value_t **end;   // pointer to last element that needs scanning
+    void *elem_begin;           // used to scan pointers within objects when marking `ary8` or `ary16`
+    void *elem_end;             // used to scan pointers within objects when marking `ary8` or `ary16`
+    uint32_t step;              // step-size used when marking objarray
+    uintptr_t nptr;             // (`nptr` & 0x1) if array has young element and (`nptr` & 0x2) if array owner is old
 } jl_gc_chunk_t;
 
-#define MAX_REFS_AT_ONCE (1 << 16)
+#define GC_CHUNK_BATCH_SIZE (1 << 16)       // maximum number of references that can be processed
+                                            // without creating a chunk
+
+#define GC_PTR_QUEUE_INIT_SIZE (1 << 18)    // initial size of queue of `jl_value_t *`
+#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14)  // initial size of chunk-queue
 
 // pool page metadata
 typedef struct {
@@ -270,7 +295,7 @@ typedef struct {
     uint16_t fl_end_offset;   // offset of last free object in this page
     uint16_t thread_n;        // thread id of the heap that owns this page
     char *data;
-    uint8_t *ages;
+    uint32_t *ages;
 } jl_gc_pagemeta_t;
 
 // Page layout:
@@ -453,13 +478,17 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE
     *list = hdr;
 }
 
+extern uv_mutex_t gc_threads_lock;
+extern uv_cond_t gc_threads_cond;
+extern _Atomic(int) gc_n_threads_marking;
 void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
 void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin,
                                     jl_value_t **fl_end) JL_NOTSAFEPOINT;
 void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list,
                                    size_t start) JL_NOTSAFEPOINT;
-void gc_mark_loop_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
-void gc_mark_loop(jl_ptls_t ptls);
+void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
+void gc_mark_loop_serial(jl_ptls_t ptls);
+void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
 void sweep_stack_pools(void);
 void jl_gc_debug_init(void);
 
diff --git a/src/gf.c b/src/gf.c
index 894a8a415e002..431443dbbf451 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -27,6 +27,9 @@ extern "C" {
 JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT
 {
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->in_pure_callback)
+        return ~(size_t)0;
     return jl_atomic_load_acquire(&jl_world_counter);
 }
 
@@ -35,6 +38,36 @@ JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT
     return jl_current_task->world_age;
 }
 
+// Compute the maximum number of times to unroll Varargs{T}, based on
+// m->max_varargs (if specified) or a heuristic based on the maximum
+// number of non-varargs arguments in the provided method table.
+//
+// If provided, `may_increase` is set to 1 if the returned value is
+// heuristic-based and has a chance of increasing in the future.
+static size_t get_max_varargs(
+        jl_method_t *m,
+        jl_methtable_t *kwmt,
+        jl_methtable_t *mt,
+        uint8_t *may_increase) JL_NOTSAFEPOINT
+{
+    size_t max_varargs = 1;
+    if (may_increase != NULL)
+        *may_increase = 0;
+
+    if (m->max_varargs != UINT8_MAX)
+        max_varargs = m->max_varargs;
+    else if (kwmt != NULL && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt) {
+        if (may_increase != NULL)
+            *may_increase = 1; // `max_args` can increase as new methods are inserted
+
+        max_varargs = jl_atomic_load_relaxed(&kwmt->max_args) + 2;
+        if (mt == jl_kwcall_mt)
+            max_varargs += 2;
+        max_varargs -= m->nargs;
+    }
+    return max_varargs;
+}
+
 /// ----- Handling for Julia callbacks ----- ///
 
 JL_DLLEXPORT int8_t jl_is_in_pure_context(void)
@@ -101,17 +134,37 @@ static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv)
 // get or create the MethodInstance for a specialization
 static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams, jl_method_instance_t *mi_insert)
 {
-    if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL)
+    if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL && m != jl_opaque_closure_method)
         return jl_atomic_load_relaxed(&m->unspecialized); // handle builtin methods
     jl_value_t *ut = jl_is_unionall(type) ? jl_unwrap_unionall(type) : type;
     JL_TYPECHK(specializations, datatype, ut);
     uint_t hv = ((jl_datatype_t*)ut)->hash;
-    for (int locked = 0; ; locked++) {
-        jl_array_t *speckeyset = jl_atomic_load_acquire(&m->speckeyset);
-        jl_svec_t *specializations = jl_atomic_load_relaxed(&m->specializations);
-        size_t i = -1, cl = jl_svec_len(specializations);
+    jl_array_t *speckeyset = NULL;
+    jl_value_t *specializations = NULL;
+    size_t i = -1, cl = 0, lastcl;
+    for (int locked = 0; locked < 2; locked++) {
+        if (locked) {
+            if (!sparams) // can't insert without knowing this
+                return NULL;
+            JL_LOCK(&m->writelock);
+        }
+        lastcl = cl;
+        speckeyset = jl_atomic_load_acquire(&m->speckeyset);
+        specializations = jl_atomic_load_relaxed(&m->specializations);
+        if (specializations == (jl_value_t*)jl_emptysvec)
+            continue;
+        if (!jl_is_svec(specializations)) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
+            if (jl_types_equal(mi->specTypes, type)) {
+                if (locked)
+                    JL_UNLOCK(&m->writelock);
+                return mi;
+            }
+            continue;
+        }
+        cl = jl_svec_len(specializations);
         if (hv) {
-            ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv);
+            ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, (jl_svec_t*)specializations, hv);
             if (idx != -1) {
                 jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, idx);
                 if (locked)
@@ -122,8 +175,9 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
         else {
             _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
             JL_GC_PUSH1(&specializations); // clang-sa doesn't realize this loop uses specializations
-            for (i = cl; i > 0; i--) {
-                jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i - 1]);
+            // the last lastcl-i-1 elements are already checked when locked, so start search with the new elements only
+            for (i += cl - lastcl; i > 0; i--) {
+                jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
                 if ((jl_value_t*)mi == jl_nothing)
                     break;
                 if (jl_types_equal(mi->specTypes, type)) {
@@ -133,55 +187,66 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
                     return mi;
                 }
             }
+            // i points to the first unchecked element, or the place to insert
             JL_GC_POP();
         }
-        if (!sparams) // can't insert without knowing this
-            return NULL;
-        if (!locked) {
-            JL_LOCK(&m->writelock);
+    }
+    jl_method_instance_t *mi = mi_insert ? mi_insert : jl_get_specialized(m, type, sparams);
+    if (specializations == (jl_value_t*)jl_emptysvec) {
+        jl_atomic_store_release(&m->specializations, (jl_value_t*)mi);
+        jl_gc_wb(m, mi);
+    }
+    else {
+        JL_GC_PUSH1(&mi);
+        if (!jl_is_svec(specializations)) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
+            jl_value_t *type = mi->specTypes;
+            jl_value_t *ut = jl_is_unionall(type) ? jl_unwrap_unionall(type) : type;
+            uint_t hv = ((jl_datatype_t*)ut)->hash;
+            cl = 7;
+            i = cl - 1;
+            specializations = (jl_value_t*)jl_svec_fill(cl, jl_nothing);
+            jl_svecset(specializations, hv ? 0 : i--, mi);
+            jl_atomic_store_release(&m->specializations, specializations);
+            jl_gc_wb(m, specializations);
+            if (hv)
+                jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, (jl_svec_t*)specializations);
         }
-        else {
-            if (hv) {
-                _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
-                for (i = 0; i < cl; i++) {
-                    jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
-                    if ((jl_value_t*)mi == jl_nothing)
-                        break;
-                    assert(!jl_types_equal(mi->specTypes, type));
-                }
-            }
-            jl_method_instance_t *mi = mi_insert ? mi_insert : jl_get_specialized(m, type, sparams);
-            JL_GC_PUSH1(&mi);
-            if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != jl_nothing) : (i <= 1 || jl_svecref(specializations, i - 2) != jl_nothing)) {
-                size_t ncl = cl < 8 ? 8 : (cl*3)>>1;
-                jl_svec_t *nc = jl_alloc_svec_uninit(ncl);
-                if (i > 0)
-                    memcpy((char*)jl_svec_data(nc), jl_svec_data(specializations), sizeof(void*) * i);
-                for (int j = 0; j < ncl - cl; j++)
-                    jl_svecset(nc, j+i, jl_nothing);
-                if (i < cl)
-                    memcpy((char*)jl_svec_data(nc) + sizeof(void*) * (i + ncl - cl),
-                           (char*)jl_svec_data(specializations) + sizeof(void*) * i,
-                           sizeof(void*) * (cl - i));
-                jl_atomic_store_release(&m->specializations, nc);
-                jl_gc_wb(m, nc);
-                specializations = nc;
-                if (!hv)
-                    i += ncl - cl;
+        if (hv) {
+            _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
+            for (i = 0; i < cl; i++) {
+                jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
+                if ((jl_value_t*)mi == jl_nothing)
+                    break;
+                assert(!jl_types_equal(mi->specTypes, type));
             }
+            // i points at the place to insert
+        }
+        if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != jl_nothing) : (i <= 1 || jl_svecref(specializations, i - 2) != jl_nothing)) {
+            size_t ncl = cl < 7 ? 7 : (cl*3)>>1;
+            jl_svec_t *nc = jl_alloc_svec_uninit(ncl);
+            if (i > 0)
+                memcpy((char*)jl_svec_data(nc), jl_svec_data(specializations), sizeof(void*) * i);
+            for (int j = 0; j < ncl - cl; j++)
+                jl_svecset(nc, j+i, jl_nothing);
+            if (i < cl)
+                memcpy((char*)jl_svec_data(nc) + sizeof(void*) * (i + ncl - cl),
+                       (char*)jl_svec_data(specializations) + sizeof(void*) * i,
+                       sizeof(void*) * (cl - i));
+            specializations = (jl_value_t*)nc;
+            jl_atomic_store_release(&m->specializations, specializations);
+            jl_gc_wb(m, specializations);
             if (!hv)
-                i -= 1;
-            assert(jl_svecref(specializations, i) == jl_nothing);
-            jl_svecset(specializations, i, mi); // jl_atomic_store_relaxed?
-            if (hv) {
-                // TODO: fuse lookup and insert steps?
-                jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, specializations);
-            }
-            JL_UNLOCK(&m->writelock);
-            JL_GC_POP();
-            return mi;
+                i += ncl - cl;
         }
+        assert(jl_svecref(specializations, i) == jl_nothing);
+        jl_svecset(specializations, i, mi);
+        if (hv)
+            jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, (jl_svec_t*)specializations);
+        JL_GC_POP();
     }
+    JL_UNLOCK(&m->writelock); // may gc
+    return mi;
 }
 
 JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams)
@@ -207,8 +272,11 @@ JL_DLLEXPORT jl_value_t *jl_specializations_lookup(jl_method_t *m, jl_value_t *t
 
 JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world)
 {
+    // TODO: this is sort of an odd lookup strategy (and the only user of
+    // jl_typemap_assoc_by_type with subtype=0), while normally jl_gf_invoke_lookup would be
+    // expected to be used instead
     struct jl_typemap_assoc search = {type, world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0);
+    jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, jl_cachearg_offset(mt), /*subtype*/0);
     if (!sf)
         return jl_nothing;
     return sf->func.value;
@@ -248,7 +316,7 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
 
     newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec,
             (jl_value_t*)m, 1, ~(size_t)0);
-    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0);
+    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt));
 
     jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)jl_anytuple_type, jl_emptysvec);
     jl_atomic_store_relaxed(&m->unspecialized, mi);
@@ -276,28 +344,30 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
 // if inference doesn't occur (or can't finish), returns NULL instead
 jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
 {
-    JL_TIMING(INFERENCE);
     if (jl_typeinf_func == NULL)
         return NULL;
     jl_task_t *ct = jl_current_task;
-    if (ct->reentrant_inference == (uint16_t)-1) {
+    if (ct->reentrant_timing & 0b1000) {
         // We must avoid attempting to re-enter inference here
         assert(0 && "attempted to enter inference while writing out image");
         abort();
     }
-    if (ct->reentrant_inference > 2)
+    // In case we use higher bits later, mask them out
+    if ((ct->reentrant_timing & 0b1111) >= 0b110)
         return NULL;
 
     jl_code_info_t *src = NULL;
 #ifdef ENABLE_INFERENCE
     if (mi->inInference && !force)
         return NULL;
-
+    JL_TIMING(INFERENCE, INFERENCE);
     jl_value_t **fargs;
     JL_GC_PUSHARGS(fargs, 3);
     fargs[0] = (jl_value_t*)jl_typeinf_func;
     fargs[1] = (jl_value_t*)mi;
     fargs[2] = jl_box_ulong(world);
+
+    jl_timing_show_method_instance(mi, JL_TIMING_CURRENT_BLOCK);
 #ifdef TRACE_INFERENCE
     if (mi->specTypes != (jl_value_t*)jl_emptytuple_type) {
         jl_printf(JL_STDERR,"inference on ");
@@ -312,7 +382,14 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
     size_t last_age = ct->world_age;
     ct->world_age = jl_typeinf_world;
     mi->inInference = 1;
-    ct->reentrant_inference++;
+    // first bit is for reentrant timing,
+    // so adding 1 to the bit above performs
+    // inference reentrancy counter addition.
+    // Note that this is only safe because
+    // the counter varies from 0-3; if we
+    // increase that limit, we'll need to
+    // allocate another bit for the counter.
+    ct->reentrant_timing += 0b10;
     JL_TRY {
         src = (jl_code_info_t*)jl_apply(fargs, 3);
     }
@@ -333,7 +410,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
         src = NULL;
     }
     ct->world_age = last_age;
-    ct->reentrant_inference--;
+    ct->reentrant_timing -= 0b10;
     mi->inInference = 0;
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
@@ -345,6 +422,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
     }
     JL_GC_POP();
 #endif
+
     return src;
 }
 
@@ -364,13 +442,14 @@ JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *mi, size_t mi
     while (codeinst) {
         if (codeinst->min_world <= min_world && max_world <= codeinst->max_world) {
             jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred);
-            if (code && (code == jl_nothing || jl_ir_flag_inferred((jl_array_t*)code)))
+            if (code && (code == jl_nothing || jl_ir_flag_inferred(code)))
                 return (jl_value_t*)codeinst;
         }
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     return (jl_value_t*)jl_nothing;
 }
+JL_DLLEXPORT jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT = jl_rettype_inferred;
 
 
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
@@ -414,13 +493,13 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
     if ((const_flags & 2) == 0)
         inferred_const = NULL;
     codeinst->rettype_const = inferred_const;
-    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
+    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     if ((const_flags & 1) != 0) {
         assert(const_flags & 2);
         jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
     }
-    codeinst->isspecsig = 0;
+    jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
     jl_atomic_store_relaxed(&codeinst->precompile, 0);
     jl_atomic_store_relaxed(&codeinst->next, NULL);
     codeinst->ipo_purity_bits = ipo_effects;
@@ -450,9 +529,19 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
 
 static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
 {
-    jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
-    size_t i, l = jl_svec_len(specializations);
     size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    jl_value_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
+    if (specializations == (jl_value_t*)jl_emptysvec)
+        return 1;
+    if (!jl_is_svec(specializations)) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
+        assert(jl_is_method_instance(mi));
+        if (jl_rettype_inferred(mi, world, world) == jl_nothing)
+            jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
+        return 1;
+    }
+    size_t i, l = jl_svec_len(specializations);
+    JL_GC_PUSH1(&specializations);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
         if ((jl_value_t*)mi != jl_nothing) {
@@ -461,6 +550,7 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
                 jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
         }
     }
+    JL_GC_POP();
     return 1;
 }
 
@@ -642,8 +732,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams)
                 int T_has_tv = T && jl_has_typevar(T, v);
                 int N_has_tv = N && jl_has_typevar(N, v); // n.b. JL_VARARG_UNBOUND check means this should be false
                 assert(!N_has_tv || N == (jl_value_t*)v);
-                if (T_has_tv)
-                    vm = jl_type_unionall(v, T);
+                vm = T_has_tv ? jl_type_unionall(v, T) : T;
                 if (N_has_tv)
                     N = NULL;
                 vm = (jl_value_t*)jl_wrap_vararg(vm, N); // this cannot throw for these inputs
@@ -668,13 +757,14 @@ static void jl_compilation_sig(
     jl_tupletype_t *const tt, // the original tupletype of the call (or DataType from precompile)
     jl_svec_t *sparams,
     jl_method_t *definition,
-    intptr_t nspec,
+    intptr_t max_varargs,
     // output:
     jl_svec_t **const newparams JL_REQUIRE_ROOTED_SLOT)
 {
     assert(jl_is_tuple_type(tt));
     jl_value_t *decl = definition->sig;
     size_t nargs = definition->nargs; // == jl_nparams(jl_unwrap_unionall(decl));
+    size_t nspec = max_varargs + nargs;
 
     if (definition->generator) {
         // staged functions aren't optimized
@@ -710,7 +800,8 @@ static void jl_compilation_sig(
     case JL_VARARG_UNBOUND:
         if (np < nspec && jl_is_va_tuple(tt))
             // there are insufficient given parameters for jl_isa_compileable_sig now to like this type
-            // (there were probably fewer methods defined when we first selected this signature)
+            // (there were probably fewer methods defined when we first selected this signature, or
+            //  the max varargs limit was not reached indicating the type is already fully-specialized)
             return;
         break;
     }
@@ -863,7 +954,13 @@ static void jl_compilation_sig(
     // and the types we find should be bigger.
     if (np >= nspec && jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND) {
         if (!*newparams) *newparams = tt->parameters;
-        type_i = jl_svecref(*newparams, nspec - 2);
+        if (max_varargs > 0) {
+            type_i = jl_svecref(*newparams, nspec - 2);
+        } else {
+            // If max varargs is zero, always specialize to (Any...) since
+            // there is no preceding parameter to use for `type_i`
+            type_i = jl_bottom_type;
+        }
         // if all subsequent arguments are subtypes of type_i, specialize
         // on that instead of decl. for example, if decl is
         // (Any...)
@@ -932,18 +1029,16 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
     // supertype of any other method signatures. so far we are conservative
     // and the types we find should be bigger.
     if (definition->isva) {
-        unsigned nspec_min = nargs + 1; // min number of non-vararg values before vararg
-        unsigned nspec_max = INT32_MAX; // max number of non-vararg values before vararg
+        unsigned nspec_min = nargs + 1; // min number of arg values (including tail vararg)
+        unsigned nspec_max = INT32_MAX; // max number of arg values (including tail vararg)
         jl_methtable_t *mt = jl_method_table_for(decl);
         jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(decl) : mt;
         if ((jl_value_t*)mt != jl_nothing) {
             // try to refine estimate of min and max
-            if (kwmt != NULL && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt)
-                // new methods may be added, increasing nspec_min later
-                nspec_min = jl_atomic_load_relaxed(&kwmt->max_args) + 2 + 2 * (mt == jl_kwcall_mt);
-            else
-                // nspec is always nargs+1, regardless of the other contents of these mt
-                nspec_max = nspec_min;
+            uint8_t heuristic_used = 0;
+            nspec_max = nspec_min = nargs + get_max_varargs(definition, kwmt, mt, &heuristic_used);
+            if (heuristic_used)
+                nspec_max = INT32_MAX; // new methods may be added, increasing nspec_min later
         }
         int isunbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND);
         if (jl_is_vararg(jl_tparam(type, np - 1))) {
@@ -1168,10 +1263,10 @@ static jl_method_instance_t *cache_method(
     int cache_with_orig = 1;
     jl_tupletype_t *compilationsig = tt;
     jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(definition->sig) : mt;
-    intptr_t nspec = (kwmt == NULL || kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || kwmt == jl_kwcall_mt ? definition->nargs + 1 : jl_atomic_load_relaxed(&kwmt->max_args) + 2 + 2 * (mt == jl_kwcall_mt));
-    jl_compilation_sig(tt, sparams, definition, nspec, &newparams);
+    intptr_t max_varargs = get_max_varargs(definition, kwmt, mt, NULL);
+    jl_compilation_sig(tt, sparams, definition, max_varargs, &newparams);
     if (newparams) {
-        temp2 = (jl_value_t*)jl_apply_tuple_type(newparams);
+        temp2 = jl_apply_tuple_type(newparams);
         // Now there may be a problem: the widened signature is more general
         // than just the given arguments, so it might conflict with another
         // definition that does not have cache instances yet. To fix this, we
@@ -1195,6 +1290,8 @@ static jl_method_instance_t *cache_method(
     }
     // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, sparams, definition));
     newmeth = jl_specializations_get_linfo(definition, (jl_value_t*)compilationsig, sparams);
+    if (newmeth->cache_with_orig)
+        cache_with_orig = 1;
 
     jl_tupletype_t *cachett = tt;
     jl_svec_t* guardsigs = jl_emptysvec;
@@ -1261,6 +1358,10 @@ static jl_method_instance_t *cache_method(
             max_valid = max_valid2;
             cachett = compilationsig;
         }
+        else {
+            // do not revisit this decision
+            newmeth->cache_with_orig = 1;
+        }
     }
 
     // now scan `cachett` and ensure that `Type{T}` in the cache will be matched exactly by `typeof(T)`
@@ -1288,7 +1389,7 @@ static jl_method_instance_t *cache_method(
         }
     }
     if (newparams) {
-        simplett = jl_apply_tuple_type(newparams);
+        simplett = (jl_datatype_t*)jl_apply_tuple_type(newparams);
         temp2 = (jl_value_t*)simplett;
     }
 
@@ -1374,24 +1475,29 @@ struct matches_env {
     struct typemap_intersection_env match;
     jl_typemap_entry_t *newentry;
     jl_value_t *shadowed;
+    jl_typemap_entry_t *replaced;
 };
+
 static int get_intersect_visitor(jl_typemap_entry_t *oldentry, struct typemap_intersection_env *closure0)
 {
     struct matches_env *closure = container_of(closure0, struct matches_env, match);
-    if (oldentry == closure->newentry)
-        return 1;
-    if (oldentry->max_world < ~(size_t)0 || oldentry->min_world == closure->newentry->min_world)
-        // skip if no world has both active
-        // also be careful not to try to scan something from the current dump-reload though
-        return 1;
+    assert(oldentry != closure->newentry && "entry already added");
+    assert(oldentry->min_world <= closure->newentry->min_world && "old method cannot be newer than new method");
+    assert(oldentry->max_world == ~(size_t)0 && "method cannot be added at the same time as method deleted");
+    // don't need to consider other similar methods if this oldentry will always fully intersect with them and dominates all of them
+    typemap_slurp_search(oldentry, &closure->match);
     jl_method_t *oldmethod = oldentry->func.method;
+    if (closure->match.issubty // e.g. jl_subtype(closure->newentry.sig, oldentry->sig)
+        && jl_subtype(oldmethod->sig, (jl_value_t*)closure->newentry->sig)) { // e.g. jl_type_equal(closure->newentry->sig, oldentry->sig)
+        closure->replaced = oldentry;
+    }
     if (closure->shadowed == NULL)
         closure->shadowed = (jl_value_t*)jl_alloc_vec_any(0);
     jl_array_ptr_1d_push((jl_array_t*)closure->shadowed, (jl_value_t*)oldmethod);
     return 1;
 }
 
-static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t *newentry)
+static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t *newentry, jl_typemap_entry_t **replaced, int8_t offs, size_t world)
 {
     jl_tupletype_t *type = newentry->sig;
     jl_tupletype_t *ttypes = (jl_tupletype_t*)jl_unwrap_unionall((jl_value_t*)type);
@@ -1404,11 +1510,16 @@ static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t
         else
             va = NULL;
     }
-    struct matches_env env = {{get_intersect_visitor, (jl_value_t*)type, va,
+    // search for all intersecting methods active in the previous world, to determine the changes needed to be made for the next world
+    struct matches_env env = {{get_intersect_visitor, (jl_value_t*)type, va, /* .search_slurp = */ 0,
+            /* .min_valid = */ world, /* .max_valid = */ world,
             /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0},
-        /* .newentry = */ newentry, /* .shadowed */ NULL};
+        /* .newentry = */ newentry, /* .shadowed */ NULL, /* .replaced */ NULL};
     JL_GC_PUSH3(&env.match.env, &env.match.ti, &env.shadowed);
-    jl_typemap_intersection_visitor(defs, 0, &env.match);
+    jl_typemap_intersection_visitor(defs, offs, &env.match);
+    env.match.env = NULL;
+    env.match.ti = NULL;
+    *replaced = env.replaced;
     JL_GC_POP();
     return env.shadowed;
 }
@@ -1531,6 +1642,7 @@ static void do_nothing_with_codeinst(jl_code_instance_t *ci) {}
 // recursively invalidate cached methods that had an edge to a replaced method
 static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth)
 {
+    jl_timing_counter_inc(JL_TIMING_COUNTER_Invalidations, 1);
     if (_jl_debug_method_invalidation) {
         jl_value_t *boxeddepth = NULL;
         JL_GC_PUSH1(&boxeddepth);
@@ -1605,31 +1717,32 @@ JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee,
     JL_LOCK(&callee->def.method->writelock);
     if (invokesig == jl_nothing)
         invokesig = NULL;      // julia uses `nothing` but C uses NULL (#undef)
+    int found = 0;
+    // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory
     if (!callee->backedges) {
         // lazy-init the backedges array
         callee->backedges = jl_alloc_vec_any(0);
         jl_gc_wb(callee, callee->backedges);
-        push_edge(callee->backedges, invokesig, caller);
     }
     else {
         size_t i = 0, l = jl_array_len(callee->backedges);
-        int found = 0;
-        jl_value_t *invokeTypes;
-        jl_method_instance_t *mi;
-        while (i < l) {
-            i = get_next_edge(callee->backedges, i, &invokeTypes, &mi);
-            // TODO: it would be better to canonicalize (how?) the Tuple-type so
-            // that we don't have to call `jl_egal`
-            if (mi == caller && ((invokesig == NULL && invokeTypes == NULL) ||
-                                 (invokesig && invokeTypes && jl_egal(invokesig, invokeTypes)))) {
+        for (i = 0; i < l; i++) {
+            // optimized version of while (i < l) i = get_next_edge(callee->backedges, i, &invokeTypes, &mi);
+            jl_value_t *mi = jl_array_ptr_ref(callee->backedges, i);
+            if (mi != (jl_value_t*)caller)
+                continue;
+            jl_value_t *invokeTypes = i > 0 ? jl_array_ptr_ref(callee->backedges, i - 1) : NULL;
+            if (invokeTypes && jl_is_method_instance(invokeTypes))
+                invokeTypes = NULL;
+            if ((invokesig == NULL && invokeTypes == NULL) ||
+                (invokesig && invokeTypes && jl_types_equal(invokesig, invokeTypes))) {
                 found = 1;
                 break;
             }
         }
-        if (!found) {
-            push_edge(callee->backedges, invokesig, caller);
-        }
     }
+    if (!found)
+        push_edge(callee->backedges, invokesig, caller);
     JL_UNLOCK(&callee->def.method->writelock);
 }
 
@@ -1645,6 +1758,7 @@ JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *t
         jl_array_ptr_set(mt->backedges, 1, caller);
     }
     else {
+        // TODO: use jl_cache_type_(tt) like cache_method does, instead of a linear scan
         size_t i, l = jl_array_len(mt->backedges);
         for (i = 1; i < l; i += 2) {
             if (jl_types_equal(jl_array_ptr_ref(mt->backedges, i - 1), typ)) {
@@ -1683,6 +1797,22 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0)
                 break;
             }
         }
+        if (intersects && (jl_value_t*)oldentry->sig != mi->specTypes) {
+            // the entry may point to a widened MethodInstance, in which case it is worthwhile to check if the new method
+            // actually has any meaningful intersection with the old one
+            intersects = !jl_has_empty_intersection((jl_value_t*)oldentry->sig, (jl_value_t*)env->newentry->sig);
+        }
+        if (intersects && oldentry->guardsigs != jl_emptysvec) {
+            // similarly, if it already matches an existing guardsigs, this is already safe to keep
+            size_t i, l;
+            for (i = 0, l = jl_svec_len(oldentry->guardsigs); i < l; i++) {
+                // see corresponding code in jl_typemap_entry_assoc_exact
+                if (jl_subtype((jl_value_t*)env->newentry->sig, jl_svecref(oldentry->guardsigs, i))) {
+                    intersects = 0;
+                    break;
+                }
+            }
+        }
         if (intersects) {
             if (_jl_debug_method_invalidation) {
                 jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
@@ -1728,8 +1858,9 @@ static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_RO
 }
 #endif
 
-static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, jl_method_t *method, size_t max_world)
+static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, size_t max_world)
 {
+    jl_method_t *method = methodentry->func.method;
     assert(!method->is_for_opaque_closure);
     method->deleted_world = methodentry->max_world = max_world;
     // drop this method from mt->cache
@@ -1753,16 +1884,22 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m
     }
     // Invalidate the backedges
     int invalidated = 0;
-    jl_svec_t *specializations = jl_atomic_load_relaxed(&methodentry->func.method->specializations);
+    jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations);
+    JL_GC_PUSH1(&specializations);
+    if (!jl_is_svec(specializations))
+        specializations = (jl_value_t*)jl_svec1(specializations);
     l = jl_svec_len(specializations);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
         if ((jl_value_t*)mi != jl_nothing) {
             invalidated = 1;
-            invalidate_external(mi, methodentry->max_world);
-            invalidate_backedges(&do_nothing_with_codeinst, mi, methodentry->max_world, "jl_method_table_disable");
+            invalidate_external(mi, max_world);
+            invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_disable");
         }
     }
+    JL_GC_POP();
+    // XXX: this might have resolved an ambiguity, for which we have not tracked the edge here,
+    // and thus now introduce a mistake into inference
     if (invalidated && _jl_debug_method_invalidation) {
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method);
         jl_value_t *loctag = jl_cstr_to_string("jl_method_table_disable");
@@ -1781,11 +1918,11 @@ JL_DLLEXPORT void jl_method_table_disable(jl_methtable_t *mt, jl_method_t *metho
     JL_LOCK(&mt->writelock);
     // Narrow the world age on the method to make it uncallable
     size_t world = jl_atomic_fetch_add(&jl_world_counter, 1);
-    jl_method_table_invalidate(mt, methodentry, method, world);
+    jl_method_table_invalidate(mt, methodentry, world);
     JL_UNLOCK(&mt->writelock);
 }
 
-static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect, jl_value_t **isect2)
+static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect JL_REQUIRE_ROOTED_SLOT, jl_value_t **isect2 JL_REQUIRE_ROOTED_SLOT)
 {
     *isect2 = NULL;
     int is_subty = 0;
@@ -1811,11 +1948,45 @@ static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **is
     return 1;
 }
 
+enum morespec_options {
+    morespec_unknown,
+    morespec_isnot,
+    morespec_is
+};
+
+// check if `type` is replacing `m` with an ambiguity here, given other methods in `d` that already match it
+static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_t *const *d, size_t n, jl_value_t *isect, jl_value_t *isect2, char *morespec)
+{
+    size_t k;
+    for (k = 0; k < n; k++) {
+        jl_method_t *m2 = d[k];
+        // see if m2 also fully covered this intersection
+        if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect2 && jl_subtype(isect2, m2->sig))))
+            continue;
+        if (morespec[k] == (char)morespec_unknown)
+            morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot);
+        if (morespec[k] == (char)morespec_is)
+            // not actually shadowing this--m2 will still be better
+            return 0;
+        // if type is not more specific than m (thus now dominating it)
+        // then there is a new ambiguity here,
+        // since m2 was also a previous match over isect,
+        // see if m was previously dominant over all m2
+        // or if this was already ambiguous before
+        if (ambig != morespec_is && !jl_type_morespecific(m->sig, m2->sig)) {
+            // m and m2 were previously ambiguous over the full intersection of mi with type, and will still be ambiguous with addition of type
+            return 0;
+        }
+    }
+    return 1;
+}
+
 JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype)
 {
-    JL_TIMING(ADD_METHOD);
+    JL_TIMING(ADD_METHOD, ADD_METHOD);
     assert(jl_is_method(method));
     assert(jl_is_mtable(mt));
+    jl_timing_show_method(method, JL_TIMING_CURRENT_BLOCK);
     jl_value_t *type = method->sig;
     jl_value_t *oldvalue = NULL;
     jl_array_t *oldmi = NULL;
@@ -1829,23 +2000,22 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
     jl_typemap_entry_t *newentry = NULL;
     JL_GC_PUSH7(&oldvalue, &oldmi, &newentry, &loctag, &isect, &isect2, &isect3);
     JL_LOCK(&mt->writelock);
-    // first find if we have an existing entry to delete
-    struct jl_typemap_assoc search = {(jl_value_t*)type, method->primary_world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *oldentry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0);
-    // then add our new entry
+    // add our new entry
     newentry = jl_typemap_alloc((jl_tupletype_t*)type, simpletype, jl_emptysvec,
             (jl_value_t*)method, method->primary_world, method->deleted_world);
-    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0);
-    if (oldentry) {
-        jl_method_t *m = oldentry->func.method;
-        method_overwrite(newentry, m);
-        jl_method_table_invalidate(mt, oldentry, m, max_world);
+    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt));
+    jl_typemap_entry_t *replaced = NULL;
+    // then check what entries we replaced
+    oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry, &replaced, jl_cachearg_offset(mt), max_world);
+    int invalidated = 0;
+    if (replaced) {
+        oldvalue = (jl_value_t*)replaced;
+        invalidated = 1;
+        method_overwrite(newentry, replaced->func.method);
+        jl_method_table_invalidate(mt, replaced, max_world);
     }
     else {
-        oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry);
-
-        int invalidated = 0;
-        jl_method_t **d;
+        jl_method_t *const *d;
         size_t j, n;
         if (oldvalue == NULL) {
             d = NULL;
@@ -1874,6 +2044,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                     //    -> less specific or ambiguous with any one of them: can ignore the missing edge (not missing)
                     //      -> some may have been ambiguous: still are
                     //      -> some may have been called: they may be partly replaced (will be detected in the loop later)
+                    // c.f. `is_replacing`, which is a similar query, but with an existing method match to compare against
                     missing = 1;
                     size_t j;
                     for (j = 0; j < n; j++) {
@@ -1908,20 +2079,23 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
         }
         if (oldvalue) {
             oldmi = jl_alloc_vec_any(0);
-            enum morespec_options {
-                morespec_unknown,
-                morespec_isnot,
-                morespec_is
-            };
             char *morespec = (char*)alloca(n);
             memset(morespec, morespec_unknown, n);
             for (j = 0; j < n; j++) {
                 jl_method_t *m = d[j];
                 if (morespec[j] == (char)morespec_is)
                     continue;
-                jl_svec_t *specializations = jl_atomic_load_relaxed(&m->specializations);
-                _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
-                size_t i, l = jl_svec_len(specializations);
+                loctag = jl_atomic_load_relaxed(&m->specializations); // use loctag for a gcroot
+                _Atomic(jl_method_instance_t*) *data;
+                size_t i, l;
+                if (jl_is_svec(loctag)) {
+                    data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(loctag);
+                    l = jl_svec_len(loctag);
+                }
+                else {
+                    data = (_Atomic(jl_method_instance_t*)*) &loctag;
+                    l = 1;
+                }
                 enum morespec_options ambig = morespec_unknown;
                 for (i = 0; i < l; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
@@ -1929,6 +2103,11 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                         continue;
                     isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes);
                     if (jl_type_intersection2(type, isect3, &isect, &isect2)) {
+                        // TODO: this only checks pair-wise for ambiguities, but the ambiguities could arise from the interaction of multiple methods
+                        // and thus might miss a case where we introduce an ambiguity between two existing methods
+                        // We could instead work to sort this into 3 groups `morespecific .. ambiguous .. lesspecific`, with `type` in ambiguous,
+                        // such that everything in `morespecific` dominates everything in `ambiguous`, and everything in `ambiguous` dominates everything in `lessspecific`
+                        // And then compute where each isect falls, and whether it changed group--necessitating invalidation--or not.
                         if (morespec[j] == (char)morespec_unknown)
                             morespec[j] = (char)(jl_type_morespecific(m->sig, type) ? morespec_is : morespec_isnot);
                         if (morespec[j] == (char)morespec_is)
@@ -1937,68 +2116,49 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                         if (ambig == morespec_unknown)
                             ambig = jl_type_morespecific(type, m->sig) ? morespec_is : morespec_isnot;
                         // replacing a method--see if this really was the selected method previously
-                        // over the intersection
-                        if (ambig == morespec_isnot)  {
-                            size_t k;
-                            for (k = 0; k < n; k++) {
-                                jl_method_t *m2 = d[k];
-                                if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect && jl_subtype(isect, m2->sig))))
-                                    continue;
-                                if (morespec[k] == (char)morespec_unknown)
-                                    morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot);
-                                if (morespec[k] == (char)morespec_is)
-                                    // not actually shadowing this--m2 will still be better
-                                    break;
-                                // since m2 was also a previous match over isect,
-                                // see if m was also previously dominant over all m2
-                                if (!jl_type_morespecific(m->sig, m2->sig))
-                                    break;
-                            }
-                            if (k != n)
-                                continue;
-                        }
-                        // Before deciding whether to invalidate `mi`, check each backedge for `invoke`s
-                        if (mi->backedges) {
-                            jl_array_t *backedges = mi->backedges;
+                        // over the intersection (not ambiguous) and the new method will be selected now (morespec_is)
+                        int replaced_dispatch = is_replacing(ambig, type, m, d, n, isect, isect2, morespec);
+                        // found that this specialization dispatch got replaced by m
+                        // call invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_insert");
+                        // but ignore invoke-type edges
+                        jl_array_t *backedges = mi->backedges;
+                        if (backedges) {
                             size_t ib = 0, insb = 0, nb = jl_array_len(backedges);
                             jl_value_t *invokeTypes;
                             jl_method_instance_t *caller;
                             while (ib < nb) {
                                 ib = get_next_edge(backedges, ib, &invokeTypes, &caller);
-                                if (!invokeTypes) {
-                                    // ordinary dispatch, invalidate
+                                int replaced_edge;
+                                if (invokeTypes) {
+                                    // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes
+                                    replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec);
+                                }
+                                else {
+                                    replaced_edge = replaced_dispatch;
+                                }
+                                if (replaced_edge) {
                                     invalidate_method_instance(&do_nothing_with_codeinst, caller, max_world, 1);
                                     invalidated = 1;
-                                } else {
-                                    // invoke-dispatch, check invokeTypes for validity
-                                    struct jl_typemap_assoc search = {invokeTypes, method->primary_world, NULL, 0, ~(size_t)0};
-                                    oldentry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0);
-                                    if (oldentry && oldentry->func.method == mi->def.method) {
-                                        // We can safely keep this method
-                                        jl_array_ptr_set(backedges, insb++, invokeTypes);
-                                        jl_array_ptr_set(backedges, insb++, caller);
-                                    } else {
-                                        invalidate_method_instance(&do_nothing_with_codeinst, caller, max_world, 1);
-                                        invalidated = 1;
-                                    }
+                                }
+                                else {
+                                    insb = set_next_edge(backedges, insb, invokeTypes, caller);
                                 }
                             }
                             jl_array_del_end(backedges, nb - insb);
                         }
-                        if (!mi->backedges || jl_array_len(mi->backedges) == 0) {
-                            jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
-                            invalidate_external(mi, max_world);
-                            if (mi->backedges) {
-                                invalidated = 1;
-                                invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_insert");
-                            }
+                        jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
+                        invalidate_external(mi, max_world);
+                        if (_jl_debug_method_invalidation && invalidated) {
+                            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
+                            loctag = jl_cstr_to_string("jl_method_table_insert");
+                            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
                         }
                     }
                 }
             }
             if (jl_array_len(oldmi)) {
                 // search mt->cache and leafcache and drop anything that might overlap with the new method
-                // TODO: keep track of just the `mi` for which shadowing was true (to avoid recomputing that here)
+                // this is very cheap, so we don't mind being fairly conservative at over-approximating this
                 struct invalidate_mt_env mt_cache_env;
                 mt_cache_env.max_world = max_world;
                 mt_cache_env.shadowed = oldmi;
@@ -2019,13 +2179,13 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                 }
             }
         }
-        if (invalidated && _jl_debug_method_invalidation) {
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method);
-            loctag = jl_cstr_to_string("jl_method_table_insert");
-            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
-        }
-        update_max_args(mt, type);
     }
+    if (invalidated && _jl_debug_method_invalidation) {
+        jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method);
+        loctag = jl_cstr_to_string("jl_method_table_insert");
+        jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+    }
+    update_max_args(mt, type);
     JL_UNLOCK(&mt->writelock);
     JL_GC_POP();
 }
@@ -2106,7 +2266,6 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
 JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
                                              size_t world, size_t *min_valid, size_t *max_valid, int *ambig)
 {
-    JL_TIMING(METHOD_MATCH);
     if (ambig != NULL)
         *ambig = 0;
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
@@ -2218,12 +2377,33 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 mi, codeinst2->rettype,
                 codeinst2->min_world, codeinst2->max_world);
         if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
-            // once set, don't change invoke-ptr, as that leads to race conditions
-            // with the (not) simultaneous updates to invoke and specptr
-            codeinst->isspecsig = codeinst2->isspecsig;
             codeinst->rettype_const = codeinst2->rettype_const;
-            jl_atomic_store_release(&codeinst->specptr.fptr, jl_atomic_load_relaxed(&codeinst2->specptr.fptr));
-            jl_atomic_store_release(&codeinst->invoke, jl_atomic_load_relaxed(&codeinst2->invoke));
+            uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
+            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
+            void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
+            if (fptr != NULL) {
+                while (!(specsigflags & 0b10)) {
+                    jl_cpu_pause();
+                    specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
+                }
+                invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
+                void *prev_fptr = NULL;
+                // see jitlayers.cpp for the ordering restrictions here
+                if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
+                    jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1);
+                    jl_atomic_store_release(&codeinst->invoke, invoke);
+                    jl_atomic_store_release(&codeinst->specsigflags, specsigflags);
+                } else {
+                    // someone else already compiled it
+                    while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                        jl_cpu_pause();
+                    }
+                    // codeinst is now set up fully, safe to return
+                }
+            } else {
+                jl_callptr_t prev = NULL;
+                jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
+            }
         }
         // don't call record_precompile_statement here, since we already compiled it as mi2 which is better
         return codeinst;
@@ -2248,14 +2428,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
             if (unspecmi) {
                 jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
-                if (unspec && jl_atomic_load_acquire(&unspec->invoke)) {
+                jl_callptr_t unspec_invoke = NULL;
+                if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
                     jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                         (jl_value_t*)jl_any_type, NULL, NULL,
                         0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-                    codeinst->isspecsig = 0;
-                    codeinst->specptr = unspec->specptr;
+                    void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
+                    if (unspec_fptr) {
+                        // wait until invoke and specsigflags are properly set
+                        while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
+                            jl_cpu_pause();
+                        }
+                        unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
+                    }
+                    jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
                     codeinst->rettype_const = unspec->rettype_const;
-                    jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke));
+                    jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
                     jl_mi_cache_insert(mi, codeinst);
                     record_precompile_statement(mi);
                     return codeinst;
@@ -2267,12 +2455,12 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
     // if that didn't work and compilation is off, try running in the interpreter
     if (compile_option == JL_OPTIONS_COMPILE_OFF ||
         compile_option == JL_OPTIONS_COMPILE_MIN) {
-        jl_code_info_t *src = jl_code_for_interpreter(mi);
+        jl_code_info_t *src = jl_code_for_interpreter(mi, world);
         if (!jl_code_requires_compiler(src, 0)) {
             jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                 (jl_value_t*)jl_any_type, NULL, NULL,
                 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-            jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call);
+            jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
             record_precompile_statement(mi);
             return codeinst;
@@ -2289,7 +2477,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
         jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
         // ask codegen to make the fptr for unspec
-        if (jl_atomic_load_acquire(&ucache->invoke) == NULL) {
+        jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
+        if (ucache_invoke == NULL) {
             if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
                                               jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
                 jl_printf(JL_STDERR, "source not available for ");
@@ -2298,19 +2487,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 jl_error("source missing for method that needs to be compiled");
             }
             jl_generate_fptr_for_unspecialized(ucache);
+            ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
         }
-        assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
-        if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
-            jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
+        assert(ucache_invoke != NULL);
+        if (ucache_invoke != jl_fptr_sparam &&
+            ucache_invoke != jl_fptr_interpret_call) {
             // only these care about the exact specTypes, otherwise we can use it directly
             return ucache;
         }
         codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
             0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-        codeinst->isspecsig = 0;
-        codeinst->specptr = ucache->specptr;
+        void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
+        if (unspec_fptr) {
+            // wait until invoke and specsigflags are properly set
+            while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
+                jl_cpu_pause();
+            }
+            ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
+        }
+        // unspec is always not specsig, but might use specptr
+        jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
+        jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
         codeinst->rettype_const = ucache->rettype_const;
-        jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke));
+        jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
         jl_mi_cache_insert(mi, codeinst);
     }
     else {
@@ -2328,11 +2527,8 @@ jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t narg
 jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
-    while (1) {
-        if (invoke)
-            return invoke(f, args, nargs);
-        invoke = jl_atomic_load_acquire(&m->specptr.fptr1); // require forward progress with acquire annotation
-    }
+    assert(invoke && "Forgot to set specptr for jl_fptr_args!");
+    return invoke(f, args, nargs);
 }
 
 jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
@@ -2340,18 +2536,17 @@ jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_
     jl_svec_t *sparams = m->def->sparam_vals;
     assert(sparams != jl_emptysvec);
     jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3);
-    while (1) {
-        if (invoke)
-            return invoke(f, args, nargs, sparams);
-        invoke = jl_atomic_load_acquire(&m->specptr.fptr3); // require forward progress with acquire annotation
-    }
+    assert(invoke && "Forgot to set specptr for jl_fptr_sparam!");
+    return invoke(f, args, nargs, sparams);
 }
 
-JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
+JL_DLLEXPORT const jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
+
+JL_DLLEXPORT const jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return;
 
-JL_DLLEXPORT jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return;
+JL_DLLEXPORT const jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam;
 
-JL_DLLEXPORT jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam;
+JL_DLLEXPORT const jl_callptr_t jl_f_opaque_closure_call_addr = (jl_callptr_t)&jl_f_opaque_closure_call;
 
 // Return the index of the invoke api, if known
 JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst)
@@ -2376,11 +2571,11 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t
     jl_svec_t *newparams = NULL;
     JL_GC_PUSH2(&tt, &newparams);
     jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(m->sig) : mt;
-    intptr_t nspec = (kwmt == NULL || kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || kwmt == jl_kwcall_mt ? m->nargs + 1 : jl_atomic_load_relaxed(&kwmt->max_args) + 2 + 2 * (mt == jl_kwcall_mt));
-    jl_compilation_sig(ti, env, m, nspec, &newparams);
+    intptr_t max_varargs = get_max_varargs(m, kwmt, mt, NULL);
+    jl_compilation_sig(ti, env, m, max_varargs, &newparams);
     int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple;
     if (newparams) {
-        tt = jl_apply_tuple_type(newparams);
+        tt = (jl_datatype_t*)jl_apply_tuple_type(newparams);
         if (!is_compileable) {
             // compute new env, if used below
             jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &newparams);
@@ -2620,18 +2815,20 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
 }
 
 // add type of `f` to front of argument tuple type
-static jl_value_t *jl_argtype_with_function(jl_function_t *f, jl_value_t *types0)
+jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0)
+{
+    return jl_argtype_with_function_type(jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f), types0);
+}
+
+jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0)
 {
     jl_value_t *types = jl_unwrap_unionall(types0);
     size_t l = jl_nparams(types);
-    jl_value_t *tt = (jl_value_t*)jl_alloc_svec(1+l);
-    size_t i;
-    JL_GC_PUSH1(&tt);
-    if (jl_is_type(f))
-        jl_svecset(tt, 0, jl_wrap_Type(f));
-    else
-        jl_svecset(tt, 0, jl_typeof(f));
-    for(i=0; i < l; i++)
+    jl_value_t *tt = NULL;
+    JL_GC_PUSH2(&tt, &ft);
+    tt = (jl_value_t*)jl_alloc_svec(1+l);
+    jl_svecset(tt, 0, ft);
+    for (size_t i = 0; i < l; i++)
         jl_svecset(tt, i+1, jl_tparam(types,i));
     tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt);
     tt = jl_rewrap_unionall_(tt, types0);
@@ -2667,7 +2864,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache);
     while (codeinst) {
         if (codeinst->min_world <= world && world <= codeinst->max_world) {
-            jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
             if (invoke != NULL) {
                 jl_value_t *res = invoke(F, args, nargs, codeinst);
                 return verify_type(res);
@@ -2687,7 +2884,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     errno = last_errno;
     if (jl_options.malloc_log)
         jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
-    jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+    jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
     jl_value_t *res = invoke(F, args, nargs, codeinst);
     return verify_type(res);
 }
@@ -2794,15 +2991,15 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
 #undef LOOP_BODY
     i = 4;
     jl_tupletype_t *tt = NULL;
-    int64_t last_alloc;
+    int64_t last_alloc = 0;
     if (i == 4) {
         // if no method was found in the associative cache, check the full cache
-        JL_TIMING(METHOD_LOOKUP_FAST);
+        JL_TIMING(METHOD_LOOKUP_FAST, METHOD_LOOKUP_FAST);
         mt = jl_gf_mtable(F);
         jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
         entry = NULL;
         if (leafcache != (jl_array_t*)jl_an_empty_vec_any &&
-                jl_typeis(jl_atomic_load_relaxed(&mt->cache), jl_typemap_level_type)) {
+                jl_typetagis(jl_atomic_load_relaxed(&mt->cache), jl_typemap_level_type)) {
             // hashing args is expensive, but looking at mt->cache is probably even more expensive
             tt = lookup_arg_type_tuple(F, args, nargs);
             if (tt != NULL)
@@ -2840,7 +3037,7 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
         assert(tt);
         JL_LOCK(&mt->writelock);
         // cache miss case
-        JL_TIMING(METHOD_LOOKUP_SLOW);
+        JL_TIMING(METHOD_LOOKUP_SLOW, METHOD_LOOKUP_SLOW);
         mfunc = jl_mt_assoc_by_type(mt, tt, world);
         JL_UNLOCK(&mt->writelock);
         JL_GC_POP();
@@ -2925,7 +3122,7 @@ jl_value_t *jl_gf_invoke(jl_value_t *types0, jl_value_t *gf, jl_value_t **args,
     size_t world = jl_current_task->world_age;
     jl_value_t *types = NULL;
     JL_GC_PUSH1(&types);
-    types = jl_argtype_with_function(gf, types0);
+    types = jl_argtype_with_function((jl_value_t*)gf, types0);
     jl_method_t *method = (jl_method_t*)jl_gf_invoke_lookup(types, jl_nothing, world);
     JL_GC_PROMISE_ROOTED(method);
 
@@ -3016,11 +3213,9 @@ struct ml_matches_env {
     int intersections;
     size_t world;
     int lim;
+    int include_ambiguous;
     // results:
     jl_value_t *t; // array of method matches
-    size_t min_valid;
-    size_t max_valid;
-    // temporary:
     jl_method_match_t *matc; // current working method match
 };
 
@@ -3048,22 +3243,22 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio
         return 1;
     if (closure->world < ml->min_world) {
         // ignore method table entries that are part of a later world
-        if (closure->max_valid >= ml->min_world)
-            closure->max_valid = ml->min_world - 1;
+        if (closure->match.max_valid >= ml->min_world)
+            closure->match.max_valid = ml->min_world - 1;
         return 1;
     }
     else if (closure->world > ml->max_world) {
         // ignore method table entries that have been replaced in the current world
-        if (closure->min_valid <= ml->max_world)
-            closure->min_valid = ml->max_world + 1;
+        if (closure->match.min_valid <= ml->max_world)
+            closure->match.min_valid = ml->max_world + 1;
         return 1;
     }
     else {
-        // intersect the env valid range with method's valid range
-        if (closure->min_valid < ml->min_world)
-            closure->min_valid = ml->min_world;
-        if (closure->max_valid > ml->max_world)
-            closure->max_valid = ml->max_world;
+        // intersect the env valid range with method's inclusive valid range
+        if (closure->match.min_valid < ml->min_world)
+            closure->match.min_valid = ml->min_world;
+        if (closure->match.max_valid > ml->max_world)
+            closure->match.max_valid = ml->max_world;
     }
     jl_method_t *meth = ml->func.method;
     if (closure->lim >= 0 && jl_is_dispatch_tupletype(meth->sig)) {
@@ -3071,6 +3266,9 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio
             return 0;
         closure->lim--;
     }
+    // don't need to consider other similar methods if this ml will always fully intersect with them and dominates all of them
+    if (!closure->include_ambiguous || closure->lim != -1)
+        typemap_slurp_search(ml, &closure->match);
     closure->matc = make_method_match((jl_tupletype_t*)closure->match.ti,
         closure->match.env, meth,
         closure->match.issubty ? FULLY_COVERS : NOT_FULLY_COVERS);
@@ -3085,11 +3283,283 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio
     return 1;
 }
 
-static int ml_mtable_visitor(jl_methtable_t *mt, void *env)
-{
-    return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, (struct typemap_intersection_env*)env);
+static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0)
+{
+    struct typemap_intersection_env* env = (struct typemap_intersection_env*)closure0;
+    return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), jl_cachearg_offset(mt), env);
+}
+
+
+// Visit the candidate methods, starting from t[idx], to determine a possible valid sort ordering,
+// where every morespecific method appears before any method which it has a common
+// intersection with but is not partly ambiguous with (ambiguity is transitive, particularly
+// if lim==-1, although morespecific is not transitive).
+// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
+// Inputs:
+//  * `t`: the array of vertexes (method matches)
+//  * `idx`: the next vertex to add to the output
+//  * `visited`: the state of the algorithm for each vertex in `t`: either 1 if we visited it already or 1+depth if we are visiting it now
+//  * `stack`: the state of the algorithm for the current vertex (up to length equal to `t`): the list of all vertexes currently in the depth-first path or in the current SCC
+//  * `result`: the output of the algorithm, a sorted list of vertexes (up to length `lim`)
+//  * `allambig`: a list of all vertexes with an ambiguity (up to length equal to `t`), discovered while running the rest of the algorithm
+//  * `lim`: either -1 for unlimited matches, or the maximum length for `result` before returning failure (return -1).
+//           If specified as -1, this will return extra matches that would have been elided from the list because they were already covered by an earlier match.
+//           This gives a sort of maximal set of matching methods (up to the first minmax method).
+//           If specified as -1, the sorting will also include all "weak" edges (every ambiguous pair) which will create much larger ambiguity cycles,
+//           resulting in a less accurate sort order and much less accurate `*has_ambiguity` result.
+//  * `include_ambiguous`: whether to filter out fully ambiguous matches from `result`
+//  * `*has_ambiguity`: whether the algorithm does not need to compute if there is an unresolved ambiguity
+//  * `*found_minmax`: whether there is a minmax method already found, so future fully_covers matches should be ignored
+// Outputs:
+//  * `*has_ambiguity`: whether the caller should check if there remains an unresolved ambiguity (in `allambig`)
+// Returns:
+//  * -1: too many matches for lim, other outputs are undefined
+//  *  0: the child(ren) have been added to the output
+//  * 1+: the children are part of this SCC (up to this depth)
+// TODO: convert this function into an iterative call, rather than recursive
+static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, arraylist_t *stack, arraylist_t *result, arraylist_t *allambig, int lim, int include_ambiguous, int *has_ambiguity, int *found_minmax)
+{
+    size_t cycle = (size_t)visited->items[idx];
+    if (cycle != 0)
+        return cycle - 1; // depth remaining
+    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, idx);
+    jl_method_t *m = matc->method;
+    jl_value_t *ti = (jl_value_t*)matc->spec_types;
+    int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
+    // first check if this new method is actually already fully covered by an
+    // existing match and we can just ignore this entry quickly
+    size_t result_len = 0;
+    if (subt) {
+        if (*found_minmax == 2)
+            visited->items[idx] = (void*)1;
+    }
+    else if (lim != -1) {
+        for (; result_len < result->len; result_len++) {
+            size_t idx2 = (size_t)result->items[result_len];
+            jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
+            jl_method_t *m2 = matc2->method;
+            if (jl_subtype(ti, m2->sig)) {
+                if (include_ambiguous) {
+                    if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
+                        continue;
+                }
+                visited->items[idx] = (void*)1;
+                break;
+            }
+        }
+    }
+    if ((size_t)visited->items[idx] == 1)
+        return 0;
+    arraylist_push(stack, (void*)idx);
+    size_t depth = stack->len;
+    visited->items[idx] = (void*)(1 + depth);
+    cycle = depth;
+    int addambig = 0;
+    int mayexclude = 0;
+    // First visit all "strong" edges where the child is definitely better.
+    // This likely won't hit any cycles, but might (because morespecific is not transitive).
+    // Along the way, record if we hit any ambiguities-we may need to track those later.
+    for (size_t childidx = 0; childidx < jl_array_len(t); childidx++) {
+        if (childidx == idx)
+            continue;
+        int child_cycle = (size_t)visited->items[childidx];
+        if (child_cycle == 1)
+            continue; // already handled
+        if (child_cycle != 0 && child_cycle - 1 >= cycle)
+            continue; // already part of this cycle
+        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
+        jl_method_t *m2 = matc2->method;
+        int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
+        // TODO: we could change this to jl_has_empty_intersection(ti, (jl_value_t*)matc2->spec_types);
+        // since we only care about sorting of the intersections the user asked us about
+        if (!subt2 && jl_has_empty_intersection(m2->sig, m->sig))
+            continue;
+        int msp = jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig);
+        int msp2 = !msp && jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig);
+        if (!msp) {
+            if (subt || !include_ambiguous || (lim != -1 && msp2)) {
+                if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
+                    // this may be filtered out as fully intersected, if applicable later
+                    mayexclude = 1;
+                }
+            }
+            if (!msp2) {
+                addambig = 1; // record there is a least one previously-undetected ambiguity that may need to be investigated later (between m and m2)
+            }
+        }
+        if (lim == -1 ? msp : !msp2) // include only strong or also weak edges, depending on whether the result size is limited
+            continue;
+        // m2 is (lim!=-1 ? better : not-worse), so attempt to visit it first
+        // if limited, then we want to visit only better edges, because that results in finding k best matches quickest
+        // if not limited, then we want to visit all edges, since that results in finding the largest SCC cycles, which requires doing the fewest intersections
+        child_cycle = sort_mlmatches(t, childidx, visited, stack, result, allambig, lim, include_ambiguous, has_ambiguity, found_minmax);
+        if (child_cycle == -1)
+            return -1;
+        if (child_cycle && child_cycle < cycle) {
+            // record the cycle will resolve at depth "cycle"
+            cycle = child_cycle;
+        }
+        if (stack->len == depth) {
+            // if this child resolved without hitting a cycle, then there is
+            // some probability that this method is already fully covered now
+            // (same check as before), and we can delete this vertex now without
+            // anyone noticing (too much)
+            if (subt) {
+                if (*found_minmax == 2)
+                    visited->items[idx] = (void*)1;
+            }
+            else if (lim != -1) {
+                for (; result_len < result->len; result_len++) {
+                    size_t idx2 = (size_t)result->items[result_len];
+                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
+                    jl_method_t *m2 = matc2->method;
+                    if (jl_subtype(ti, m2->sig)) {
+                        if (include_ambiguous) {
+                            if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
+                                continue;
+                        }
+                        visited->items[idx] = (void*)1;
+                        break;
+                    }
+                }
+            }
+            if ((size_t)visited->items[idx] == 1) {
+                assert(cycle == depth);
+                size_t childidx = (size_t)arraylist_pop(stack);
+                assert(childidx == idx); (void)childidx;
+                assert(!subt || *found_minmax == 2);
+                return 0;
+            }
+        }
+    }
+    if (matc->fully_covers == NOT_FULLY_COVERS && addambig)
+        arraylist_push(allambig, (void*)idx);
+    if (cycle != depth)
+        return cycle;
+    result_len = result->len;
+    if (stack->len == depth) {
+        // Found one "best" method to add right now. But we might exclude it if
+        // we determined earlier that we had that option.
+        if (mayexclude) {
+            if (!subt || *found_minmax == 2)
+                visited->items[idx] = (void*)1;
+        }
+    }
+    else {
+        // We have a set of ambiguous methods. Record that.
+        // This is greatly over-approximated for lim==-1
+        *has_ambiguity = 1;
+        // If we followed weak edges above, then this also fully closed the ambiguity cycle
+        if (lim == -1)
+            addambig = 0;
+        // If we're only returning possible matches, now filter out this method
+        // if its intersection is fully ambiguous in this SCC group.
+        // This is a repeat of the "first check", now that we have completed the cycle analysis
+        for (size_t i = depth - 1; i < stack->len; i++) {
+            size_t childidx = (size_t)stack->items[i];
+            jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
+            jl_value_t *ti = (jl_value_t*)matc->spec_types;
+            int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
+            if ((size_t)visited->items[childidx] == 1) {
+                assert(subt);
+                continue;
+            }
+            assert(visited->items[childidx] == (void*)(2 + i));
+            // if we only followed strong edges before above
+            // check also if this set has an unresolved ambiguity missing from it
+            if (lim != -1 && !addambig) {
+                for (size_t j = 0; j < allambig->len; j++) {
+                    if ((size_t)allambig->items[j] == childidx) {
+                        addambig = 1;
+                        break;
+                    }
+                }
+            }
+            // always remove fully_covers matches after the first minmax ambiguity group is handled
+            if (subt) {
+                if (*found_minmax)
+                    visited->items[childidx] = (void*)1;
+                continue;
+            }
+            else if (lim != -1) {
+                // when limited, don't include this match if it was covered by an earlier one
+                for (size_t result_len = 0; result_len < result->len; result_len++) {
+                    size_t idx2 = (size_t)result->items[result_len];
+                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
+                    jl_method_t *m2 = matc2->method;
+                    if (jl_subtype(ti, m2->sig)) {
+                        if (include_ambiguous) {
+                            if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
+                                continue;
+                        }
+                        visited->items[childidx] = (void*)1;
+                        break;
+                    }
+                }
+            }
+        }
+        if (!include_ambiguous && lim == -1) {
+            for (size_t i = depth - 1; i < stack->len; i++) {
+                size_t childidx = (size_t)stack->items[i];
+                if ((size_t)visited->items[childidx] == 1)
+                    continue;
+                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
+                jl_method_t *m = matc->method;
+                jl_value_t *ti = (jl_value_t*)matc->spec_types;
+                for (size_t j = depth - 1; j < stack->len; j++) {
+                    if (i == j)
+                        continue;
+                    size_t idx2 = (size_t)stack->items[j];
+                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2);
+                    jl_method_t *m2 = matc2->method;
+                    int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
+                    // if their intersection contributes to the ambiguity cycle
+                    // and the contribution of m is fully ambiguous with the portion of the cycle from m2
+                    if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
+                        // but they aren't themselves simply ordered (here
+                        // we don't consider that a third method might be
+                        // disrupting that ordering and just consider them
+                        // pairwise to keep this simple).
+                        if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) &&
+                            !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) {
+                            visited->items[childidx] = (void*)-1;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    // copy this cycle into the results
+    for (size_t i = depth - 1; i < stack->len; i++) {
+        size_t childidx = (size_t)stack->items[i];
+        if ((size_t)visited->items[childidx] == 1)
+            continue;
+        if ((size_t)visited->items[childidx] != -1) {
+            assert(visited->items[childidx] == (void*)(2 + i));
+            visited->items[childidx] = (void*)-1;
+            if (lim == -1 || result->len < lim)
+                arraylist_push(result, (void*)childidx);
+            else
+                return -1;
+        }
+    }
+    // now finally cleanup the stack
+    while (stack->len >= depth) {
+        size_t childidx = (size_t)arraylist_pop(stack);
+        // always remove fully_covers matches after the first minmax ambiguity group is handled
+        //jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx);
+        if (matc->fully_covers != NOT_FULLY_COVERS && !addambig)
+            *found_minmax = 2;
+        if (visited->items[childidx] != (void*)-1)
+            continue;
+        visited->items[childidx] = (void*)1;
+    }
+    return 0;
 }
 
+
+
 // This is the collect form of calling jl_typemap_intersection_visitor
 // with optimizations to skip fully shadowed methods.
 //
@@ -3105,6 +3575,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
                               int intersections, size_t world, int cache_result,
                               size_t *min_valid, size_t *max_valid, int *ambig)
 {
+    JL_TIMING(METHOD_MATCH, METHOD_MATCH);
+    if (world > jl_atomic_load_acquire(&jl_world_counter))
+        return jl_nothing; // the future is not enumerable
     int has_ambiguity = 0;
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)type);
     assert(jl_is_datatype(unw));
@@ -3117,10 +3590,11 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
         else
             va = NULL;
     }
-    struct ml_matches_env env = {{ml_matches_visitor, (jl_value_t*)type, va,
+    struct ml_matches_env env = {{ml_matches_visitor, (jl_value_t*)type, va, /* .search_slurp = */ 0,
+            /* .min_valid = */ *min_valid, /* .max_valid = */ *max_valid,
             /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0},
-        intersections, world, lim, /* .t = */ jl_an_empty_vec_any,
-        /* .min_valid = */ *min_valid, /* .max_valid = */ *max_valid, /* .matc = */ NULL};
+        intersections, world, lim, include_ambiguous, /* .t = */ jl_an_empty_vec_any,
+        /* .matc = */ NULL};
     struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec, 1, ~(size_t)0};
     jl_value_t *isect2 = NULL;
     JL_GC_PUSH6(&env.t, &env.matc, &env.match.env, &search.env, &env.match.ti, &isect2);
@@ -3183,7 +3657,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
                 return env.t;
             }
         }
-        if (!jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, &env.match)) {
+        if (!ml_mtable_visitor(mt, &env.match)) {
             JL_GC_POP();
             return jl_nothing;
         }
@@ -3195,8 +3669,8 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
             return jl_nothing;
         }
     }
-    *min_valid = env.min_valid;
-    *max_valid = env.max_valid;
+    *min_valid = env.match.min_valid;
+    *max_valid = env.match.max_valid;
     // done with many of these values now
     env.match.ti = NULL; env.matc = NULL; env.match.env = NULL; search.env = NULL;
     size_t i, j, len = jl_array_len(env.t);
@@ -3284,270 +3758,151 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
                 len = 1;
             }
         }
+        if (minmax && lim == 0) {
+            // protect some later algorithms from underflow
+            JL_GC_POP();
+            return jl_nothing;
+        }
     }
     if (len > 1) {
-        // need to partially domsort the graph now into a list
-        // (this is an insertion sort attempt)
-        // if we have a minmax method, we ignore anything less specific
-        // we'll clean that up next
-        for (i = 1; i < len; i++) {
-            env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-            jl_method_t *m = env.matc->method;
-            int subt = env.matc->fully_covers != NOT_FULLY_COVERS;
-            if ((minmax != NULL || (minmax_ambig && !include_ambiguous)) && subt) {
-                continue; // already the biggest (skip will filter others)
-            }
-            for (j = 0; j < i; j++) {
-                jl_method_match_t *matc2 = (jl_method_match_t *)jl_array_ptr_ref(env.t, i - j - 1);
-                jl_method_t *m2 = matc2->method;
-                int subt2 = matc2->fully_covers != NOT_FULLY_COVERS;
-                if (!subt2 && subt)
-                    break;
-                if (subt == subt2) {
-                    if (lim != -1) {
-                        if (subt || !jl_has_empty_intersection(m->sig, m2->sig))
-                            if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
-                                break;
-                    }
-                    else {
-                        // if unlimited, use approximate sorting, with the only
-                        // main downside being that it may be overly-
-                        // conservative at reporting existence of ambiguities
-                        if (jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                            break;
-                    }
-                }
-                jl_array_ptr_set(env.t, i - j, matc2);
-            }
-            jl_array_ptr_set(env.t, i - j, env.matc);
-        }
-        char *skip = (char*)alloca(len);
-        memset(skip, 0, len);
+        arraylist_t stack, visited, result, allambig;
+        arraylist_new(&result, lim != -1 && lim < len ? lim : len);
+        arraylist_new(&stack, 0);
+        arraylist_new(&visited, len);
+        arraylist_new(&allambig, len);
+        arraylist_grow(&visited, len);
+        memset(visited.items, 0, len * sizeof(size_t));
         // if we had a minmax method (any subtypes), now may now be able to
-        // quickly cleanup some of our sort result
-        if (minmax != NULL || (minmax_ambig && !include_ambiguous)) {
-            for (i = 0; i < len; i++) {
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (minmax != matc && matc->fully_covers != NOT_FULLY_COVERS) {
-                    skip[i] = 1;
-                }
-            }
-        }
-        if (include_ambiguous && lim == -1 && ambig == NULL && !minmax_ambig) {
-            // in this case, we don't actually need to compute the ambiguity
-            // information at all as the user doesn't need us to filter them
-            // out or report them
-        }
-        else {
-            // now that the results are (mostly) sorted, assign group numbers to each ambiguity
-            // by computing the specificity-ambiguity matrix covering this query
-            uint32_t *ambig_groupid = (uint32_t*)alloca(len * sizeof(uint32_t));
-            for (i = 0; i < len; i++)
-                ambig_groupid[i] = i;
-            // as we go, keep a rough count of how many methods are disjoint, which
-            // gives us a lower bound on how many methods we will be returning
-            // and lets us stop early if we reach our limit
-            int ndisjoint = minmax ? 1 : 0;
-            for (i = 0; i < len; i++) {
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (skip[i]) {
-                    // if there was a minmax method, we can just pretend the rest are all in the same group:
-                    // they're all together but unsorted in the list, since we'll drop them all later anyways
-                    assert(matc->fully_covers != NOT_FULLY_COVERS);
-                    if (ambig_groupid[len - 1] > i)
-                        ambig_groupid[len - 1] = i; // ambiguity covering range [i:len)
-                    break;
-                }
-                jl_method_t *m = matc->method;
-                int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-                int rsubt = jl_egal((jl_value_t*)matc->spec_types, m->sig);
-                int disjoint = 1;
-                for (j = len; j > i; j--) {
-                    if (ambig_groupid[j - 1] < i) {
-                        disjoint = 0;
-                        break;
-                    }
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j - 1);
-                    // can't use skip[j - 1] here, since we still need to make sure the minmax dominates
-                    jl_method_t *m2 = matc2->method;
-                    int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
-                    int rsubt2 = jl_egal((jl_value_t*)matc2->spec_types, m2->sig);
-                    jl_value_t *ti;
-                    if (!subt && !subt2 && rsubt && rsubt2 && lim == -1 && ambig == NULL)
-                        // these would only be filtered out of the list as
-                        // ambiguous if they are also type-equal, as we
-                        // aren't skipping matches and the user doesn't
-                        // care if we report any ambiguities
-                        continue;
-                    if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
-                        continue;
-                    if (subt) {
-                        ti = (jl_value_t*)matc2->spec_types;
-                        isect2 = NULL;
-                    }
-                    else if (subt2) {
-                        ti = (jl_value_t*)matc->spec_types;
-                        isect2 = NULL;
-                    }
-                    else {
-                        jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &env.match.ti, &isect2);
-                        ti = env.match.ti;
-                    }
-                    if (ti != jl_bottom_type) {
-                        disjoint = 0;
-                        // m and m2 are ambiguous, but let's see if we can find another method (m3)
-                        // that dominates their intersection, and means we can ignore this
-                        size_t k;
-                        for (k = i; k > 0; k--) {
-                            jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, k - 1);
-                            jl_method_t *m3 = matc3->method;
-                            if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig)))
-                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig)
-                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig))
-                                break;
-                        }
-                        if (k == 0) {
-                            ambig_groupid[j - 1] = i; // ambiguity covering range [i:j)
-                            isect2 = NULL;
-                            break;
-                        }
-                    }
-                    isect2 = NULL;
-                }
-                if (disjoint && lim >= 0) {
-                    ndisjoint += 1;
-                    if (ndisjoint > lim) {
-                        JL_GC_POP();
-                        return jl_nothing;
-                    }
-                }
-            }
-            // then we'll merge those numbers to assign each item in the group the same number
-            // (similar to Kosaraju's SCC algorithm?)
-            uint32_t groupid = 0;
-            uint32_t grouphi = 0;
-            for (i = 0; i < len; i++) {
-                j = len - i - 1;
-                uint32_t agid = ambig_groupid[j];
-                if (agid != j) { // thus agid < j
-                    if (grouphi == 0) {
-                        groupid = agid;
-                        grouphi = j;
-                    }
-                    else if (agid < groupid) {
-                        groupid = agid;
-                    }
-                }
-                if (grouphi && j == groupid) {
-                    do {
-                        ambig_groupid[grouphi--] = groupid;
-                    } while (grouphi > j);
-                    ambig_groupid[j] = groupid;
-                    groupid = 0;
-                    grouphi = 0;
-                }
-            }
-            // always remove matches after the first subtype, now that we've sorted the list for ambiguities
-            for (i = 0; i < len; i++) {
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (matc->fully_covers == FULLY_COVERS) { // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-                    uint32_t agid = ambig_groupid[i];
-                    while (i < len && agid == ambig_groupid[i])
-                        i++; // keep ambiguous ones
-                    for (; i < len; i++)
-                        skip[i] = 1; // drop the rest
-                }
+        // quickly cleanup some of methods
+        int found_minmax = 0;
+        if (minmax != NULL)
+            found_minmax = 2;
+        else if (minmax_ambig && !include_ambiguous)
+            found_minmax = 1;
+        if (ambig == NULL) // if we don't care about the result, set it now so we won't bother attempting to compute it accurately later
+            has_ambiguity = 1;
+        for (i = 0; i < len; i++) {
+            assert(visited.items[i] == (void*)0 || visited.items[i] == (void*)1);
+            jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+            if (matc->fully_covers != NOT_FULLY_COVERS && found_minmax) {
+                // this was already handled above and below, so we won't learn anything new
+                // by visiting it and it might be a bit costly
+                continue;
             }
-            // when limited, skip matches that are covered by earlier ones (and aren't perhaps ambiguous with them)
-            if (lim != -1) {
-                for (i = 0; i < len; i++) {
-                    if (skip[i])
-                        continue;
-                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                    jl_method_t *m = matc->method;
-                    jl_tupletype_t *ti = matc->spec_types;
-                    if (matc->fully_covers == FULLY_COVERS)
-                        break; // remaining matches are ambiguous or already skipped
-                    for (j = 0; j < i; j++) {
-                        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
-                        jl_method_t *m2 = matc2->method;
-                        if (jl_subtype((jl_value_t*)ti, m2->sig)) {
-                            if (ambig_groupid[i] != ambig_groupid[j]) {
-                                skip[i] = 1;
-                                break;
-                            }
-                            else if (!include_ambiguous) {
-                                if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) {
-                                    skip[i] = 1;
-                                    break;
-                                }
-                            }
-                        }
-                    }
-                }
+            int child_cycle = sort_mlmatches((jl_array_t*)env.t, i, &visited, &stack, &result, &allambig, lim == -1 || minmax == NULL ? lim : lim - 1, include_ambiguous, &has_ambiguity, &found_minmax);
+            if (child_cycle == -1) {
+                arraylist_free(&allambig);
+                arraylist_free(&visited);
+                arraylist_free(&stack);
+                arraylist_free(&result);
+                JL_GC_POP();
+                return jl_nothing;
             }
-            // Compute whether anything could be ambiguous by seeing if any two
-            // remaining methods in the result are in the same ambiguity group.
-            assert(len > 0);
-            uint32_t agid = ambig_groupid[0];
-            for (i = 1; i < len; i++) {
-                if (!skip[i]) {
-                    if (agid == ambig_groupid[i]) {
-                        has_ambiguity = 1;
-                        break;
-                    }
-                    agid = ambig_groupid[i];
-                }
+            assert(child_cycle == 0); (void)child_cycle;
+            assert(stack.len == 0);
+            assert(visited.items[i] == (void*)1);
+        }
+        // now compute whether there were ambiguities left in this cycle
+        if (has_ambiguity == 0 && allambig.len > 0) {
+            if (lim == -1) {
+                // lim is over-approximated, so has_ambiguities is too
+                has_ambiguity = 1;
             }
-            // If we're only returning possible matches, now filter out any method
-            // whose intersection is fully ambiguous with the group it is in.
-            if (!include_ambiguous) {
-                for (i = 0; i < len; i++) {
-                    if (skip[i])
-                        continue;
-                    uint32_t agid = ambig_groupid[i];
-                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+            else {
+                // go back and find the additional ambiguous methods and temporary add them to the stack
+                // (potentially duplicating them from lower on the stack to here)
+                jl_value_t *ti = NULL;
+                jl_value_t *isect2 = NULL;
+                JL_GC_PUSH2(&ti, &isect2);
+                for (size_t i = 0; i < allambig.len; i++) {
+                    size_t idx = (size_t)allambig.items[i];
+                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx);
                     jl_method_t *m = matc->method;
-                    jl_tupletype_t *ti = matc->spec_types;
                     int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-                    char ambig1 = 0;
-                    for (j = agid; j < len && ambig_groupid[j] == agid; j++) {
-                        if (j == i)
+                    for (size_t idx2 = 0; idx2 < jl_array_len(env.t); idx2++) {
+                        if (idx2 == idx)
                             continue;
-                        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
+                        // laborious test, checking for existence and coverage of another method (m3)
+                        // outside of the ambiguity group that dominates any ambiguous methods,
+                        // and means we can ignore this for has_ambiguity
+                        // (has_ambiguity is overestimated for lim==-1, since we don't compute skipped matches either)
+                        // n.b. even if we skipped them earlier, they still might
+                        // contribute to the ambiguities (due to lock of transitivity of
+                        // morespecific over subtyping)
+                        // TODO: we could improve this result by checking if the removal of some
+                        // edge earlier means that this subgraph is now well-ordered and then be
+                        // allowed to ignore these vertexes entirely here
+                        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx2);
                         jl_method_t *m2 = matc2->method;
                         int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
+                        if (subt) {
+                            ti = (jl_value_t*)matc2->spec_types;
+                            isect2 = NULL;
+                        }
+                        else if (subt2) {
+                            ti = (jl_value_t*)matc->spec_types;
+                            isect2 = NULL;
+                        }
+                        else {
+                            jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &ti, &isect2);
+                        }
                         // if their intersection contributes to the ambiguity cycle
-                        if (subt || subt2 || !jl_has_empty_intersection((jl_value_t*)ti, m2->sig)) {
-                            // and the contribution of m is ambiguous with the portion of the cycle from m2
-                            if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
-                                // but they aren't themselves simply ordered (here
-                                // we don't consider that a third method might be
-                                // disrupting that ordering and just consider them
-                                // pairwise to keep this simple).
-                                if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) &&
-                                    !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) {
-                                    ambig1 = 1;
-                                    break;
-                                }
+                        if (ti == jl_bottom_type)
+                            continue;
+                        // and they aren't themselves simply ordered
+                        if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) ||
+                            jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
+                            continue;
+                        // now look for a third method m3 that dominated these and that fully covered this intersection already
+                        size_t k;
+                        for (k = 0; k < result.len; k++) {
+                            size_t idx3 = (size_t)result.items[k];
+                            if (idx3 == idx || idx3 == idx2) {
+                                has_ambiguity = 1;
+                                break;
+                            }
+                            jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx3);
+                            jl_method_t *m3 = matc3->method;
+                            if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig)))
+                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig)
+                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig)) {
+                                //if (jl_subtype(matc->spec_types, ti) || jl_subtype(matc->spec_types, matc3->m3->sig))
+                                //    // check if it covered not only this intersection, but all intersections with matc
+                                //    // if so, we do not need to check all of them separately
+                                //    j = len;
+                                break;
                             }
                         }
+                        if (k == result.len)
+                            has_ambiguity = 1;
+                        isect2 = NULL;
+                        ti = NULL;
+                        if (has_ambiguity)
+                            break;
                     }
-                    if (ambig1)
-                        skip[i] = 1;
+                    if (has_ambiguity)
+                        break;
                 }
+                JL_GC_POP();
             }
         }
-        // cleanup array to remove skipped entries
-        for (i = 0, j = 0; i < len; i++) {
+        arraylist_free(&allambig);
+        arraylist_free(&visited);
+        arraylist_free(&stack);
+        for (j = 0; j < result.len; j++) {
+            i = (size_t)result.items[j];
             jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-            if (!skip[i]) {
-                jl_array_ptr_set(env.t, j++, matc);
-                // remove our sentinel entry markers
-                if (matc->fully_covers == SENTINEL)
-                    matc->fully_covers = NOT_FULLY_COVERS;
-            }
+            // remove our sentinel entry markers
+            if (matc->fully_covers == SENTINEL)
+                matc->fully_covers = NOT_FULLY_COVERS;
+            result.items[j] = (void*)matc;
+        }
+        if (minmax) {
+            arraylist_push(&result, minmax);
+            j++;
         }
+        memcpy(jl_array_data(env.t), result.items, j * sizeof(jl_method_match_t*));
+        arraylist_free(&result);
         if (j != len)
             jl_array_del_end((jl_array_t*)env.t, len - j);
         len = j;
@@ -3558,7 +3913,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
             jl_method_t *meth = env.matc->method;
             jl_svec_t *tpenv = env.matc->sparams;
             JL_LOCK(&mt->writelock);
-            cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.min_valid, env.max_valid, tpenv);
+            cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.match.min_valid, env.match.max_valid, tpenv);
             JL_UNLOCK(&mt->writelock);
         }
     }
@@ -3585,23 +3940,24 @@ int jl_has_concrete_subtype(jl_value_t *typ)
     return ((jl_datatype_t*)typ)->has_concrete_subtype;
 }
 
-JL_DLLEXPORT void jl_typeinf_timing_begin(void)
+JL_DLLEXPORT uint64_t jl_typeinf_timing_begin(void)
 {
     jl_task_t *ct = jl_current_task;
-    if (!ct->reentrant_timing++) {
-        ct->inference_start_time = jl_hrtime();
-    }
+    if (ct->reentrant_timing & 1)
+        return 0;
+    ct->reentrant_timing |= 1;
+    return jl_hrtime();
 }
 
-JL_DLLEXPORT void jl_typeinf_timing_end(void)
+JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start)
 {
+    if (!start)
+        return;
     jl_task_t *ct = jl_current_task;
-    if (!--ct->reentrant_timing) {
-        if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
-            uint64_t inftime = jl_hrtime() - ct->inference_start_time;
-            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime);
-        }
-        ct->inference_start_time = 0;
+    ct->reentrant_timing &= ~1u;
+    if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
+        uint64_t inftime = jl_hrtime() - start;
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime);
     }
 }
 
diff --git a/src/init.c b/src/init.c
index 2bfdebe00dfaf..9c18a60eb8b06 100644
--- a/src/init.c
+++ b/src/init.c
@@ -348,7 +348,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
     // TODO: Destroy threads?
 
     jl_destroy_timing(); // cleans up the current timing_stack for noreturn
-#ifdef ENABLE_TIMINGS
+#ifdef USE_TIMING_COUNTS
     jl_print_timings();
 #endif
     jl_teardown_codegen(); // prints stats
@@ -380,7 +380,7 @@ JL_DLLEXPORT void jl_postoutput_hook(void)
     return;
 }
 
-static void post_boot_hooks(void);
+void post_boot_hooks(void);
 
 JL_DLLEXPORT void *jl_libjulia_internal_handle;
 JL_DLLEXPORT void *jl_libjulia_handle;
@@ -704,6 +704,9 @@ static void jl_set_io_wait(int v)
 }
 
 extern jl_mutex_t jl_modules_mutex;
+extern jl_mutex_t precomp_statement_out_lock;
+extern jl_mutex_t newly_inferred_mutex;
+extern jl_mutex_t global_roots_lock;
 
 static void restore_fp_env(void)
 {
@@ -716,6 +719,15 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
 
 JL_DLLEXPORT int jl_default_debug_info_kind;
 
+static void init_global_mutexes(void) {
+    JL_MUTEX_INIT(&jl_modules_mutex, "jl_modules_mutex");
+    JL_MUTEX_INIT(&precomp_statement_out_lock, "precomp_statement_out_lock");
+    JL_MUTEX_INIT(&newly_inferred_mutex, "newly_inferred_mutex");
+    JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock");
+    JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock");
+    JL_MUTEX_INIT(&typecache_lock, "typecache_lock");
+}
+
 JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 {
     // initialize many things, in no particular order
@@ -745,7 +757,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     jl_safepoint_init();
     jl_page_size = jl_getpagesize();
     htable_new(&jl_current_modules, 0);
-    JL_MUTEX_INIT(&jl_modules_mutex);
+    init_global_mutexes();
     jl_precompile_toplevel_module = NULL;
     ios_set_io_wait_func = jl_set_io_wait;
     jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.),
@@ -753,22 +765,24 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     jl_init_uv();
     init_stdio();
     restore_fp_env();
-    restore_signals();
+    if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
+        restore_signals();
+
     jl_init_intrinsic_properties();
 
+    // Important offset for external codegen.
+    jl_task_gcstack_offset = offsetof(jl_task_t, gcstack);
+    jl_task_ptls_offset = offsetof(jl_task_t, ptls);
+
     jl_prep_sanitizers();
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(1, &stack_lo, &stack_hi);
 
-    jl_libjulia_internal_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1);
+    jl_libjulia_internal_handle = jl_find_dynamic_library_by_addr(&jl_load_dynamic_library);
+    jl_libjulia_handle = jl_find_dynamic_library_by_addr(&jl_any_type);
 #ifdef _OS_WINDOWS_
     jl_exe_handle = GetModuleHandleA(NULL);
     jl_RTLD_DEFAULT_handle = jl_libjulia_internal_handle;
-    if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
-                            (LPCWSTR)&jl_any_type,
-                            (HMODULE*)&jl_libjulia_handle)) {
-        jl_error("could not load base module");
-    }
     jl_ntdll_handle = jl_dlopen("ntdll.dll", JL_RTLD_NOLOAD); // bypass julia's pathchecking for system dlls
     jl_kernel32_handle = jl_dlopen("kernel32.dll", JL_RTLD_NOLOAD);
     jl_crtdll_handle = jl_dlopen(jl_crtdll_name, JL_RTLD_NOLOAD);
@@ -803,11 +817,15 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 
     arraylist_new(&jl_linkage_blobs, 0);
     arraylist_new(&jl_image_relocs, 0);
+    arraylist_new(&eytzinger_image_tree, 0);
+    arraylist_new(&eytzinger_idxs, 0);
+    arraylist_push(&eytzinger_idxs, (void*)0);
+    arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
 
     jl_ptls_t ptls = jl_init_threadtls(0);
 
 #ifdef MMTK_GC
-    initialize_collection((void *)ptls);
+    mmtk_initialize_collection((void *)ptls);
 #endif
 #pragma GCC diagnostic push
 #if defined(_COMPILER_GCC_) && __GNUC__ >= 12
@@ -822,6 +840,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 
 static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
 {
+    JL_TIMING(JULIA_INIT, JULIA_INIT);
     jl_resolve_sysimg_location(rel);
     // loads sysimg if available, and conditionally sets jl_options.cpu_target
     if (jl_options.image_file)
@@ -856,6 +875,7 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
     if (jl_base_module == NULL) {
         // nthreads > 1 requires code in Base
         jl_atomic_store_relaxed(&jl_n_threads, 1);
+        jl_n_gcthreads = 0;
     }
     jl_start_threads();
 
@@ -877,80 +897,6 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
         jl_install_sigint_handler();
 }
 
-static jl_value_t *core(const char *name)
-{
-    return jl_get_global(jl_core_module, jl_symbol(name));
-}
-
-// fetch references to things defined in boot.jl
-static void post_boot_hooks(void)
-{
-    jl_char_type    = (jl_datatype_t*)core("Char");
-    jl_int8_type    = (jl_datatype_t*)core("Int8");
-    jl_int16_type   = (jl_datatype_t*)core("Int16");
-    jl_float16_type = (jl_datatype_t*)core("Float16");
-    jl_float32_type = (jl_datatype_t*)core("Float32");
-    jl_float64_type = (jl_datatype_t*)core("Float64");
-    jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat");
-    jl_number_type  = (jl_datatype_t*)core("Number");
-    jl_signed_type  = (jl_datatype_t*)core("Signed");
-    jl_datatype_t *jl_unsigned_type = (jl_datatype_t*)core("Unsigned");
-    jl_datatype_t *jl_integer_type = (jl_datatype_t*)core("Integer");
-
-    jl_bool_type->super = jl_integer_type;
-    jl_uint8_type->super = jl_unsigned_type;
-    jl_uint16_type->super = jl_unsigned_type;
-    jl_uint32_type->super = jl_unsigned_type;
-    jl_uint64_type->super = jl_unsigned_type;
-    jl_int32_type->super = jl_signed_type;
-    jl_int64_type->super = jl_signed_type;
-
-    jl_errorexception_type = (jl_datatype_t*)core("ErrorException");
-    jl_stackovf_exception  = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError"));
-    jl_diverror_exception  = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
-    jl_undefref_exception  = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
-    jl_undefvarerror_type  = (jl_datatype_t*)core("UndefVarError");
-    jl_atomicerror_type    = (jl_datatype_t*)core("ConcurrencyViolationError");
-    jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
-    jl_boundserror_type    = (jl_datatype_t*)core("BoundsError");
-    jl_memory_exception    = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError"));
-    jl_readonlymemory_exception = jl_new_struct_uninit((jl_datatype_t*)core("ReadOnlyMemoryError"));
-    jl_typeerror_type      = (jl_datatype_t*)core("TypeError");
-    jl_argumenterror_type  = (jl_datatype_t*)core("ArgumentError");
-    jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
-    jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
-    jl_initerror_type      = (jl_datatype_t*)core("InitError");
-    jl_pair_type           = core("Pair");
-    jl_kwcall_func         = core("kwcall");
-    jl_kwcall_mt           = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt;
-    jl_atomic_store_relaxed(&jl_kwcall_mt->max_args, 0);
-
-    jl_weakref_type = (jl_datatype_t*)core("WeakRef");
-    jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
-
-    jl_init_box_caches();
-
-    // set module field of primitive types
-    jl_svec_t *bindings = jl_atomic_load_relaxed(&jl_core_module->bindings);
-    jl_value_t **table = jl_svec_data(bindings);
-    for (size_t i = 0; i < jl_svec_len(bindings); i++) {
-        if (table[i] != jl_nothing) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-            if (v) {
-                if (jl_is_unionall(v))
-                    v = jl_unwrap_unionall(v);
-                if (jl_is_datatype(v)) {
-                    jl_datatype_t *tt = (jl_datatype_t*)v;
-                    tt->name->module = jl_core_module;
-                    if (tt->name->mt)
-                        tt->name->mt->module = jl_core_module;
-                }
-            }
-        }
-    }
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/interpreter.c b/src/interpreter.c
index bf41a2eaa9fbd..c08496f72ce04 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -102,7 +102,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
 
     fname = eval_value(args[0], s);
     jl_methtable_t *mt = NULL;
-    if (jl_typeis(fname, jl_methtable_type)) {
+    if (jl_typetagis(fname, jl_methtable_type)) {
         mt = (jl_methtable_t*)fname;
     }
     atypes = eval_value(args[1], s);
@@ -184,7 +184,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         else
             return s->locals[jl_source_nslots(src) + id];
     }
-    if (jl_is_slot(e) || jl_is_argument(e)) {
+    if (jl_is_slotnumber(e) || jl_is_argument(e)) {
         ssize_t n = jl_slot_number(e);
         if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL)
             jl_error("access to invalid slot number");
@@ -230,7 +230,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     else if (head == jl_isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
-        if (jl_is_slot(sym) || jl_is_argument(sym)) {
+        if (jl_is_slotnumber(sym) || jl_is_argument(sym)) {
             ssize_t n = jl_slot_number(sym);
             if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL)
                 jl_error("access to invalid slot number");
@@ -472,7 +472,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
             if (head == jl_assign_sym) {
                 jl_value_t *lhs = jl_exprarg(stmt, 0);
                 jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s);
-                if (jl_is_slot(lhs)) {
+                if (jl_is_slotnumber(lhs)) {
                     ssize_t n = jl_slot_number(lhs);
                     assert(n <= jl_source_nslots(s->src) && n > 0);
                     s->locals[n - 1] = rhs;
@@ -608,7 +608,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
         }
         else if (jl_is_newvarnode(stmt)) {
             jl_value_t *var = jl_fieldref(stmt, 0);
-            assert(jl_is_slot(var));
+            assert(jl_is_slotnumber(var));
             ssize_t n = jl_slot_number(var);
             assert(n <= jl_source_nslots(s->src) && n > 0);
             s->locals[n - 1] = NULL;
@@ -626,7 +626,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 
 // preparing method IR for interpreter
 
-jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi)
+jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world)
 {
     jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&mi->uninferred);
     if (jl_is_method(mi->def.value)) {
@@ -636,12 +636,12 @@ jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi)
             }
             else {
                 assert(mi->def.method->generator);
-                src = jl_code_for_staged(mi);
+                src = jl_code_for_staged(mi, world);
             }
         }
         if (src && (jl_value_t*)src != jl_nothing) {
             JL_GC_PUSH1(&src);
-            src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src);
+            src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src);
             jl_atomic_store_release(&mi->uninferred, (jl_value_t*)src);
             jl_gc_wb(mi, src);
             JL_GC_POP();
@@ -659,9 +659,11 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
 {
     interpreter_state *s;
     jl_method_instance_t *mi = codeinst->def;
-    jl_code_info_t *src = jl_code_for_interpreter(mi);
+    jl_task_t *ct = jl_current_task;
+    size_t world = ct->world_age;
+    jl_code_info_t *src = jl_code_for_interpreter(mi, world);
     jl_array_t *stmts = src->code;
-    assert(jl_typeis(stmts, jl_array_any_type));
+    assert(jl_typetagis(stmts, jl_array_any_type));
     unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2;
     jl_value_t **locals = NULL;
     JL_GC_PUSHFRAME(s, locals, nroots);
@@ -696,12 +698,12 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     return r;
 }
 
-JL_DLLEXPORT jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret_call;
+JL_DLLEXPORT const jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret_call;
 
 jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
 {
     jl_method_t *source = oc->source;
-    jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_array_t*)source->source);
+    jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source);
     interpreter_state *s;
     unsigned nroots = jl_source_nslots(code) + jl_source_nssavalues(code) + 2;
     jl_task_t *ct = jl_current_task;
@@ -734,8 +736,8 @@ jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **ar
     jl_value_t *r = eval_body(code->code, s, 0, 0);
     locals[0] = r; // GC root
     JL_GC_PROMISE_ROOTED(r);
-    jl_typeassert(r, jl_tparam1(jl_typeof(oc)));
     ct->world_age = last_age;
+    jl_typeassert(r, jl_tparam1(jl_typeof(oc)));
     JL_GC_POP();
     return r;
 }
@@ -746,7 +748,7 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t
     unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src);
     JL_GC_PUSHFRAME(s, s->locals, nroots);
     jl_array_t *stmts = src->code;
-    assert(jl_typeis(stmts, jl_array_any_type));
+    assert(jl_typetagis(stmts, jl_array_any_type));
     s->src = src;
     s->module = m;
     s->sparam_vals = jl_emptysvec;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index b822907e63524..7bef27f477534 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -45,10 +45,10 @@ FunctionType *get_intr_args5(LLVMContext &C) { return FunctionType::get(JuliaTyp
 
 const auto &runtime_func() {
     static struct runtime_funcs_t {
-        std::array<JuliaFunction *, num_intrinsics> runtime_func;
+        std::array<JuliaFunction<> *, num_intrinsics> runtime_func;
         runtime_funcs_t() :
         runtime_func{
-#define ADD_I(name, nargs) new JuliaFunction{XSTR(jl_##name), get_intr_args##nargs, nullptr},
+#define ADD_I(name, nargs) new JuliaFunction<>{XSTR(jl_##name), get_intr_args##nargs, nullptr},
 #define ADD_HIDDEN ADD_I
 #define ALIAS(alias, base) nullptr,
     INTRINSICS
@@ -79,12 +79,10 @@ const auto &float_func() {
             float_func[sub_float] = true;
             float_func[mul_float] = true;
             float_func[div_float] = true;
-            float_func[rem_float] = true;
             float_func[add_float_fast] = true;
             float_func[sub_float_fast] = true;
             float_func[mul_float_fast] = true;
             float_func[div_float_fast] = true;
-            float_func[rem_float_fast] = true;
             float_func[fma_float] = true;
             float_func[muladd_float] = true;
             float_func[eq_float] = true;
@@ -110,8 +108,8 @@ const auto &float_func() {
     return float_funcs.float_func;
 }
 
-extern "C"
-JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_impl(void)
+extern "C" JL_DLLEXPORT_CODEGEN
+uint32_t jl_get_LLVM_VERSION_impl(void)
 {
     return 10000 * LLVM_VERSION_MAJOR + 100 * LLVM_VERSION_MINOR
 #ifdef LLVM_VERSION_PATCH
@@ -156,13 +154,13 @@ static Type *FLOATT(Type *t)
 }
 
 // convert an llvm type to same-size int type
-static Type *INTT(Type *t)
+static Type *INTT(Type *t, const DataLayout &DL)
 {
     auto &ctxt = t->getContext();
     if (t->isIntegerTy())
         return t;
     if (t->isPointerTy())
-        return getSizeTy(ctxt);
+        return DL.getIntPtrType(t);
     if (t == getDoubleTy(ctxt))
         return getInt64Ty(ctxt);
     if (t == getFloatTy(ctxt))
@@ -345,22 +343,19 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
         unboxed = emit_bitcast(ctx, unboxed, to);
     }
     else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
-#ifndef JL_NDEBUG
-        const DataLayout &DL = jl_Module->getDataLayout();
-#endif
         assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
         AllocaInst *cast = ctx.builder.CreateAlloca(ty);
         ctx.builder.CreateStore(unboxed, cast);
         unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
     }
     else if (frompointer) {
-        Type *INTT_to = INTT(to);
+        Type *INTT_to = INTT(to, DL);
         unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
         if (INTT_to != to)
             unboxed = ctx.builder.CreateBitCast(unboxed, to);
     }
     else if (topointer) {
-        Type *INTT_to = INTT(to);
+        Type *INTT_to = INTT(to, DL);
         if (to != INTT_to)
             unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
         unboxed = emit_inttoptr(ctx, unboxed, to);
@@ -486,11 +481,11 @@ static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
 static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs)
 {
     Function *func = prepare_call(runtime_func()[f]);
-    Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs);
+    SmallVector<Value *> argvalues(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argvalues[i] = boxed(ctx, argv[i]);
     }
-    Value *r = ctx.builder.CreateCall(func, makeArrayRef(argvalues, nargs));
+    Value *r = ctx.builder.CreateCall(func, argvalues);
     return mark_julia_type(ctx, r, true, (jl_value_t*)jl_any_type);
 }
 
@@ -509,20 +504,25 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
     Type *llvmt = bitstype_to_llvm((jl_value_t*)bt, ctx.builder.getContext(), true);
     uint32_t nb = jl_datatype_size(bt);
 
+    Value *bt_value_rt = NULL;
+    if (!jl_is_concrete_type((jl_value_t*)bt)) {
+        bt_value_rt = boxed(ctx, bt_value);
+        emit_concretecheck(ctx, bt_value_rt, "bitcast: target type not a leaf primitive type");
+    }
+
     // Examine the second argument //
     bool isboxed;
     Type *vxt = julia_type_to_llvm(ctx, v.typ, &isboxed);
-
     if (!jl_is_primitivetype(v.typ) || jl_datatype_size(v.typ) != nb) {
-        Value *typ = emit_typeof_boxed(ctx, v);
+        Value *typ = emit_typeof(ctx, v, false, false);
         if (!jl_is_primitivetype(v.typ)) {
             if (jl_is_datatype(v.typ) && !jl_is_abstracttype(v.typ)) {
-                emit_error(ctx, "bitcast: expected primitive type value for second argument");
+                emit_error(ctx, "bitcast: value not a primitive type");
                 return jl_cgval_t();
             }
             else {
                 Value *isprimitive = emit_datatype_isprimitivetype(ctx, typ);
-                error_unless(ctx, isprimitive, "bitcast: expected primitive type value for second argument");
+                error_unless(ctx, isprimitive, "bitcast: value not a primitive type");
             }
         }
         if (jl_is_datatype(v.typ) && !jl_is_abstracttype(v.typ)) {
@@ -575,7 +575,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
         return mark_julia_type(ctx, vx, false, bt);
     }
     else {
-        Value *box = emit_allocobj(ctx, nb, boxed(ctx, bt_value));
+        Value *box = emit_allocobj(ctx, nb, bt_value_rt);
         init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut);
         return mark_julia_type(ctx, box, true, bt->name->wrapper);
     }
@@ -586,6 +586,8 @@ static jl_cgval_t generic_cast(
         intrinsic f, Instruction::CastOps Op,
         const jl_cgval_t *argv, bool toint, bool fromint)
 {
+    auto &TT = ctx.emission_context.TargetTriple;
+    auto &DL = ctx.emission_context.DL;
     const jl_cgval_t &targ = argv[0];
     const jl_cgval_t &v = argv[1];
     jl_datatype_t *jlto = staticeval_bitstype(targ);
@@ -595,11 +597,11 @@ static jl_cgval_t generic_cast(
     Type *to = bitstype_to_llvm((jl_value_t*)jlto, ctx.builder.getContext(), true);
     Type *vt = bitstype_to_llvm(v.typ, ctx.builder.getContext(), true);
     if (toint)
-        to = INTT(to);
+        to = INTT(to, DL);
     else
         to = FLOATT(to);
     if (fromint)
-        vt = INTT(vt);
+        vt = INTT(vt, DL);
     else
         vt = FLOATT(vt);
     if (!to || !vt)
@@ -608,17 +610,17 @@ static jl_cgval_t generic_cast(
     if (!CastInst::castIsValid(Op, from, to))
         return emit_runtime_call(ctx, f, argv, 2);
     if (Op == Instruction::FPExt) {
-#ifdef JL_NEED_FLOATTEMP_VAR
-        // Target platform might carry extra precision.
-        // Force rounding to single precision first. The reason is that it's
-        // fine to keep working in extended precision as long as it's
-        // understood that everything is implicitly rounded to 23 bits,
-        // but if we start looking at more bits we need to actually do the
-        // rounding first instead of carrying around incorrect low bits.
-        Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
-        ctx.builder.CreateStore(from, jlfloattemp_var);
-        from  = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
-#endif
+        if (jl_floattemp_var_needed(TT)) {
+            // Target platform might carry extra precision.
+            // Force rounding to single precision first. The reason is that it's
+            // fine to keep working in extended precision as long as it's
+            // understood that everything is implicitly rounded to 23 bits,
+            // but if we start looking at more bits we need to actually do the
+            // rounding first instead of carrying around incorrect low bits.
+            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
+            ctx.builder.CreateStore(from, jlfloattemp_var);
+            from  = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
+        }
     }
     Value *ans = ctx.builder.CreateCast(Op, from, to);
     if (f == fptosi || f == fptoui)
@@ -627,7 +629,9 @@ static jl_cgval_t generic_cast(
         return mark_julia_type(ctx, ans, false, jlto);
     }
     else {
-        Value *box = emit_allocobj(ctx, nb, boxed(ctx, targ));
+        Value *targ_rt = boxed(ctx, targ);
+        emit_concretecheck(ctx, targ_rt, std::string(jl_intrinsic_name(f)) + ": target type not a leaf primitive type");
+        Value *box = emit_allocobj(ctx, nb, targ_rt);
         init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut);
         return mark_julia_type(ctx, box, true, jlto->name->wrapper);
     }
@@ -661,8 +665,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
         return jl_cgval_t();
     }
 
-    Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), i, (jl_value_t*)jl_long_type);
-    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+    Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type);
+    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
 
     if (ety == (jl_value_t*)jl_any_type) {
         Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
@@ -674,9 +678,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
     else if (!jl_isbits(ety)) {
         assert(jl_is_datatype(ety));
         uint64_t size = jl_datatype_size(ety);
-        Value *strct = emit_allocobj(ctx, size,
-                                     literal_pointer_val(ctx, ety));
-        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(getSizeTy(ctx.builder.getContext()),
+        Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety);
+        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
         thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1);
@@ -731,23 +734,23 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
     }
     emit_typecheck(ctx, x, ety, "pointerset");
 
-    Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), i, (jl_value_t*)jl_long_type);
-    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+    Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type);
+    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
 
     Value *thePtr;
     if (ety == (jl_value_t*)jl_any_type) {
         // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
-        thePtr = emit_unbox(ctx, getSizePtrTy(ctx.builder.getContext()), e, e.typ);
+        thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ);
         Instruction *store = ctx.builder.CreateAlignedStore(
-          ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), getSizeTy(ctx.builder.getContext())),
-            ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), thePtr, im1), Align(align_nb));
+          ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size),
+            ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1), Align(align_nb));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data);
         ai.decorateInst(store);
     }
     else if (!jl_isbits(ety)) {
         thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
         uint64_t size = jl_datatype_size(ety);
-        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(getSizeTy(ctx.builder.getContext()),
+        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         emit_memcpy(ctx, ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1), jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb);
     }
@@ -819,9 +822,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
 
     if (!jl_isbits(ety)) {
         assert(jl_is_datatype(ety));
-        uint64_t size = jl_datatype_size(ety);
-        Value *strct = emit_allocobj(ctx, size,
-                                     literal_pointer_val(ctx, ety));
+        Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety);
         Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
         Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8);
         thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo());
@@ -1128,6 +1129,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
 
 static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **args, size_t nargs)
 {
+    auto &DL = ctx.emission_context.DL;
     assert(f < num_intrinsics);
     if (f == cglobal && nargs == 1)
         f = cglobal_auto;
@@ -1141,7 +1143,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
     if (f == cglobal_auto || f == cglobal)
         return emit_cglobal(ctx, args, nargs);
 
-    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    SmallVector<jl_cgval_t> argv(nargs);
     for (size_t i = 0; i < nargs; ++i) {
         jl_cgval_t arg = emit_expr(ctx, args[i + 1]);
         if (arg.typ == jl_bottom_type) {
@@ -1161,79 +1163,79 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         const jl_cgval_t &x = argv[0];
         jl_value_t *typ = jl_unwrap_unionall(x.typ);
         if (!jl_is_datatype(typ) || ((jl_datatype_t*)typ)->name != jl_array_typename)
-            return emit_runtime_call(ctx, f, argv, nargs);
+            return emit_runtime_call(ctx, f, argv.data(), nargs);
         return mark_julia_type(ctx, emit_arraylen(ctx, x), false, jl_long_type);
     }
     case pointerref:
         ++Emitted_pointerref;
         assert(nargs == 3);
-        return emit_pointerref(ctx, argv);
+        return emit_pointerref(ctx, argv.data());
     case pointerset:
         ++Emitted_pointerset;
         assert(nargs == 4);
-        return emit_pointerset(ctx, argv);
+        return emit_pointerset(ctx, argv.data());
     case atomic_fence:
         ++Emitted_atomic_fence;
         assert(nargs == 1);
-        return emit_atomicfence(ctx, argv);
+        return emit_atomicfence(ctx, argv.data());
     case atomic_pointerref:
         ++Emitted_atomic_pointerref;
         assert(nargs == 2);
-        return emit_atomic_pointerref(ctx, argv);
+        return emit_atomic_pointerref(ctx, argv.data());
     case atomic_pointerset:
     case atomic_pointerswap:
     case atomic_pointermodify:
     case atomic_pointerreplace:
         ++Emitted_atomic_pointerop;
-        return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr);
+        return emit_atomic_pointerop(ctx, f, argv.data(), nargs, nullptr);
     case bitcast:
         ++Emitted_bitcast;
         assert(nargs == 2);
-        return generic_bitcast(ctx, argv);
+        return generic_bitcast(ctx, argv.data());
     case trunc_int:
         ++Emitted_trunc_int;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::Trunc, argv, true, true);
+        return generic_cast(ctx, f, Instruction::Trunc, argv.data(), true, true);
     case sext_int:
         ++Emitted_sext_int;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::SExt, argv, true, true);
+        return generic_cast(ctx, f, Instruction::SExt, argv.data(), true, true);
     case zext_int:
         ++Emitted_zext_int;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::ZExt, argv, true, true);
+        return generic_cast(ctx, f, Instruction::ZExt, argv.data(), true, true);
     case uitofp:
         ++Emitted_uitofp;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::UIToFP, argv, false, true);
+        return generic_cast(ctx, f, Instruction::UIToFP, argv.data(), false, true);
     case sitofp:
         ++Emitted_sitofp;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::SIToFP, argv, false, true);
+        return generic_cast(ctx, f, Instruction::SIToFP, argv.data(), false, true);
     case fptoui:
         ++Emitted_fptoui;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPToUI, argv, true, false);
+        return generic_cast(ctx, f, Instruction::FPToUI, argv.data(), true, false);
     case fptosi:
         ++Emitted_fptosi;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPToSI, argv, true, false);
+        return generic_cast(ctx, f, Instruction::FPToSI, argv.data(), true, false);
     case fptrunc:
         ++Emitted_fptrunc;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPTrunc, argv, false, false);
+        return generic_cast(ctx, f, Instruction::FPTrunc, argv.data(), false, false);
     case fpext:
         ++Emitted_fpext;
         assert(nargs == 2);
-        return generic_cast(ctx, f, Instruction::FPExt, argv, false, false);
+        return generic_cast(ctx, f, Instruction::FPExt, argv.data(), false, false);
 
     case not_int: {
         ++Emitted_not_int;
         assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         if (!jl_is_primitivetype(x.typ))
-            return emit_runtime_call(ctx, f, argv, nargs);
-        Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true));
+            return emit_runtime_call(ctx, f, argv.data(), nargs);
+        Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true), DL);
         Value *from = emit_unbox(ctx, xt, x, x.typ);
         Value *ans = ctx.builder.CreateNot(from);
         return mark_julia_type(ctx, ans, false, x.typ);
@@ -1244,7 +1246,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         if (!x.constant || !jl_is_datatype(x.constant))
-            return emit_runtime_call(ctx, f, argv, nargs);
+            return emit_runtime_call(ctx, f, argv.data(), nargs);
         jl_datatype_t *dt = (jl_datatype_t*) x.constant;
 
         // select the appropriated overloaded intrinsic
@@ -1254,7 +1256,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         else if (dt == jl_float64_type)
             intr_name += "f64";
         else
-            return emit_runtime_call(ctx, f, argv, nargs);
+            return emit_runtime_call(ctx, f, argv.data(), nargs);
 
         FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, getInt1Ty(ctx.builder.getContext()));
         auto ret = ctx.builder.CreateCall(intr);
@@ -1267,14 +1269,14 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
 
         // verify argument types
         if (!jl_is_primitivetype(xinfo.typ))
-            return emit_runtime_call(ctx, f, argv, nargs);
+            return emit_runtime_call(ctx, f, argv.data(), nargs);
         Type *xtyp = bitstype_to_llvm(xinfo.typ, ctx.builder.getContext(), true);
         if (float_func()[f])
             xtyp = FLOATT(xtyp);
         else
-            xtyp = INTT(xtyp);
+            xtyp = INTT(xtyp, DL);
         if (!xtyp)
-            return emit_runtime_call(ctx, f, argv, nargs);
+            return emit_runtime_call(ctx, f, argv.data(), nargs);
         ////Bool are required to be in the range [0,1]
         ////so while they are represented as i8,
         ////the operations need to be done in mod 1
@@ -1285,31 +1287,31 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         //if (xtyp == (jl_value_t*)jl_bool_type)
         //    r = getInt1Ty(ctx.builder.getContext());
 
-        Type **argt = (Type**)alloca(sizeof(Type*) * nargs);
+        SmallVector<Type *> argt(nargs);
         argt[0] = xtyp;
 
         if (f == shl_int || f == lshr_int || f == ashr_int) {
             if (!jl_is_primitivetype(argv[1].typ))
-                return emit_runtime_call(ctx, f, argv, nargs);
-            argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true));
+                return emit_runtime_call(ctx, f, argv.data(), nargs);
+            argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true), DL);
         }
         else {
             for (size_t i = 1; i < nargs; ++i) {
                 if (xinfo.typ != argv[i].typ)
-                    return emit_runtime_call(ctx, f, argv, nargs);
+                    return emit_runtime_call(ctx, f, argv.data(), nargs);
                 argt[i] = xtyp;
             }
         }
 
         // unbox the arguments
-        Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs);
+        SmallVector<Value *> argvalues(nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argvalues[i] = emit_unbox(ctx, argt[i], argv[i], argv[i].typ);
         }
 
         // call the intrinsic
         jl_value_t *newtyp = xinfo.typ;
-        Value *r = emit_untyped_intrinsic(ctx, f, argvalues, nargs, (jl_datatype_t**)&newtyp, xinfo.typ);
+        Value *r = emit_untyped_intrinsic(ctx, f, argvalues.data(), nargs, (jl_datatype_t**)&newtyp, xinfo.typ);
         // Turn Bool operations into mod 1 now, if needed
         if (newtyp == (jl_value_t*)jl_bool_type && !r->getType()->isIntegerTy(1))
             r = ctx.builder.CreateTrunc(r, getInt1Ty(ctx.builder.getContext()));
@@ -1365,12 +1367,10 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     case sub_float: return math_builder(ctx)().CreateFSub(x, y);
     case mul_float: return math_builder(ctx)().CreateFMul(x, y);
     case div_float: return math_builder(ctx)().CreateFDiv(x, y);
-    case rem_float: return math_builder(ctx)().CreateFRem(x, y);
     case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y);
     case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y);
     case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y);
     case div_float_fast: return math_builder(ctx, true)().CreateFDiv(x, y);
-    case rem_float_fast: return math_builder(ctx, true)().CreateFRem(x, y);
     case fma_float: {
         assert(y->getType() == x->getType());
         assert(z->getType() == y->getType());
@@ -1413,7 +1413,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         jl_value_t *params[2];
         params[0] = xtyp;
         params[1] = (jl_value_t*)jl_bool_type;
-        jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+        jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2);
         *newtyp = tuptyp;
 
         Value *tupval;
@@ -1469,7 +1469,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
 
     case fpiseq: {
         *newtyp = jl_bool_type;
-        Type *it = INTT(t);
+        Type *it = INTT(t, ctx.emission_context.DL);
         Value *xi = ctx.builder.CreateBitCast(x, it);
         Value *yi = ctx.builder.CreateBitCast(y, it);
         return ctx.builder.CreateOr(ctx.builder.CreateAnd(ctx.builder.CreateFCmpUNO(x, x),
diff --git a/src/intrinsics.h b/src/intrinsics.h
index bb67460bbb31f..93747faa74160 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -19,7 +19,6 @@
     ADD_I(sub_float, 2) \
     ADD_I(mul_float, 2) \
     ADD_I(div_float, 2) \
-    ADD_I(rem_float, 2) \
     ADD_I(fma_float, 3) \
     ADD_I(muladd_float, 3) \
     /*  fast arithmetic */ \
@@ -28,7 +27,6 @@
     ALIAS(sub_float_fast, sub_float) \
     ALIAS(mul_float_fast, mul_float) \
     ALIAS(div_float_fast, div_float) \
-    ALIAS(rem_float_fast, rem_float) \
     /*  same-type comparisons */ \
     ADD_I(eq_int, 2) \
     ADD_I(ne_int, 2) \
diff --git a/src/ircode.c b/src/ircode.c
index 0e74f7700ebf2..4121d6691aa5b 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -147,7 +147,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     else if (v == (jl_value_t*)jl_base_module) {
         write_uint8(s->s, TAG_BASE);
     }
-    else if (jl_typeis(v, jl_string_type) && jl_string_len(v) == 0) {
+    else if (jl_typetagis(v, jl_string_tag << 4) && jl_string_len(v) == 0) {
         jl_encode_value(s, jl_an_empty_string);
     }
     else if (v == (jl_value_t*)s->method->module) {
@@ -197,7 +197,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         write_uint8(s->s, TAG_LONG_SSAVALUE);
         write_uint16(s->s, ((jl_ssavalue_t*)v)->id);
     }
-    else if (jl_typeis(v, jl_slotnumber_type) && jl_slot_number(v) <= UINT16_MAX && jl_slot_number(v) >= 0) {
+    else if (jl_typetagis(v, jl_slotnumber_type) && jl_slot_number(v) <= UINT16_MAX && jl_slot_number(v) >= 0) {
         write_uint8(s->s, TAG_SLOTNUMBER);
         write_uint16(s->s, jl_slot_number(v));
     }
@@ -299,7 +299,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         else
             jl_encode_value(s, inner);
     }
-    else if (jl_typeis(v, jl_int64_type)) {
+    else if (jl_typetagis(v, jl_int64_tag << 4)) {
         void *data = jl_data_ptr(v);
         if (*(int64_t*)data >= INT16_MIN && *(int64_t*)data <= INT16_MAX) {
             write_uint8(s->s, TAG_SHORTER_INT64);
@@ -314,14 +314,14 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
             write_uint64(s->s, *(int64_t*)data);
         }
     }
-    else if (jl_typeis(v, jl_int32_type)) {
+    else if (jl_typetagis(v, jl_int32_tag << 4)) {
         jl_encode_int32(s, *(int32_t*)jl_data_ptr(v));
     }
-    else if (jl_typeis(v, jl_uint8_type)) {
+    else if (jl_typetagis(v, jl_uint8_tag << 4)) {
         write_uint8(s->s, TAG_UINT8);
         write_int8(s->s, *(int8_t*)jl_data_ptr(v));
     }
-    else if (jl_typeis(v, jl_lineinfonode_type)) {
+    else if (jl_typetagis(v, jl_lineinfonode_type)) {
         write_uint8(s->s, TAG_LINEINFO);
         for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++)
             jl_encode_value(s, jl_get_nth_field(v, i));
@@ -330,7 +330,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         write_uint8(s->s, TAG_SINGLETON);
         jl_encode_value(s, jl_typeof(v));
     }
-    else if (as_literal && jl_typeis(v, jl_string_type)) {
+    else if (as_literal && jl_typetagis(v, jl_string_tag << 4)) {
         write_uint8(s->s, TAG_STRING);
         write_int32(s->s, jl_string_len(v));
         ios_write(s->s, jl_string_data(v), jl_string_len(v));
@@ -385,7 +385,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         }
     }
     else if (as_literal || jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_linenode(v) ||
-             jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slot(v) || jl_is_ssavalue(v) ||
+             jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slotnumber(v) || jl_is_ssavalue(v) ||
              (jl_isbits(jl_typeof(v)) && jl_datatype_size(jl_typeof(v)) <= 64)) {
         jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
         size_t tsz = jl_datatype_size(t);
@@ -434,13 +434,12 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     }
 }
 
-static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, uint8_t pure,
+static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds,
                                             uint8_t has_fcall, uint8_t inlining, uint8_t constprop)
 {
     jl_code_info_flags_t flags;
     flags.bits.inferred = inferred;
     flags.bits.propagate_inbounds = propagate_inbounds;
-    flags.bits.pure = pure;
     flags.bits.has_fcall = has_fcall;
     flags.bits.inlining = inlining;
     flags.bits.constprop = constprop;
@@ -611,9 +610,12 @@ static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DI
 {
     int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s));
     jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL);
-    jl_set_typeof(v, (void*)(intptr_t)0x50);
+    jl_set_typeof(v, (void*)(intptr_t)0xf50);
     jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s);
-    jl_set_typeof(v, dt);
+    if (dt->smalltag)
+        jl_set_typetagof(v, dt->smalltag, 0);
+    else
+        jl_set_typeof(v, dt);
     char *data = (char*)jl_data_ptr(v);
     size_t i, np = dt->layout->npointers;
     char *start = data;
@@ -759,9 +761,11 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
 
 // --- entry points ---
 
-JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
+typedef jl_value_t jl_string_t; // for local expressibility
+
+JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
 {
-    JL_TIMING(AST_COMPRESS);
+    JL_TIMING(AST_COMPRESS, AST_COMPRESS);
     JL_LOCK(&m->writelock); // protect the roots array (Might GC)
     assert(jl_is_method(m));
     assert(jl_is_code_info(code));
@@ -781,7 +785,7 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
         1
     };
 
-    jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, code->pure,
+    jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds,
                                                  code->has_fcall, code->inlining, code->constprop);
     write_uint8(s.s, flags.packed);
     write_uint8(s.s, code->purity.bits);
@@ -839,7 +843,7 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
     write_uint8(s.s, s.relocatability);
 
     ios_flush(s.s);
-    jl_array_t *v = jl_take_buffer(&dest);
+    jl_string_t *v = jl_pchar_to_string(s.s->buf, s.s->size);
     ios_close(s.s);
     if (jl_array_len(m->roots) == 0) {
         m->roots = NULL;
@@ -852,19 +856,19 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
     return v;
 }
 
-JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_array_t *data)
+JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_string_t *data)
 {
     if (jl_is_code_info(data))
         return (jl_code_info_t*)data;
-    JL_TIMING(AST_UNCOMPRESS);
+    JL_TIMING(AST_UNCOMPRESS, AST_UNCOMPRESS);
     JL_LOCK(&m->writelock); // protect the roots array (Might GC)
     assert(jl_is_method(m));
-    assert(jl_typeis(data, jl_array_uint8_type));
+    assert(jl_is_string(data));
     size_t i;
     ios_t src;
     ios_mem(&src, 0);
-    ios_setbuf(&src, (char*)data->data, jl_array_len(data), 0);
-    src.size = jl_array_len(data);
+    ios_setbuf(&src, (char*)jl_string_data(data), jl_string_len(data), 0);
+    src.size = jl_string_len(data);
     int en = jl_gc_enable(0); // Might GC
     jl_ircode_state s = {
         &src,
@@ -880,7 +884,6 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     code->constprop = flags.bits.constprop;
     code->inferred = flags.bits.inferred;
     code->propagate_inbounds = flags.bits.propagate_inbounds;
-    code->pure = flags.bits.pure;
     code->has_fcall = flags.bits.has_fcall;
     code->purity.bits = read_uint8(s.s);
     code->inlining_cost = read_uint16(s.s);
@@ -938,52 +941,42 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     return code;
 }
 
-JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_array_t *data)
+JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_string_t *data)
 {
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inferred;
-    assert(jl_typeis(data, jl_array_uint8_type));
+    assert(jl_is_string(data));
     jl_code_info_flags_t flags;
-    flags.packed = ((uint8_t*)data->data)[0];
+    flags.packed = jl_string_data(data)[0];
     return flags.bits.inferred;
 }
 
-JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_array_t *data)
+JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_string_t *data)
 {
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlining;
-    assert(jl_typeis(data, jl_array_uint8_type));
+    assert(jl_is_string(data));
     jl_code_info_flags_t flags;
-    flags.packed = ((uint8_t*)data->data)[0];
+    flags.packed = jl_string_data(data)[0];
     return flags.bits.inlining;
 }
 
-JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data)
-{
-    if (jl_is_code_info(data))
-        return ((jl_code_info_t*)data)->pure;
-    assert(jl_typeis(data, jl_array_uint8_type));
-    jl_code_info_flags_t flags;
-    flags.packed = ((uint8_t*)data->data)[0];
-    return flags.bits.pure;
-}
-
-JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_array_t *data)
+JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_string_t *data)
 {
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->has_fcall;
-    assert(jl_typeis(data, jl_array_uint8_type));
+    assert(jl_is_string(data));
     jl_code_info_flags_t flags;
-    flags.packed = ((uint8_t*)data->data)[0];
+    flags.packed = jl_string_data(data)[0];
     return flags.bits.has_fcall;
 }
 
-JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_array_t *data)
+JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_string_t *data)
 {
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlining_cost;
-    assert(jl_typeis(data, jl_array_uint8_type));
-    uint16_t res = jl_load_unaligned_i16((char*)data->data + 2);
+    assert(jl_is_string(data));
+    uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + 2);
     return res;
 }
 
@@ -1013,26 +1006,26 @@ JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms)
     return str;
 }
 
-JL_DLLEXPORT ssize_t jl_ir_nslots(jl_array_t *data)
+JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data)
 {
     if (jl_is_code_info(data)) {
         jl_code_info_t *func = (jl_code_info_t*)data;
         return jl_array_len(func->slotnames);
     }
     else {
-        assert(jl_typeis(data, jl_array_uint8_type));
-        int nslots = jl_load_unaligned_i32((char*)data->data + 2 + sizeof(uint16_t));
+        assert(jl_is_string(data));
+        int nslots = jl_load_unaligned_i32(jl_string_data(data) + 2 + sizeof(uint16_t));
         return nslots;
     }
 }
 
-JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_array_t *data, size_t i)
+JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_string_t *data, size_t i)
 {
     assert(i < jl_ir_nslots(data));
     if (jl_is_code_info(data))
         return ((uint8_t*)((jl_code_info_t*)data)->slotflags->data)[i];
-    assert(jl_typeis(data, jl_array_uint8_type));
-    return ((uint8_t*)data->data)[2 + sizeof(uint16_t) + sizeof(int32_t) + i];
+    assert(jl_is_string(data));
+    return jl_string_data(data)[2 + sizeof(uint16_t) + sizeof(int32_t) + i];
 }
 
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms)
@@ -1114,13 +1107,13 @@ void jl_init_serializer(void)
                      jl_densearray_type, jl_function_type, jl_typename_type,
                      jl_builtin_type, jl_task_type, jl_uniontype_type,
                      jl_array_any_type, jl_intrinsic_type,
-                     jl_abstractslot_type, jl_methtable_type, jl_typemap_level_type,
+                     jl_methtable_type, jl_typemap_level_type,
                      jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type,
                      jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type),
                      jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type,
                      jl_typeofbottom_type, jl_typeofbottom_type->super,
                      jl_namedtuple_type, jl_array_int32_type,
-                     jl_typedslot_type, jl_uint32_type, jl_uint64_type,
+                     jl_uint32_type, jl_uint64_type,
                      jl_type_type_mt, jl_nonfunction_mt,
                      jl_opaque_closure_type,
 
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index b744f9fcbd3f2..895fed9c056e2 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -12,9 +12,9 @@
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/ExecutionEngine/Orc/CompileUtils.h>
 #include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
-#if JL_LLVM_VERSION >= 130000
+#include <llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h>
+#include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h>
 #include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
-#endif
 #include <llvm/IR/Verifier.h>
 #include <llvm/Support/DynamicLibrary.h>
 #include <llvm/Support/FormattedStream.h>
@@ -26,11 +26,7 @@
 
 // target machine computation
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
-#if JL_LLVM_VERSION >= 140000
 #include <llvm/MC/TargetRegistry.h>
-#else
-#include <llvm/Support/TargetRegistry.h>
-#endif
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/Support/Host.h>
 #include <llvm/Support/TargetSelect.h>
@@ -44,9 +40,7 @@ using namespace llvm;
 #include "processor.h"
 
 #ifdef JL_USE_JITLINK
-# if JL_LLVM_VERSION >= 140000
-#  include <llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h>
-# endif
+# include <llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h>
 # include <llvm/ExecutionEngine/JITLink/EHFrameSupport.h>
 # include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
 # if JL_LLVM_VERSION >= 150000
@@ -94,44 +88,44 @@ extern "C" {
 
 enum class MSanTLS
 {
-	param = 1,             // __msan_param_tls
-	param_origin,          //__msan_param_origin_tls
-	retval,                // __msan_retval_tls
-	retval_origin,         //__msan_retval_origin_tls
-	va_arg,                // __msan_va_arg_tls
-	va_arg_origin,         // __msan_va_arg_origin_tls
-	va_arg_overflow_size,  // __msan_va_arg_overflow_size_tls
-	origin,                //__msan_origin_tls
+    param = 1,             // __msan_param_tls
+    param_origin,          //__msan_param_origin_tls
+    retval,                // __msan_retval_tls
+    retval_origin,         //__msan_retval_origin_tls
+    va_arg,                // __msan_va_arg_tls
+    va_arg_origin,         // __msan_va_arg_origin_tls
+    va_arg_overflow_size,  // __msan_va_arg_overflow_size_tls
+    origin,                //__msan_origin_tls
 };
 
 static void *getTLSAddress(void *control)
 {
-	auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
-	switch(tlsIndex)
-	{
-	case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
-	case MSanTLS::param_origin: return reinterpret_cast<void *>(&__msan_param_origin_tls);
-	case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
-	case MSanTLS::retval_origin: return reinterpret_cast<void *>(&__msan_retval_origin_tls);
-	case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
-	case MSanTLS::va_arg_origin: return reinterpret_cast<void *>(&__msan_va_arg_origin_tls);
-	case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
-	case MSanTLS::origin: return reinterpret_cast<void *>(&__msan_origin_tls);
-	default:
-		assert(false && "BAD MSAN TLS INDEX");
-		return nullptr;
-	}
+    auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
+    switch(tlsIndex)
+    {
+    case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
+    case MSanTLS::param_origin: return reinterpret_cast<void *>(&__msan_param_origin_tls);
+    case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
+    case MSanTLS::retval_origin: return reinterpret_cast<void *>(&__msan_retval_origin_tls);
+    case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
+    case MSanTLS::va_arg_origin: return reinterpret_cast<void *>(&__msan_va_arg_origin_tls);
+    case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
+    case MSanTLS::origin: return reinterpret_cast<void *>(&__msan_origin_tls);
+    default:
+        assert(false && "BAD MSAN TLS INDEX");
+        return nullptr;
+    }
 }
 }
 #endif
 
 // Snooping on which functions are being compiled, and how long it takes
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_compiles_impl(void *s)
 {
     **jl_ExecutionEngine->get_dump_compiles_stream() = (ios_t*)s;
 }
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_dump_llvm_opt_impl(void *s)
 {
     **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s;
@@ -159,6 +153,7 @@ void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT
     else {
         GV->setConstant(true);
         GV->setLinkage(GlobalValue::PrivateLinkage);
+        GV->setVisibility(GlobalValue::DefaultVisibility);
         GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
     }
 }
@@ -178,7 +173,8 @@ static jl_callptr_t _jl_compile_codeinst(
         jl_code_instance_t *codeinst,
         jl_code_info_t *src,
         size_t world,
-        orc::ThreadSafeContext context)
+        orc::ThreadSafeContext context,
+        bool is_recompile)
 {
     // caller must hold codegen_lock
     // and have disabled finalizers
@@ -190,17 +186,23 @@ static jl_callptr_t _jl_compile_codeinst(
     assert(jl_is_code_instance(codeinst));
     assert(codeinst->min_world <= world && (codeinst->max_world >= world || codeinst->max_world == 0) &&
         "invalid world for method-instance");
-    assert(src && jl_is_code_info(src));
 
+    JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE);
+#ifdef USE_TRACY
+    if (is_recompile) {
+        TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
+        TracyCZoneColor(ctx, 0xFFA500);
+    }
+#endif
     jl_callptr_t fptr = NULL;
     // emit the code in LLVM IR form
-    jl_codegen_params_t params(std::move(context)); // Locks the context
+    jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
     params.cache = true;
     params.world = world;
     jl_workqueue_t emitted;
     {
         orc::ThreadSafeModule result_m =
-            jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging);
+            jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging, params.DL, params.TargetTriple);
         jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params);
         if (result_m)
             emitted[codeinst] = {std::move(result_m), std::move(decls)};
@@ -245,10 +247,13 @@ static jl_callptr_t _jl_compile_codeinst(
         MaxWorkqueueSize.updateMax(emitted.size());
         IndirectCodeinsts += emitted.size() - 1;
     }
-    JL_TIMING(LLVM_MODULE_FINISH);
 
+    size_t i = 0;
     for (auto &def : emitted) {
         jl_code_instance_t *this_code = def.first;
+        if (i < jl_timing_print_limit)
+            jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_CURRENT_BLOCK);
+
         jl_llvm_functions_t decls = std::get<1>(def.second);
         jl_callptr_t addr;
         bool isspecsig = false;
@@ -258,26 +263,45 @@ static jl_callptr_t _jl_compile_codeinst(
         else if (decls.functionObject == "jl_fptr_sparam") {
             addr = jl_fptr_sparam_addr;
         }
+        else if (decls.functionObject == "jl_f_opaque_closure_call") {
+            addr = jl_f_opaque_closure_call_addr;
+        }
         else {
             addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
             isspecsig = true;
         }
-        if (jl_atomic_load_relaxed(&this_code->invoke) == NULL) {
-            // once set, don't change invoke-ptr, as that leads to race conditions
-            // with the (not) simultaneous updates to invoke and specptr
-            if (!decls.specFunctionObject.empty()) {
-                jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
-                this_code->isspecsig = isspecsig;
+        if (!decls.specFunctionObject.empty()) {
+            void *prev_specptr = NULL;
+            auto spec = (void*)getAddressForFunction(decls.specFunctionObject);
+            if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
+                // only set specsig and invoke if we were the first to set specptr
+                jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig);
+                // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr
+                // either assumes that specptr was null, doesn't care about specptr,
+                // or will wait until specsigflags has 0b10 set before reloading invoke
+                jl_atomic_store_release(&this_code->invoke, addr);
+                jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig));
+            } else {
+                //someone else beat us, don't commit any results
+                while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) {
+                    jl_cpu_pause();
+                }
+                addr = jl_atomic_load_relaxed(&this_code->invoke);
+            }
+        } else {
+            jl_callptr_t prev_invoke = NULL;
+            if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
+                addr = prev_invoke;
+                //TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
+                //known lesser function)
             }
-            jl_atomic_store_release(&this_code->invoke, addr);
-        }
-        else if (jl_atomic_load_relaxed(&this_code->invoke) == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) {
-            // hack to export this pointer value to jl_dump_method_disasm
-            jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
         }
         if (this_code == codeinst)
             fptr = addr;
+        i++;
     }
+    if (i > jl_timing_print_limit)
+        jl_timing_printf(JL_TIMING_CURRENT_BLOCK, "... <%d methods truncated>", i - 10);
 
     uint64_t end_time = 0;
     if (timed)
@@ -300,11 +324,13 @@ static jl_callptr_t _jl_compile_codeinst(
 const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
 
 // compile a C-callable alias
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
 {
     auto ct = jl_current_task;
-    ct->reentrant_timing++;
+    bool timed = (ct->reentrant_timing & 1) == 0;
+    if (timed)
+        ct->reentrant_timing |= 1;
     uint64_t compiler_start_time = 0;
     uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
     if (measure_compile_time_enabled)
@@ -321,7 +347,10 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
         into = &backing;
     }
     JL_LOCK(&jl_codegen_lock);
-    jl_codegen_params_t params(into->getContext());
+    auto target_info = into->withModuleDo([&](Module &M) {
+        return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
+    });
+    jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second));
     if (pparams == NULL)
         pparams = &params;
     assert(pparams->tsctx.getContext() == into->getContext().getContext());
@@ -341,9 +370,12 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
             jl_ExecutionEngine->addModule(std::move(*into));
     }
     JL_UNLOCK(&jl_codegen_lock);
-    if (!--ct->reentrant_timing && measure_compile_time_enabled) {
-        auto end = jl_hrtime();
-        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+    if (timed) {
+        if (measure_compile_time_enabled) {
+            auto end = jl_hrtime();
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+        }
+        ct->reentrant_timing &= ~1ull;
     }
     if (ctx.getContext()) {
         jl_ExecutionEngine->releaseContext(std::move(ctx));
@@ -352,7 +384,7 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
 }
 
 // declare a C-callable entry point; called during code loading from the toplevel
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
 {
     // validate arguments. try to do as many checks as possible here to avoid
@@ -395,11 +427,13 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
 }
 
 // this compiles li and emits fptr
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
 {
     auto ct = jl_current_task;
-    ct->reentrant_timing++;
+    bool timed = (ct->reentrant_timing & 1) == 0;
+    if (timed)
+        ct->reentrant_timing |= 1;
     uint64_t compiler_start_time = 0;
     uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
     bool is_recompile = false;
@@ -407,16 +441,18 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
         compiler_start_time = jl_hrtime();
     // if we don't have any decls already, try to generate it now
     jl_code_info_t *src = NULL;
-    JL_GC_PUSH1(&src);
+    jl_code_instance_t *codeinst = NULL;
+    JL_GC_PUSH2(&src, &codeinst);
     JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
-    jl_value_t *ci = jl_rettype_inferred(mi, world, world);
-    jl_code_instance_t *codeinst = (ci == jl_nothing ? NULL : (jl_code_instance_t*)ci);
+    jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world);
+    if (ci != jl_nothing)
+        codeinst = (jl_code_instance_t*)ci;
     if (codeinst) {
         src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
         if ((jl_value_t*)src == jl_nothing)
             src = NULL;
         else if (jl_is_method(mi->def.method))
-            src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
+            src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
     }
     else {
         // identify whether this is an invalidated method that is being recompiled
@@ -444,7 +480,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
             }
         }
         ++SpecFPtrCount;
-        _jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext());
+        _jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), is_recompile);
         if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL)
             codeinst = NULL;
     }
@@ -452,25 +488,43 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
         codeinst = NULL;
     }
     JL_UNLOCK(&jl_codegen_lock);
-    if (!--ct->reentrant_timing && measure_compile_time_enabled) {
-        uint64_t t_comp = jl_hrtime() - compiler_start_time;
-        if (is_recompile) {
-            jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
+    if (timed) {
+        if (measure_compile_time_enabled) {
+            uint64_t t_comp = jl_hrtime() - compiler_start_time;
+            if (is_recompile) {
+                jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
+            }
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
         }
-        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
+        ct->reentrant_timing &= ~1ull;
     }
     JL_GC_POP();
     return codeinst;
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
+void jl_generate_fptr_for_oc_wrapper_impl(jl_code_instance_t *oc_wrap)
+{
+    if (jl_atomic_load_relaxed(&oc_wrap->invoke) != NULL) {
+        return;
+    }
+    JL_LOCK(&jl_codegen_lock);
+    if (jl_atomic_load_relaxed(&oc_wrap->invoke) == NULL) {
+        _jl_compile_codeinst(oc_wrap, NULL, 1, *jl_ExecutionEngine->getContext(), 0);
+    }
+    JL_UNLOCK(&jl_codegen_lock); // Might GC
+}
+
+extern "C" JL_DLLEXPORT_CODEGEN
 void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
 {
     if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) {
         return;
     }
     auto ct = jl_current_task;
-    ct->reentrant_timing++;
+    bool timed = (ct->reentrant_timing & 1) == 0;
+    if (timed)
+        ct->reentrant_timing |= 1;
     uint64_t compiler_start_time = 0;
     uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
     if (measure_compile_time_enabled)
@@ -486,40 +540,42 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
                 // TODO: this is wrong
                 assert(def->generator);
                 // TODO: jl_code_for_staged can throw
-                src = jl_code_for_staged(unspec->def);
+                src = jl_code_for_staged(unspec->def, ~(size_t)0);
             }
             if (src && (jl_value_t*)src != jl_nothing)
-                src = jl_uncompress_ir(def, NULL, (jl_array_t*)src);
+                src = jl_uncompress_ir(def, NULL, (jl_value_t*)src);
         }
         else {
             src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred);
         }
         assert(src && jl_is_code_info(src));
         ++UnspecFPtrCount;
-        _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext());
-        if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
-            // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
-            jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr);
-        }
+        _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0);
+        jl_callptr_t null = nullptr;
+        // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
+        jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
         JL_GC_POP();
     }
     JL_UNLOCK(&jl_codegen_lock); // Might GC
-    if (!--ct->reentrant_timing && measure_compile_time_enabled) {
-        auto end = jl_hrtime();
-        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+    if (timed) {
+        if (measure_compile_time_enabled) {
+            auto end = jl_hrtime();
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+        }
+        ct->reentrant_timing &= ~1ull;
     }
 }
 
 
 // get a native disassembly for a compiled method
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
         char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
 {
     // printing via disassembly
     jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
     if (codeinst) {
-        uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
+        uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
         if (getwrapper)
             return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
         uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
@@ -528,7 +584,9 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
             // (using sentinel value `1` instead)
             // so create an exception here so we can print pretty our lies
             auto ct = jl_current_task;
-            ct->reentrant_timing++;
+            bool timed = (ct->reentrant_timing & 1) == 0;
+            if (timed)
+                ct->reentrant_timing |= 1;
             uint64_t compiler_start_time = 0;
             uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
             if (measure_compile_time_enabled)
@@ -542,25 +600,28 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
                 if (jl_is_method(def)) {
                     if (!src) {
                         // TODO: jl_code_for_staged can throw
-                        src = def->generator ? jl_code_for_staged(mi) : (jl_code_info_t*)def->source;
+                        src = def->generator ? jl_code_for_staged(mi, world) : (jl_code_info_t*)def->source;
                     }
                     if (src && (jl_value_t*)src != jl_nothing)
-                        src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
+                        src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src);
                 }
-                fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
+                fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
                 specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                 if (src && jl_is_code_info(src)) {
                     if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
-                        fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext());
+                        fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), 0);
                         specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                     }
                 }
                 JL_GC_POP();
             }
             JL_UNLOCK(&jl_codegen_lock);
-            if (!--ct->reentrant_timing && measure_compile_time_enabled) {
-                auto end = jl_hrtime();
-                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+            if (timed) {
+                if (measure_compile_time_enabled) {
+                    auto end = jl_hrtime();
+                    jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+                }
+                ct->reentrant_timing &= ~1ull;
             }
         }
         if (specfptr != 0)
@@ -753,11 +814,7 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
                 auto SecName = Sec.getName();
 #endif
                 // https://github.com/llvm/llvm-project/commit/118e953b18ff07d00b8f822dfbf2991e41d6d791
-#if JL_LLVM_VERSION >= 140000
                Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart().getValue();
-#else
-               Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart();
-#endif
             }
             return Error::success();
         });
@@ -787,10 +844,27 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
                           jitlink::PassConfiguration &Config) override {
         Config.PostAllocationPasses.push_back([this](jitlink::LinkGraph &G) {
             size_t graph_size = 0;
+            size_t code_size = 0;
+            size_t data_size = 0;
             for (auto block : G.blocks()) {
                 graph_size += block->getSize();
             }
+            for (auto &section : G.sections()) {
+                size_t secsize = 0;
+                for (auto block : section.blocks()) {
+                    secsize += block->getSize();
+                }
+                if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) {
+                    data_size += secsize;
+                } else {
+                    code_size += secsize;
+                }
+                graph_size += secsize;
+            }
             this->total_size.fetch_add(graph_size, std::memory_order_relaxed);
+            jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size);
+            jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size);
+            jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size);
             return Error::success();
         });
     }
@@ -799,9 +873,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
 // TODO: Port our memory management optimisations to JITLink instead of using the
 // default InProcessMemoryManager.
 std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() {
-#if JL_LLVM_VERSION < 140000
-    return std::make_unique<jitlink::InProcessMemoryManager>();
-#elif JL_LLVM_VERSION < 150000
+#if JL_LLVM_VERSION < 150000
     return cantFail(jitlink::InProcessMemoryManager::Create());
 #else
     return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>());
@@ -811,17 +883,11 @@ std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() {
 
 # ifdef LLVM_SHLIB
 
-#  if JL_LLVM_VERSION >= 140000
-#   define EHFRAME_RANGE(name) orc::ExecutorAddrRange name
-#   define UNPACK_EHFRAME_RANGE(name) \
+# define EHFRAME_RANGE(name) orc::ExecutorAddrRange name
+# define UNPACK_EHFRAME_RANGE(name) \
         name.Start.toPtr<uint8_t *>(), \
         static_cast<size_t>(name.size())
-#  else
-#   define EHFRAME_RANGE(name) JITTargetAddress name##Addr, size_t name##Size
-#   define UNPACK_EHFRAME_RANGE(name) \
-        jitTargetAddressToPointer<uint8_t *>(name##Addr), \
-        name##Size
-#  endif
+
 
 class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
 public:
@@ -943,32 +1009,22 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
 namespace {
     static std::unique_ptr<TargetMachine> createTargetMachine() JL_NOTSAFEPOINT {
         TargetOptions options = TargetOptions();
-#if defined(_OS_WINDOWS_)
+
+        Triple TheTriple(sys::getProcessTriple());
         // use ELF because RuntimeDyld COFF i686 support didn't exist
         // use ELF because RuntimeDyld COFF X86_64 doesn't seem to work (fails to generate function pointers)?
-#define FORCE_ELF
+        bool force_elf = TheTriple.isOSWindows();
+#ifdef FORCE_ELF
+        force_elf = true;
 #endif
+        if (force_elf) {
+            TheTriple.setObjectFormat(Triple::ELF);
+        }
         //options.PrintMachineCode = true; //Print machine code produced during JIT compiling
-#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) && JL_LLVM_VERSION < 130000
-        // tell Win32 to assume the stack is always 16-byte aligned,
-        // and to ensure that it is 16-byte aligned for out-going calls,
-        // to ensure compatibility with GCC codes
-        // In LLVM 13 and onwards this has turned into a module option
-        options.StackAlignmentOverride = 16;
-#endif
-#if defined(JL_DEBUG_BUILD) && JL_LLVM_VERSION < 130000
-        // LLVM defaults to tls stack guard, which causes issues with Julia's tls implementation
-        options.StackProtectorGuard = StackProtectorGuards::Global;
-#endif
 #if defined(MSAN_EMUTLS_WORKAROUND)
         options.EmulatedTLS = true;
         options.ExplicitEmulatedTLS = true;
 #endif
-
-        Triple TheTriple(sys::getProcessTriple());
-#if defined(FORCE_ELF)
-        TheTriple.setObjectFormat(Triple::ELF);
-#endif
         uint32_t target_flags = 0;
         auto target = jl_get_llvm_target(imaging_default(), target_flags);
         auto &TheCPU = target.first;
@@ -1021,11 +1077,11 @@ namespace {
                 );
         assert(TM && "Failed to select target machine -"
                      " Is the LLVM backend for this CPU enabled?");
-        #if (!defined(_CPU_ARM_) && !defined(_CPU_PPC64_))
-        // FastISel seems to be buggy for ARM. Ref #13321
-        if (jl_options.opt_level < 2)
-            TM->setFastISel(true);
-        #endif
+        if (!TheTriple.isARM() && !TheTriple.isPPC64()) {
+            // FastISel seems to be buggy for ARM. Ref #13321
+            if (jl_options.opt_level < 2)
+                TM->setFastISel(true);
+        }
         return std::unique_ptr<TargetMachine>(TM);
     }
 } // namespace
@@ -1065,6 +1121,9 @@ namespace {
         int optlevel;
         PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
             : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
+        // overload for newpm compatibility
+        PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &) JL_NOTSAFEPOINT
+            : PMCreator(TM, optlevel) {}
         PMCreator(const PMCreator &other) JL_NOTSAFEPOINT
             : PMCreator(*other.TM, other.optlevel) {}
         PMCreator(PMCreator &&other) JL_NOTSAFEPOINT
@@ -1090,18 +1149,23 @@ namespace {
     struct PMCreator {
         orc::JITTargetMachineBuilder JTMB;
         OptimizationLevel O;
-        PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
-            : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)) {}
+        std::vector<std::function<void()>> &printers;
+        PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
+            : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {}
 
         auto operator()() JL_NOTSAFEPOINT {
-            return std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
+            auto NPM = std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
+            printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
+                NPM->printTimers();
+            });
+            return NPM;
         }
     };
 #endif
 
     struct OptimizerT {
-        OptimizerT(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
-            : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}
+        OptimizerT(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
+            : optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {}
         OptimizerT(OptimizerT&) JL_NOTSAFEPOINT = delete;
         OptimizerT(OptimizerT&&) JL_NOTSAFEPOINT = default;
 
@@ -1137,7 +1201,7 @@ namespace {
                     }
                 }
 
-                JL_TIMING(LLVM_OPT);
+                JL_TIMING(LLVM_OPT, LLVM_OPT);
 
                 //Run the optimization
                 assert(!verifyModule(M, &errs()));
@@ -1209,27 +1273,24 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
     return jl_data_layout;
 }
 
-JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel)
+JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers)
   : CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
       std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
     OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer,
-            llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel))) {}
+            llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel, PrintLLVMTimers))) {}
+
+#ifdef _COMPILER_ASAN_ENABLED_
+int64_t ___asan_globals_registered;
+#endif
 
 JuliaOJIT::JuliaOJIT()
   : TM(createTargetMachine()),
     DL(jl_create_datalayout(*TM)),
-#if JL_LLVM_VERSION >= 130000
     ES(cantFail(orc::SelfExecutorProcessControl::Create())),
-#else
-    ES(),
-#endif
     GlobalJD(ES.createBareJITDylib("JuliaGlobals")),
     JD(ES.createBareJITDylib("JuliaOJIT")),
     ContextPool([](){
         auto ctx = std::make_unique<LLVMContext>();
-#ifdef JL_LLVM_OPAQUE_POINTERS
-        ctx->setOpaquePointers(true);
-#endif
         return orc::ThreadSafeContext(std::move(ctx));
     }),
 #ifdef JL_USE_JITLINK
@@ -1247,10 +1308,10 @@ JuliaOJIT::JuliaOJIT()
 #endif
     LockLayer(ObjectLayer),
     Pipelines{
-        std::make_unique<PipelineT>(LockLayer, *TM, 0),
-        std::make_unique<PipelineT>(LockLayer, *TM, 1),
-        std::make_unique<PipelineT>(LockLayer, *TM, 2),
-        std::make_unique<PipelineT>(LockLayer, *TM, 3),
+        std::make_unique<PipelineT>(LockLayer, *TM, 0, PrintLLVMTimers),
+        std::make_unique<PipelineT>(LockLayer, *TM, 1, PrintLLVMTimers),
+        std::make_unique<PipelineT>(LockLayer, *TM, 2, PrintLLVMTimers),
+        std::make_unique<PipelineT>(LockLayer, *TM, 3, PrintLLVMTimers),
     },
     OptSelLayer(Pipelines)
 {
@@ -1276,27 +1337,39 @@ JuliaOJIT::JuliaOJIT()
         });
 #endif
 
+    std::string ErrorStr;
+
+    // Make sure that libjulia-internal is loaded and placed first in the
+    // DynamicLibrary order so that calls to runtime intrinsics are resolved
+    // to the correct library when multiple libjulia-*'s have been loaded
+    // (e.g. when we `ccall` into a PackageCompiler.jl-created shared library)
+    sys::DynamicLibrary libjulia_internal_dylib = sys::DynamicLibrary::addPermanentLibrary(
+      jl_libjulia_internal_handle, &ErrorStr);
+    if(!ErrorStr.empty())
+        report_fatal_error(llvm::Twine("FATAL: unable to dlopen libjulia-internal\n") + ErrorStr);
+
     // Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve
     // symbols in the program as well. The nullptr argument to the function
     // tells DynamicLibrary to load the program, not a library.
-    std::string ErrorStr;
     if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, &ErrorStr))
         report_fatal_error(llvm::Twine("FATAL: unable to dlopen self\n") + ErrorStr);
 
+    GlobalJD.addGenerator(
+      std::make_unique<orc::DynamicLibrarySearchGenerator>(
+        libjulia_internal_dylib,
+        DL.getGlobalPrefix(),
+        orc::DynamicLibrarySearchGenerator::SymbolPredicate()));
+
     GlobalJD.addGenerator(
       cantFail(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
         DL.getGlobalPrefix())));
 
     // Resolve non-lock free atomic functions in the libatomic1 library.
     // This is the library that provides support for c11/c++11 atomic operations.
-    const char *const libatomic =
-#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
-        "libatomic.so.1";
-#elif defined(_OS_WINDOWS_)
-        "libatomic-1.dll";
-#else
-        NULL;
-#endif
+    auto TT = getTargetTriple();
+    const char *const libatomic = TT.isOSLinux() || TT.isOSFreeBSD() ?
+        "libatomic.so.1" : TT.isOSWindows() ?
+        "libatomic-1.dll" : nullptr;
     if (libatomic) {
         static void *atomic_hdl = jl_load_dynamic_library(libatomic, JL_RTLD_LOCAL, 0);
         if (atomic_hdl != NULL) {
@@ -1313,6 +1386,7 @@ JuliaOJIT::JuliaOJIT()
 
     JD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly);
 
+#if JULIA_FLOAT16_ABI == 1
     orc::SymbolAliasMap jl_crt = {
         { mangle("__gnu_h2f_ieee"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } },
         { mangle("__extendhfsf2"),  { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } },
@@ -1321,6 +1395,7 @@ JuliaOJIT::JuliaOJIT()
         { mangle("__truncdfhf2"),   { mangle("julia__truncdfhf2"),   JITSymbolFlags::Exported } }
     };
     cantFail(GlobalJD.define(orc::symbolAliases(jl_crt)));
+#endif
 
 #ifdef MSAN_EMUTLS_WORKAROUND
     orc::SymbolMap msan_crt;
@@ -1343,6 +1418,11 @@ JuliaOJIT::JuliaOJIT()
         reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported);
     cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt)));
 #endif
+#ifdef _COMPILER_ASAN_ENABLED_
+    orc::SymbolMap asan_crt;
+    asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported);
+    cantFail(JD.define(orc::absoluteSymbols(asan_crt)));
+#endif
 }
 
 JuliaOJIT::~JuliaOJIT() = default;
@@ -1360,7 +1440,7 @@ void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr)
 
 void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
 {
-    JL_TIMING(LLVM_MODULE_FINISH);
+    JL_TIMING(LLVM_ORC, LLVM_ORC);
     ++ModulesAdded;
     orc::SymbolLookupSet NewExports;
     TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
@@ -1406,8 +1486,8 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
 
 JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
 {
-    orc::JITDylib* SearchOrders[2] = {&GlobalJD, &JD};
-    ArrayRef<orc::JITDylib*> SearchOrder = makeArrayRef(&SearchOrders[ExportedSymbolsOnly ? 0 : 1], ExportedSymbolsOnly ? 2 : 1);
+    orc::JITDylib* SearchOrders[2] = {&JD, &GlobalJD};
+    ArrayRef<orc::JITDylib*> SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 2 : 1);
     auto Sym = ES.lookup(SearchOrder, Name);
     if (Sym)
         return *Sym;
@@ -1470,24 +1550,21 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod
 
 
 #ifdef JL_USE_JITLINK
-# if JL_LLVM_VERSION < 140000
-#  pragma message("JIT debugging (GDB integration) not available on LLVM < 14.0 (for JITLink)")
-void JuliaOJIT::enableJITDebuggingSupport() {}
-# else
 extern "C" orc::shared::CWrapperFunctionResult
 llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size);
 
 void JuliaOJIT::enableJITDebuggingSupport()
 {
-    // We do not use GDBJITDebugInfoRegistrationPlugin::Create, as the runtime name
-    // lookup is unnecessarily involved/fragile for our in-process JIT use case
-    // (with the llvm_orc_registerJITLoaderGDBAllocAction symbol being in either
-    // libjulia-codegen or yet another shared library for LLVM depending on the build
-    // flags, etc.).
-    const auto Addr = ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction);
-    ObjectLayer.addPlugin(std::make_unique<orc::GDBJITDebugInfoRegistrationPlugin>(Addr));
+    orc::SymbolMap GDBFunctions;
+    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
+    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
+    cantFail(JD.define(orc::absoluteSymbols(GDBFunctions)));
+    if (TM->getTargetTriple().isOSBinFormatMachO())
+        ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple())));
+    else if (TM->getTargetTriple().isOSBinFormatELF())
+        //EPCDebugObjectRegistrar doesn't take a JITDylib, so we have to directly provide the call address
+        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, orc::ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper))));
 }
-# endif
 #else
 void JuliaOJIT::enableJITDebuggingSupport()
 {
@@ -1533,6 +1610,16 @@ size_t JuliaOJIT::getTotalBytes() const
 }
 #endif
 
+void JuliaOJIT::printTimers()
+{
+#ifdef JL_USE_NEW_PM
+    for (auto &printer : PrintLLVMTimers) {
+        printer();
+    }
+#endif
+    reportAndResetTimings();
+}
+
 JuliaOJIT *jl_ExecutionEngine;
 
 // destructively move the contents of src into dest
@@ -1644,8 +1731,8 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS
             NamedMDNode *sNMD = src.getNamedMetadata("llvm.dbg.cu");
             if (sNMD) {
                 NamedMDNode *dNMD = dest.getOrInsertNamedMetadata("llvm.dbg.cu");
-                for (NamedMDNode::op_iterator I = sNMD->op_begin(), E = sNMD->op_end(); I != E; ++I) {
-                    dNMD->addOperand(*I);
+                for (MDNode *I : sNMD->operands()) {
+                    dNMD->addOperand(I);
                 }
             }
         });
@@ -1713,11 +1800,12 @@ TargetIRAnalysis JuliaOJIT::getTargetIRAnalysis() const {
 }
 
 static void jl_decorate_module(Module &M) {
-#if defined(_CPU_X86_64_) && defined(_OS_WINDOWS_)
-    // Add special values used by debuginfo to build the UnwindData table registration for Win64
-    // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text`
-    // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here.
-    M.appendModuleInlineAsm("\
+    auto TT = Triple(M.getTargetTriple());
+    if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
+        // Add special values used by debuginfo to build the UnwindData table registration for Win64
+        // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text`
+        // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here.
+        M.appendModuleInlineAsm("\
     .section .text                  \n\
     .type   __UnwindData,@object    \n\
     .p2align        2, 0x90         \n\
@@ -1730,7 +1818,7 @@ static void jl_decorate_module(Module &M) {
     __catchjmp:                     \n\
         .zero   12                  \n\
         .size   __catchjmp, 12");
-#endif
+    }
 }
 
 // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
@@ -1812,7 +1900,7 @@ void add_named_global(StringRef name, void *addr)
     jl_ExecutionEngine->addGlobalMapping(name, (uint64_t)(uintptr_t)addr);
 }
 
-extern "C" JL_DLLEXPORT
+extern "C" JL_DLLEXPORT_CODEGEN
 size_t jl_jit_total_bytes_impl(void)
 {
     return jl_ExecutionEngine->getTotalBytes();
diff --git a/src/jitlayers.h b/src/jitlayers.h
index f62ee595a843b..c056a6b3418a3 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -44,10 +44,12 @@
 // for Mac/aarch64.
 // #define JL_FORCE_JITLINK
 
-#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK)
-# if JL_LLVM_VERSION < 130000
-#  pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults")
-# endif
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)
+# define HAS_SANITIZER
+#endif
+// The sanitizers don't play well with our memory manager
+
+#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER)
 # define JL_USE_JITLINK
 #endif
 
@@ -73,7 +75,7 @@ GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) JL_NOTSAFEPOINT;
 DataLayout jl_create_datalayout(TargetMachine &TM) JL_NOTSAFEPOINT;
 
 static inline bool imaging_default() JL_NOTSAFEPOINT {
-    return jl_options.image_codegen || (jl_generating_output() && jl_options.use_pkgimages);
+    return jl_options.image_codegen || (jl_generating_output() && (!jl_options.incremental || jl_options.use_pkgimages));
 }
 
 struct OptimizationOptions {
@@ -91,6 +93,12 @@ struct OptimizationOptions {
     }
 };
 
+// LLVM's new pass manager is scheduled to replace the legacy pass manager
+// for middle-end IR optimizations.
+#if JL_LLVM_VERSION >= 150000
+#define JL_USE_NEW_PM
+#endif
+
 struct NewPM {
     std::unique_ptr<TargetMachine> TM;
     StandardInstrumentations SI;
@@ -103,6 +111,8 @@ struct NewPM {
     ~NewPM() JL_NOTSAFEPOINT;
 
     void run(Module &M) JL_NOTSAFEPOINT;
+
+    void printTimers() JL_NOTSAFEPOINT;
 };
 
 struct AnalysisManagers {
@@ -163,7 +173,8 @@ typedef struct _jl_llvm_functions_t {
 } jl_llvm_functions_t;
 
 struct jl_returninfo_t {
-    llvm::Function *decl;
+    llvm::FunctionCallee decl;
+    llvm::AttributeList attrs;
     enum CallingConv {
         Boxed = 0,
         Register,
@@ -182,6 +193,8 @@ typedef std::tuple<jl_returninfo_t::CallingConv, unsigned, llvm::Function*, bool
 typedef struct _jl_codegen_params_t {
     orc::ThreadSafeContext tsctx;
     orc::ThreadSafeContext::Lock tsctx_lock;
+    DataLayout DL;
+    Triple TargetTriple;
 
     inline LLVMContext &getContext() {
         return *tsctx.getContext();
@@ -190,19 +203,18 @@ typedef struct _jl_codegen_params_t {
     // outputs
     std::vector<std::pair<jl_code_instance_t*, jl_codegen_call_target_t>> workqueue;
     std::map<void*, GlobalVariable*> globals;
-    std::map<std::tuple<jl_code_instance_t*,bool>, Function*> external_fns;
+    std::map<std::tuple<jl_code_instance_t*,bool>, GlobalVariable*> external_fns;
     std::map<jl_datatype_t*, DIType*> ditypes;
     std::map<jl_datatype_t*, Type*> llvmtypes;
     DenseMap<Constant*, GlobalVariable*> mergedConstants;
     // Map from symbol name (in a certain library) to its GV in sysimg and the
     // DL handle address in the current session.
     StringMap<std::pair<GlobalVariable*,SymMapGV>> libMapGV;
-#ifdef _OS_WINDOWS_
+    SymMapGV symMapDefault;
+    // These symMaps are Windows-only
     SymMapGV symMapExe;
     SymMapGV symMapDll;
     SymMapGV symMapDlli;
-#endif
-    SymMapGV symMapDefault;
     // Map from distinct callee's to its GOT entry.
     // In principle the attribute, function type and calling convention
     // don't need to be part of the key but it seems impossible to forward
@@ -213,14 +225,16 @@ typedef struct _jl_codegen_params_t {
         std::tuple<GlobalVariable*, FunctionType*, CallingConv::ID>,
         GlobalVariable*>> allPltMap;
     std::unique_ptr<Module> _shared_module;
-    inline Module &shared_module(Module &from);
+    inline Module &shared_module();
     // inputs
     size_t world = 0;
     const jl_cgparams_t *params = &jl_default_cgparams;
     bool cache = false;
     bool external_linkage = false;
     bool imaging;
-    _jl_codegen_params_t(orc::ThreadSafeContext ctx) : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), imaging(imaging_default()) {}
+    _jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple)
+        : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()),
+            DL(std::move(DL)), TargetTriple(std::move(triple)), imaging(imaging_default()) {}
 } jl_codegen_params_t;
 
 jl_llvm_functions_t jl_emit_code(
@@ -417,7 +431,7 @@ class JuliaOJIT {
         std::unique_ptr<WNMutex> mutex;
     };
     struct PipelineT {
-        PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel);
+        PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers);
         CompileLayerT CompileLayer;
         OptimizeLayerT OptimizeLayer;
     };
@@ -487,6 +501,7 @@ class JuliaOJIT {
     TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT;
 
     size_t getTotalBytes() const JL_NOTSAFEPOINT;
+    void printTimers() JL_NOTSAFEPOINT;
 
     jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
         return dump_emitted_mi_name_stream;
@@ -519,12 +534,14 @@ class JuliaOJIT {
     jl_locked_stream dump_compiles_stream;
     jl_locked_stream dump_llvm_opt_stream;
 
+    std::vector<std::function<void()>> PrintLLVMTimers;
+
     ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
 
 #ifndef JL_USE_JITLINK
     const std::shared_ptr<RTDyldMemoryManager> MemMgr;
 #else
-    std::atomic<size_t> total_size;
+    std::atomic<size_t> total_size{0};
     const std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
 #endif
     ObjLayerT ObjectLayer;
@@ -539,14 +556,9 @@ inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafe
     return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), imaging_mode, DL, triple), ctx);
 }
 
-Module &jl_codegen_params_t::shared_module(Module &from) JL_NOTSAFEPOINT {
+Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT {
     if (!_shared_module) {
-        _shared_module = jl_create_llvm_module("globals", getContext(), imaging, from.getDataLayout(), Triple(from.getTargetTriple()));
-        assert(&from.getContext() == tsctx.getContext() && "Module context differs from codegen_params context!");
-    } else {
-        assert(&from.getContext() == &getContext() && "Module context differs from shared module context!");
-        assert(from.getDataLayout() == _shared_module->getDataLayout() && "Module data layout differs from shared module data layout!");
-        assert(from.getTargetTriple() == _shared_module->getTargetTriple() && "Module target triple differs from shared module target triple!");
+        _shared_module = jl_create_llvm_module("globals", getContext(), imaging, DL, TargetTriple);
     }
     return *_shared_module;
 }
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index dd38560af1414..092a48be81930 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -3,7 +3,6 @@
 // Pointers that are exposed through the public libjulia
 #define JL_EXPORTED_DATA_POINTERS(XX) \
     XX(jl_abstractarray_type) \
-    XX(jl_abstractslot_type) \
     XX(jl_abstractstring_type) \
     XX(jl_an_empty_string) \
     XX(jl_an_empty_vec_any) \
@@ -104,7 +103,6 @@
     XX(jl_true) \
     XX(jl_tuple_typename) \
     XX(jl_tvar_type) \
-    XX(jl_typedslot_type) \
     XX(jl_typeerror_type) \
     XX(jl_typemap_entry_type) \
     XX(jl_typemap_level_type) \
@@ -128,13 +126,14 @@
     XX(jl_voidpointer_type) \
     XX(jl_void_type) \
     XX(jl_weakref_type) \
-    XX(jl_build_ids) \
-    XX(jl_linkage_blobs) \
 
 // Data symbols that are defined inside the public libjulia
 #define JL_EXPORTED_DATA_SYMBOLS(XX) \
     XX(jl_n_threadpools, int) \
     XX(jl_n_threads, _Atomic(int)) \
+    XX(jl_n_gcthreads, int) \
     XX(jl_options, jl_options_t) \
+    XX(jl_task_gcstack_offset, int) \
+    XX(jl_task_ptls_offset, int) \
 
 // end of file
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 1f182f37f938f..41692e9be680e 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -21,7 +21,6 @@
     XX(jl_apply_type1) \
     XX(jl_apply_type2) \
     XX(jl_argument_datatype) \
-    XX(jl_argument_method_table) \
     XX(jl_arraylen) \
     XX(jl_arrayref) \
     XX(jl_arrayset) \
@@ -120,7 +119,7 @@
     XX(jl_check_pkgimage_clones) \
     XX(jl_egal) \
     XX(jl_egal__bits) \
-    XX(jl_egal__special) \
+    XX(jl_egal__bitstag) \
     XX(jl_eh_restore_state) \
     XX(jl_enter_handler) \
     XX(jl_enter_threaded_region) \
@@ -172,8 +171,10 @@
     XX(jl_gc_external_obj_hdr_size) \
     XX(jl_gc_find_taggedvalue_pool) \
     XX(jl_gc_get_total_bytes) \
+    XX(jl_gc_get_max_memory) \
     XX(jl_gc_internal_obj_base_ptr) \
     XX(jl_gc_is_enabled) \
+    XX(jl_gc_is_in_finalizer) \
     XX(jl_gc_live_bytes) \
     XX(jl_gc_managed_malloc) \
     XX(jl_gc_managed_realloc) \
@@ -260,7 +261,7 @@
     XX(jl_infer_thunk) \
     XX(jl_init) \
     XX(jl_init_options) \
-    XX(jl_init_restored_modules) \
+    XX(jl_init_restored_module) \
     XX(jl_init_with_image) \
     XX(jl_init_with_image__threading) \
     XX(jl_init__threading) \
@@ -278,7 +279,6 @@
     XX(jl_ios_fd) \
     XX(jl_ios_get_nbyte_int) \
     XX(jl_ir_flag_inferred) \
-    XX(jl_ir_flag_pure) \
     XX(jl_ir_flag_has_fcall) \
     XX(jl_ir_flag_inlining) \
     XX(jl_ir_inlining_cost) \
@@ -412,7 +412,6 @@
     XX(jl_restore_system_image_data) \
     XX(jl_rethrow) \
     XX(jl_rethrow_other) \
-    XX(jl_rettype_inferred) \
     XX(jl_running_on_valgrind) \
     XX(jl_safe_printf) \
     XX(jl_SC_CLK_TCK) \
@@ -481,7 +480,6 @@
     XX(jl_try_substrtod) \
     XX(jl_try_substrtof) \
     XX(jl_tty_set_mode) \
-    XX(jl_tupletype_fill) \
     XX(jl_typeassert) \
     XX(jl_typeinf_lock_begin) \
     XX(jl_typeinf_lock_end) \
@@ -490,7 +488,6 @@
     XX(jl_typename_str) \
     XX(jl_typeof_str) \
     XX(jl_types_equal) \
-    XX(jl_type_equality_is_identity) \
     XX(jl_type_error) \
     XX(jl_type_error_rt) \
     XX(jl_type_intersection) \
@@ -551,6 +548,7 @@
     YY(jl_register_fptrs) \
     YY(jl_generate_fptr) \
     YY(jl_generate_fptr_for_unspecialized) \
+    YY(jl_generate_fptr_for_oc_wrapper) \
     YY(jl_compile_extern_c) \
     YY(jl_teardown_codegen) \
     YY(jl_jit_total_bytes) \
diff --git a/src/jl_uv.c b/src/jl_uv.c
index b34c3f51c6766..281dd798dbb36 100644
--- a/src/jl_uv.c
+++ b/src/jl_uv.c
@@ -112,7 +112,7 @@ void jl_init_uv(void)
 {
     uv_async_init(jl_io_loop, &signal_async, jl_signal_async_cb);
     uv_unref((uv_handle_t*)&signal_async);
-    JL_MUTEX_INIT(&jl_uv_mutex); // a file-scope initializer can be used instead
+    JL_MUTEX_INIT(&jl_uv_mutex, "jl_uv_mutex"); // a file-scope initializer can be used instead
 }
 
 _Atomic(int) jl_uv_n_waiters = 0;
diff --git a/src/jlapi.c b/src/jlapi.c
index 5c6f01ab86a88..001253fed71a8 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -15,6 +15,10 @@
 #include "julia_assert.h"
 #include "julia_internal.h"
 
+#ifdef USE_TRACY
+#include "tracy/TracyC.h"
+#endif
+
 #ifdef __cplusplus
 #include <cfenv>
 extern "C" {
@@ -473,6 +477,11 @@ JL_DLLEXPORT void (jl_cpu_pause)(void)
     jl_cpu_pause();
 }
 
+JL_DLLEXPORT void (jl_cpu_suspend)(void)
+{
+    jl_cpu_suspend();
+}
+
 JL_DLLEXPORT void (jl_cpu_wake)(void)
 {
     jl_cpu_wake();
@@ -680,6 +689,14 @@ static void rr_detach_teleport(void) {
 
 JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
 {
+#ifdef USE_TRACY
+    // Apply e.g. JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and
+    //            JULIA_TIMING_METADATA_PRINT_LIMIT=20
+    jl_timing_apply_env();
+    if (getenv("JULIA_WAIT_FOR_TRACY"))
+        while (!TracyCIsConnected) jl_cpu_pause(); // Wait for connection
+#endif
+
     // no-op on Windows, note that the caller must have already converted
     // from `wchar_t` to `UTF-8` already if we're running on Windows.
     uv_setup_args(argc, argv);
diff --git a/src/jloptions.c b/src/jloptions.c
index 90bb39955ee42..4c0b59f811643 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -40,6 +40,7 @@ JL_DLLEXPORT void jl_init_options(void)
                         NULL, // cpu_target ("native", "core2", etc...)
                         0,    // nthreadpools
                         0,    // nthreads
+                        0,    // ngcthreads
                         NULL, // nthreads_per_pool
                         0,    // nprocs
                         NULL, // machine_file
@@ -128,6 +129,7 @@ static const char opts[]  =
     "                           interface if supported (Linux and Windows) or to the number of CPU\n"
     "                           threads if not supported (MacOS) or if process affinity is not\n"
     "                           configured, and sets M to 1.\n"
+    " --gcthreads=N             Use N threads for GC, set to half of the number of compute threads if unspecified.\n"
     " -p, --procs {N|auto}      Integer value N launches N additional local worker processes\n"
     "                           \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n"
     " --machine-file <file>     Run processes on hosts listed in <file>\n\n"
@@ -251,6 +253,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_strip_metadata,
            opt_strip_ir,
            opt_heap_size_hint,
+           opt_gc_threads,
     };
     static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
     static const struct option longopts[] = {
@@ -275,6 +278,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "cpu-target",      required_argument, 0, 'C' },
         { "procs",           required_argument, 0, 'p' },
         { "threads",         required_argument, 0, 't' },
+        { "gcthreads",       required_argument, 0, opt_gc_threads },
         { "machine-file",    required_argument, 0, opt_machine_file },
         { "project",         optional_argument, 0, opt_project },
         { "color",           required_argument, 0, opt_color },
@@ -351,11 +355,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                             c = o->val;
                             goto restart_switch;
                         }
-                        else if (o->val <= 0xff && strchr(shortopts, o->val)) {
-                            jl_errorf("option `-%c/--%s` is missing an argument", o->val, o->name);
-                        }
                         else {
-                            jl_errorf("option `--%s` is missing an argument", o->name);
+                            const char *problem = o->has_arg ? "is missing an argument" : "does not accept an argument";
+                            if (o->val <= 0xff && strchr(shortopts, o->val)) {
+                                jl_errorf("option `-%c/--%s` %s", o->val, o->name, problem);
+                            }
+                            else {
+                                jl_errorf("option `--%s` %s", o->name, problem);
+                            }
                         }
                     }
                 }
@@ -812,6 +819,13 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             if (jl_options.heap_size_hint == 0)
                 jl_errorf("julia: invalid argument to --heap-size-hint without memory size specified");
 
+            break;
+        case opt_gc_threads:
+            errno = 0;
+            long ngcthreads = strtol(optarg, &endptr, 10);
+            if (errno != 0 || optarg == endptr || *endptr != 0 || ngcthreads < 1 || ngcthreads >= INT16_MAX)
+                jl_errorf("julia: --gcthreads=<n>; n must be an integer >= 1");
+            jl_options.ngcthreads = (int16_t)ngcthreads;
             break;
         default:
             jl_errorf("julia: unhandled option -- %c\n"
diff --git a/src/jloptions.h b/src/jloptions.h
index d0aba777027e7..c44a8cfe05770 100644
--- a/src/jloptions.h
+++ b/src/jloptions.h
@@ -15,6 +15,7 @@ typedef struct {
     const char *cpu_target;
     int8_t nthreadpools;
     int16_t nthreads;
+    int16_t ngcthreads;
     const int16_t *nthreads_per_pool;
     int32_t nprocs;
     const char *machine_file;
diff --git a/src/jltypes.c b/src/jltypes.c
index d9f50d67d3f73..1a30df637a706 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -20,6 +20,7 @@ extern "C" {
 #endif
 
 _Atomic(jl_value_t*) cmpswap_names JL_GLOBALLY_ROOTED;
+jl_datatype_t *small_typeof[(jl_max_tags << 4) / sizeof(*small_typeof)]; // 16-bit aligned, like the GC
 
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
@@ -37,85 +38,119 @@ static int typeenv_has(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT
     return 0;
 }
 
-static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env)
+static int typeenv_has_ne(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT
 {
-    if (jl_typeis(v, jl_tvar_type))
-        return !typeenv_has(env, (jl_tvar_t*)v);
-    if (jl_is_uniontype(v))
-        return layout_uses_free_typevars(((jl_uniontype_t*)v)->a, env) ||
-               layout_uses_free_typevars(((jl_uniontype_t*)v)->b, env);
-    if (jl_is_vararg(v)) {
-        jl_vararg_t *vm = (jl_vararg_t*)v;
-        if (vm->T && layout_uses_free_typevars(vm->T, env))
-            return 1;
-        if (vm->N && layout_uses_free_typevars(vm->N, env))
-            return 1;
-        return 0;
-    }
-    if (jl_is_unionall(v)) {
-        jl_unionall_t *ua = (jl_unionall_t*)v;
-        jl_typeenv_t newenv = { ua->var, NULL, env };
-        return layout_uses_free_typevars(ua->body, &newenv);
+    while (env != NULL) {
+        if (env->var == v)
+            return env->val != (jl_value_t*)v; // consider it actually not present if it is bound to itself unchanging
+        env = env->prev;
     }
-    if (jl_is_datatype(v)) {
-        jl_datatype_t *dt = (jl_datatype_t*)v;
-        if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc)
-            return 0;
-        if (dt->name == jl_namedtuple_typename)
-            return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env);
-        if (dt->name == jl_tuple_typename)
-            // conservative, since we don't want to inline an abstract tuple,
-            // and we currently declare !has_fixed_layout for these, but that
-            // means we also won't be able to inline a tuple which is concrete
-            // except for the use of free type-vars
-            return 1;
-        jl_svec_t *types = jl_get_fieldtypes(dt);
-        size_t i, l = jl_svec_len(types);
-        for (i = 0; i < l; i++) {
-            jl_value_t *ft = jl_svecref(types, i);
-            if (layout_uses_free_typevars(ft, env)) {
-                // This might be inline-alloc, but we don't know the layout
+    return 0;
+}
+
+
+static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env)
+{
+    while (1) {
+        if (jl_is_typevar(v))
+            return !typeenv_has(env, (jl_tvar_t*)v);
+        while (jl_is_unionall(v)) {
+            jl_unionall_t *ua = (jl_unionall_t*)v;
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = ua->var;
+            newenv->val = NULL;
+            newenv->prev = env;
+            env = newenv;
+            v = ua->body;
+        }
+        if (jl_is_datatype(v)) {
+            jl_datatype_t *dt = (jl_datatype_t*)v;
+            if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc)
+                return 0;
+            if (dt->name == jl_namedtuple_typename)
+                return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env);
+            if (dt->name == jl_tuple_typename)
+                // conservative, since we don't want to inline an abstract tuple,
+                // and we currently declare !has_fixed_layout for these, but that
+                // means we also won't be able to inline a tuple which is concrete
+                // except for the use of free type-vars
                 return 1;
+            jl_svec_t *types = jl_get_fieldtypes(dt);
+            size_t i, l = jl_svec_len(types);
+            for (i = 0; i < l; i++) {
+                jl_value_t *ft = jl_svecref(types, i);
+                if (layout_uses_free_typevars(ft, env))
+                    // This might be inline-alloc, but we don't know the layout
+                    return 1;
             }
+            return 0;
+        }
+        else if (jl_is_uniontype(v)) {
+            if (layout_uses_free_typevars(((jl_uniontype_t*)v)->a, env))
+                return 1;
+           v = ((jl_uniontype_t*)v)->b;
+        }
+        else if (jl_is_vararg(v)) {
+            jl_vararg_t *vm = (jl_vararg_t*)v;
+            if (!vm->T)
+                return 0;
+            if (vm->N && layout_uses_free_typevars(vm->N, env))
+                return 1;
+            v = vm->T;
+        }
+        else {
+            return 0;
         }
     }
-    return 0;
 }
 
 static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
 {
-    if (jl_typeis(v, jl_tvar_type)) {
-        return !typeenv_has(env, (jl_tvar_t*)v);
-    }
-    if (jl_is_uniontype(v))
-        return has_free_typevars(((jl_uniontype_t*)v)->a, env) ||
-            has_free_typevars(((jl_uniontype_t*)v)->b, env);
-    if (jl_is_vararg(v)) {
-        jl_vararg_t *vm = (jl_vararg_t*)v;
-        if (vm->T) {
-            if (has_free_typevars(vm->T, env))
-                return 1;
-            return vm->N && has_free_typevars(vm->N, env);
+    while (1) {
+        if (jl_is_typevar(v)) {
+            return !typeenv_has(env, (jl_tvar_t*)v);
         }
-    }
-    if (jl_is_unionall(v)) {
-        jl_unionall_t *ua = (jl_unionall_t*)v;
-        jl_typeenv_t newenv = { ua->var, NULL, env };
-        return has_free_typevars(ua->var->lb, env) || has_free_typevars(ua->var->ub, env) ||
-            has_free_typevars(ua->body, &newenv);
-    }
-    if (jl_is_datatype(v)) {
-        int expect = ((jl_datatype_t*)v)->hasfreetypevars;
-        if (expect == 0 || env == NULL)
-            return expect;
-        size_t i;
-        for (i = 0; i < jl_nparams(v); i++) {
-            if (has_free_typevars(jl_tparam(v, i), env)) {
+        while (jl_is_unionall(v)) {
+            jl_unionall_t *ua = (jl_unionall_t*)v;
+            if (ua->var->lb != jl_bottom_type && has_free_typevars(ua->var->lb, env))
+                return 1;
+            if (ua->var->ub != (jl_value_t*)jl_any_type && has_free_typevars(ua->var->ub, env))
                 return 1;
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = ua->var;
+            newenv->val = NULL;
+            newenv->prev = env;
+            env = newenv;
+            v = ua->body;
+        }
+        if (jl_is_datatype(v)) {
+            int expect = ((jl_datatype_t*)v)->hasfreetypevars;
+            if (expect == 0 || env == NULL)
+                return expect;
+            size_t i;
+            for (i = 0; i < jl_nparams(v); i++) {
+                if (has_free_typevars(jl_tparam(v, i), env))
+                    return 1;
             }
+            return 0;
+        }
+        else if (jl_is_uniontype(v)) {
+            if (has_free_typevars(((jl_uniontype_t*)v)->a, env))
+                return 1;
+           v = ((jl_uniontype_t*)v)->b;
+        }
+        else if (jl_is_vararg(v)) {
+            jl_vararg_t *vm = (jl_vararg_t*)v;
+            if (!vm->T)
+                return 0;
+            if (vm->N && has_free_typevars(vm->N, env))
+                return 1;
+            v = vm->T;
+        }
+        else {
+            return 0;
         }
     }
-    return 0;
 }
 
 JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT
@@ -125,36 +160,48 @@ JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT
 
 static void find_free_typevars(jl_value_t *v, jl_typeenv_t *env, jl_array_t *out)
 {
-    if (jl_typeis(v, jl_tvar_type)) {
-        if (!typeenv_has(env, (jl_tvar_t*)v))
-            jl_array_ptr_1d_push(out, v);
-    }
-    else if (jl_is_uniontype(v)) {
-        find_free_typevars(((jl_uniontype_t*)v)->a, env, out);
-        find_free_typevars(((jl_uniontype_t*)v)->b, env, out);
-    }
-    else if (jl_is_vararg(v)) {
-        jl_vararg_t *vm = (jl_vararg_t *)v;
-        if (vm->T) {
-            find_free_typevars(vm->T, env, out);
-            if (vm->N) {
+    while (1) {
+        if (jl_is_typevar(v)) {
+            if (!typeenv_has(env, (jl_tvar_t*)v))
+                jl_array_ptr_1d_push(out, v);
+            return;
+        }
+        while (jl_is_unionall(v)) {
+            jl_unionall_t *ua = (jl_unionall_t*)v;
+            if (ua->var->lb != jl_bottom_type)
+                find_free_typevars(ua->var->lb, env, out);
+            if (ua->var->ub != (jl_value_t*)jl_any_type)
+                find_free_typevars(ua->var->ub, env, out);
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = ua->var;
+            newenv->val = NULL;
+            newenv->prev = env;
+            env = newenv;
+            v = ua->body;
+        }
+        if (jl_is_datatype(v)) {
+            if (!((jl_datatype_t*)v)->hasfreetypevars)
+                return;
+            size_t i;
+            for (i = 0; i < jl_nparams(v); i++)
+                find_free_typevars(jl_tparam(v, i), env, out);
+            return;
+        }
+        else if (jl_is_uniontype(v)) {
+            find_free_typevars(((jl_uniontype_t*)v)->a, env, out);
+            v = ((jl_uniontype_t*)v)->b;
+        }
+        else if (jl_is_vararg(v)) {
+            jl_vararg_t *vm = (jl_vararg_t *)v;
+            if (!vm->T)
+                return;
+            if (vm->N) // this swap the visited order, but we don't mind it
                 find_free_typevars(vm->N, env, out);
-            }
+            v = vm->T;
         }
-    }
-    else if (jl_is_unionall(v)) {
-        jl_unionall_t *ua = (jl_unionall_t*)v;
-        jl_typeenv_t newenv = { ua->var, NULL, env };
-        find_free_typevars(ua->var->lb, env, out);
-        find_free_typevars(ua->var->ub, env, out);
-        find_free_typevars(ua->body, &newenv, out);
-    }
-    else if (jl_is_datatype(v)) {
-        if (!((jl_datatype_t*)v)->hasfreetypevars)
+        else {
             return;
-        size_t i;
-        for (i=0; i < jl_nparams(v); i++)
-            find_free_typevars(jl_tparam(v,i), env, out);
+        }
     }
 }
 
@@ -170,41 +217,55 @@ JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v)
 // test whether a type has vars bound by the given environment
 static int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
 {
-    if (jl_typeis(v, jl_tvar_type))
-        return typeenv_has(env, (jl_tvar_t*)v);
-    if (jl_is_uniontype(v))
-        return jl_has_bound_typevars(((jl_uniontype_t*)v)->a, env) ||
-            jl_has_bound_typevars(((jl_uniontype_t*)v)->b, env);
-    if (jl_is_vararg(v)) {
-        jl_vararg_t *vm = (jl_vararg_t *)v;
-        return vm->T && (jl_has_bound_typevars(vm->T, env) ||
-            (vm->N && jl_has_bound_typevars(vm->N, env)));
-    }
-    if (jl_is_unionall(v)) {
-        jl_unionall_t *ua = (jl_unionall_t*)v;
-        if (jl_has_bound_typevars(ua->var->lb, env) || jl_has_bound_typevars(ua->var->ub, env))
-            return 1;
-        jl_typeenv_t *te = env;
-        while (te != NULL) {
-            if (te->var == ua->var)
-                break;
-            te = te->prev;
+    while (1) {
+        if (jl_is_typevar(v)) {
+            return typeenv_has_ne(env, (jl_tvar_t*)v);
         }
-        if (te) te->var = NULL;  // temporarily remove this var from env
-        int ans = jl_has_bound_typevars(ua->body, env);
-        if (te) te->var = ua->var;
-        return ans;
-    }
-    if (jl_is_datatype(v)) {
-        if (!((jl_datatype_t*)v)->hasfreetypevars)
+        while (jl_is_unionall(v)) {
+            jl_unionall_t *ua = (jl_unionall_t*)v;
+            if (ua->var->lb != jl_bottom_type && jl_has_bound_typevars(ua->var->lb, env))
+                return 1;
+            if (ua->var->ub != (jl_value_t*)jl_any_type && jl_has_bound_typevars(ua->var->ub, env))
+                return 1;
+            // Temporarily remove this var from env if necessary
+            // Note that te might be bound more than once in the env, so
+            // we remove it by setting it to itself in a new env.
+            if (typeenv_has_ne(env, ua->var)) {
+                jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+                newenv->var = ua->var;
+                newenv->val = (jl_value_t*)ua->var;
+                newenv->prev = env;
+                env = newenv;
+            }
+            v = ua->body;
+        }
+        if (jl_is_datatype(v)) {
+            if (!((jl_datatype_t*)v)->hasfreetypevars)
+                return 0;
+            size_t i;
+            for (i = 0; i < jl_nparams(v); i++) {
+                if (jl_has_bound_typevars(jl_tparam(v, i), env))
+                    return 1;
+            }
             return 0;
-        size_t i;
-        for (i=0; i < jl_nparams(v); i++) {
-            if (jl_has_bound_typevars(jl_tparam(v,i), env))
+        }
+        else if (jl_is_uniontype(v)) {
+            if (jl_has_bound_typevars(((jl_uniontype_t*)v)->a, env))
+                return 1;
+           v = ((jl_uniontype_t*)v)->b;
+        }
+        else if (jl_is_vararg(v)) {
+            jl_vararg_t *vm = (jl_vararg_t *)v;
+            if (!vm->T)
+                return 0;
+            if (vm->N && jl_has_bound_typevars(vm->N, env))
                 return 1;
+            v = vm->T;
+        }
+        else {
+            return 0;
         }
     }
-    return 0;
 }
 
 JL_DLLEXPORT int jl_has_typevar(jl_value_t *t, jl_tvar_t *v) JL_NOTSAFEPOINT
@@ -281,28 +342,15 @@ JL_DLLEXPORT int jl_get_size(jl_value_t *val, size_t *pnt)
 
 // --- type union ---
 
-static int count_union_components(jl_value_t **types, size_t n)
-{
-    size_t i, c=0;
-    for(i=0; i < n; i++) {
-        jl_value_t *e = types[i];
-        if (jl_is_uniontype(e)) {
-            jl_uniontype_t *u = (jl_uniontype_t*)e;
-            c += count_union_components(&u->a, 1);
-            c += count_union_components(&u->b, 1);
-        }
-        else {
-            c++;
-        }
-    }
-    return c;
-}
-
 int jl_count_union_components(jl_value_t *v)
 {
-    if (!jl_is_uniontype(v)) return 1;
-    jl_uniontype_t *u = (jl_uniontype_t*)v;
-    return jl_count_union_components(u->a) + jl_count_union_components(u->b);
+    size_t c = 0;
+    while (jl_is_uniontype(v)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)v;
+        c += jl_count_union_components(u->a);
+        v = u->b;
+    }
+    return c + 1;
 }
 
 // Return the `*pi`th element of a nested type union, according to a
@@ -310,16 +358,16 @@ int jl_count_union_components(jl_value_t *v)
 // considered an "element". `*pi` is destroyed in the process.
 static jl_value_t *nth_union_component(jl_value_t *v, int *pi) JL_NOTSAFEPOINT
 {
-    if (!jl_is_uniontype(v)) {
-        if (*pi == 0)
-            return v;
-        (*pi)--;
-        return NULL;
+    while (jl_is_uniontype(v)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)v;
+        jl_value_t *a = nth_union_component(u->a, pi);
+        if (a) return a;
+        v = u->b;
     }
-    jl_uniontype_t *u = (jl_uniontype_t*)v;
-    jl_value_t *a = nth_union_component(u->a, pi);
-    if (a) return a;
-    return nth_union_component(u->b, pi);
+    if (*pi == 0)
+        return v;
+    (*pi)--;
+    return NULL;
 }
 
 jl_value_t *jl_nth_union_component(jl_value_t *v, int i) JL_NOTSAFEPOINT
@@ -330,12 +378,11 @@ jl_value_t *jl_nth_union_component(jl_value_t *v, int i) JL_NOTSAFEPOINT
 // inverse of jl_nth_union_component
 int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT
 {
-    if (jl_is_uniontype(haystack)) {
-        if (jl_find_union_component(((jl_uniontype_t*)haystack)->a, needle, nth))
+    while (jl_is_uniontype(haystack)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)haystack;
+        if (jl_find_union_component(u->a, needle, nth))
             return 1;
-        if (jl_find_union_component(((jl_uniontype_t*)haystack)->b, needle, nth))
-            return 1;
-        return 0;
+        haystack = u->b;
     }
     if (needle == haystack)
         return 1;
@@ -343,23 +390,6 @@ int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *
     return 0;
 }
 
-static void flatten_type_union(jl_value_t **types, size_t n, jl_value_t **out, size_t *idx) JL_NOTSAFEPOINT
-{
-    size_t i;
-    for(i=0; i < n; i++) {
-        jl_value_t *e = types[i];
-        if (jl_is_uniontype(e)) {
-            jl_uniontype_t *u = (jl_uniontype_t*)e;
-            flatten_type_union(&u->a, 1, out, idx);
-            flatten_type_union(&u->b, 1, out, idx);
-        }
-        else {
-            out[*idx] = e;
-            (*idx)++;
-        }
-    }
-}
-
 STATIC_INLINE const char *datatype_module_name(jl_value_t *t) JL_NOTSAFEPOINT
 {
     if (((jl_datatype_t*)t)->name->module == NULL)
@@ -456,6 +486,53 @@ static int union_sort_cmp(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT
     }
 }
 
+static int count_union_components(jl_value_t **types, size_t n, int widen)
+{
+    size_t i, c = 0;
+    for (i = 0; i < n; i++) {
+        jl_value_t *e = types[i];
+        while (jl_is_uniontype(e)) {
+            jl_uniontype_t *u = (jl_uniontype_t*)e;
+            c += count_union_components(&u->a, 1, widen);
+            e = u->b;
+        }
+        if (widen && jl_is_unionall(e) && jl_is_uniontype(jl_unwrap_unionall(e))) {
+            jl_uniontype_t *u = (jl_uniontype_t*)jl_unwrap_unionall(e);
+            c += count_union_components(&u->a, 2, widen);
+        }
+        else {
+            c++;
+        }
+    }
+    return c;
+}
+
+static void flatten_type_union(jl_value_t **types, size_t n, jl_value_t **out, size_t *idx, int widen)
+{
+    size_t i;
+    for (i = 0; i < n; i++) {
+        jl_value_t *e = types[i];
+        while (jl_is_uniontype(e)) {
+            jl_uniontype_t *u = (jl_uniontype_t*)e;
+            flatten_type_union(&u->a, 1, out, idx, widen);
+            e = u->b;
+        }
+        if (widen && jl_is_unionall(e) && jl_is_uniontype(jl_unwrap_unionall(e))) {
+            // flatten this UnionAll into place by switching the union and unionall
+            jl_uniontype_t *u = (jl_uniontype_t*)jl_unwrap_unionall(e);
+            size_t old_idx = 0;
+            flatten_type_union(&u->a, 2, out, idx, widen);
+            for (; old_idx < *idx; old_idx++)
+                out[old_idx] = jl_rewrap_unionall(out[old_idx], e);
+        }
+        else {
+            out[*idx] = e;
+            (*idx)++;
+        }
+    }
+}
+
+
 static void isort_union(jl_value_t **a, size_t len) JL_NOTSAFEPOINT
 {
     size_t i, j;
@@ -484,11 +561,11 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
     if (n == 1)
         return ts[0];
 
-    size_t nt = count_union_components(ts, n);
+    size_t nt = count_union_components(ts, n, 1);
     jl_value_t **temp;
     JL_GC_PUSHARGS(temp, nt+1);
     size_t count = 0;
-    flatten_type_union(ts, n, temp, &count);
+    flatten_type_union(ts, n, temp, &count, 1);
     assert(count == nt);
     size_t j;
     for (i = 0; i < nt; i++) {
@@ -523,6 +600,231 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
     return tu;
 }
 
+// note: this is turned off as `Union` doesn't do such normalization.
+// static int simple_subtype(jl_value_t *a, jl_value_t *b)
+// {
+//     if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b)
+//         return 1;
+//     if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->lb))
+//         return 1;
+//     return 0;
+// }
+
+static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree)
+{
+    int subab = 0, subba = 0;
+    if (jl_egal(a, b)) {
+        subab = subba = 1;
+    }
+    else if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type) {
+        subab = 1;
+    }
+    else if (b == jl_bottom_type || a == (jl_value_t*)jl_any_type) {
+        subba = 1;
+    }
+    else if (hasfree) {
+        // subab = simple_subtype(a, b);
+        // subba = simple_subtype(b, a);
+    }
+    else if (jl_is_type_type(a) && jl_is_type_type(b) &&
+             jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b))) {
+        // issue #24521: don't merge Type{T} where typeof(T) varies
+    }
+    else if (jl_typeof(a) == jl_typeof(b) && jl_types_egal(a, b)) {
+        subab = subba = 1;
+    }
+    else {
+        subab = jl_subtype(a, b);
+        subba = jl_subtype(b, a);
+    }
+    return subab | (subba<<1);
+}
+
+jl_value_t *simple_union(jl_value_t *a, jl_value_t *b)
+{
+    size_t nta = count_union_components(&a, 1, 1);
+    size_t ntb = count_union_components(&b, 1, 1);
+    size_t nt = nta + ntb;
+    jl_value_t **temp;
+    JL_GC_PUSHARGS(temp, nt+1);
+    size_t count = 0;
+    flatten_type_union(&a, 1, temp, &count, 1);
+    flatten_type_union(&b, 1, temp, &count, 1);
+    assert(count == nt);
+    size_t i, j;
+    size_t ra = nta, rb = ntb;
+    // first remove cross-redundancy and check if `a >: b` or `a <: b`.
+    for (i = 0; i < nta; i++) {
+        if (temp[i] == NULL) continue;
+        int hasfree = jl_has_free_typevars(temp[i]);
+        for (j = nta; j < nt; j++) {
+            if (temp[j] == NULL) continue;
+            int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j]));
+            int subab = subs & 1, subba = subs >> 1;
+            if (subab) {
+                temp[i] = NULL;
+                if (!subba) ra = 0;
+                count--;
+                break;
+            }
+            else if (subba) {
+                temp[j] = NULL;
+                rb = 0;
+                count--;
+            }
+        }
+    }
+    if (count == ra) {
+        JL_GC_POP();
+        return a;
+    }
+    if (count == rb) {
+        JL_GC_POP();
+        return b;
+    }
+    // then remove self-redundancy
+    for (i = 0; i < nt; i++) {
+        int has_free = temp[i] != NULL && jl_has_free_typevars(temp[i]);
+        size_t jmin = i < nta ? 0 : nta;
+        size_t jmax = i < nta ? nta : nt;
+        for (j = jmin; j < jmax; j++) {
+            if (j != i && temp[i] && temp[j]) {
+                if (temp[i] == jl_bottom_type ||
+                    temp[j] == (jl_value_t*)jl_any_type ||
+                    jl_egal(temp[i], temp[j]) ||
+                    (!has_free && !jl_has_free_typevars(temp[j]) &&
+                     // issue #24521: don't merge Type{T} where typeof(T) varies
+                     !(jl_is_type_type(temp[i]) && jl_is_type_type(temp[j]) && jl_typeof(jl_tparam0(temp[i])) != jl_typeof(jl_tparam0(temp[j]))) &&
+                     jl_subtype(temp[i], temp[j]))) {
+                    temp[i] = NULL;
+                }
+            }
+        }
+    }
+    isort_union(temp, nt);
+    temp[nt] = jl_bottom_type;
+    size_t k;
+    for (k = nt; k-- > 0; ) {
+        if (temp[k] != NULL) {
+            if (temp[nt] == jl_bottom_type)
+                temp[nt] = temp[k];
+            else
+                temp[nt] = jl_new_struct(jl_uniontype_type, temp[k], temp[nt]);
+        }
+    }
+    assert(temp[nt] != NULL);
+    jl_value_t *tu = temp[nt];
+    JL_GC_POP();
+    return tu;
+}
+
+int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity);
+
+jl_value_t *simple_intersect(jl_value_t *a, jl_value_t *b, int overesi)
+{
+    // Unlike `Union`, we don't unwrap `UnionAll` here to avoid possible widening.
+    size_t nta = count_union_components(&a, 1, 0);
+    size_t ntb = count_union_components(&b, 1, 0);
+    size_t nt = nta + ntb;
+    jl_value_t **temp;
+    JL_GC_PUSHARGS(temp, nt+1);
+    size_t count = 0;
+    flatten_type_union(&a, 1, temp, &count, 0);
+    flatten_type_union(&b, 1, temp, &count, 0);
+    assert(count == nt);
+    size_t i, j;
+    int8_t *stemp = (int8_t *)alloca(count);
+    // first remove disjoint elements.
+    memset(stemp, 0, count);
+    for (i = 0; i < nta; i++) {
+        int hasfree = jl_has_free_typevars(temp[i]);
+        for (j = nta; j < nt; j++) {
+            if (!stemp[i] || !stemp[j]) {
+                int intersect = !hasfree && !jl_has_free_typevars(temp[j]);
+                if (!(intersect ? jl_has_empty_intersection(temp[i], temp[j]) : obviously_disjoint(temp[i], temp[j], 0)))
+                    stemp[i] = stemp[j] = 1;
+            }
+        }
+    }
+    for (i = 0; i < nt; i++) {
+        temp[i] = stemp[i] ? temp[i] : NULL;
+    }
+    // then check subtyping.
+    // stemp[k] == -1 : ∃i temp[k] >:ₛ temp[i]
+    // stemp[k] == 1 : ∃i temp[k] == temp[i]
+    // stemp[k] == 2 : ∃i temp[k] <:ₛ temp[i]
+    memset(stemp, 0, count);
+    int all_disjoint = 1, subs[2] = {1, 1}, rs[2] = {1, 1};
+    for (i = 0; i < nta; i++) {
+        if (temp[i] == NULL) continue;
+        all_disjoint = 0;
+        int hasfree = jl_has_free_typevars(temp[i]);
+        for (j = nta; j < nt; j++) {
+            if (temp[j] == NULL) continue;
+            int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j]));
+            int subab = subs & 1, subba = subs >> 1;
+            if (subba && !subab) {
+                stemp[i] = -1;
+                if (stemp[j] >= 0) stemp[j] = 2;
+            }
+            else if (subab && !subba) {
+                stemp[j] = -1;
+                if (stemp[i] >= 0) stemp[i] = 2;
+            }
+            else if (subs) {
+                if (stemp[i] == 0) stemp[i] = 1;
+                if (stemp[j] == 0) stemp[j] = 1;
+            }
+        }
+    }
+    if (!all_disjoint) {
+        for (i = 0; i < nt; i++) {
+            subs[i >= nta] &= (temp[i] == NULL || stemp[i] > 0);
+            rs[i >= nta] &= (temp[i] != NULL && stemp[i] > 0);
+        }
+        // return a(b) if a(b) <: b(a)
+        if (rs[0]) {
+            JL_GC_POP();
+            return a;
+        }
+        if (rs[1]) {
+            JL_GC_POP();
+            return b;
+        }
+    }
+    // return `Union{}` for `merge_env` if we can't prove `<:` or `>:`
+    if (all_disjoint || (!overesi && !subs[0] && !subs[1])) {
+        JL_GC_POP();
+        return jl_bottom_type;
+    }
+    nt = subs[0] ? nta : subs[1] ? nt  : nt;
+    i  = subs[0] ? 0   : subs[1] ? nta : 0;
+    count = nt - i;
+    if (!subs[0] && !subs[1]) {
+        // prepare for over estimation
+        // only preserve `a` with strict <:, but preserve `b` without strict >:
+        for (j = 0; j < nt; j++) {
+            if (stemp[j] < (j < nta ? 2 : 0))
+                temp[j] = NULL;
+        }
+    }
+    isort_union(&temp[i], count);
+    temp[nt] = jl_bottom_type;
+    size_t k;
+    for (k = nt; k-- > i; ) {
+        if (temp[k] != NULL) {
+            if (temp[nt] == jl_bottom_type)
+                temp[nt] = temp[k];
+            else
+                temp[nt] = jl_new_struct(jl_uniontype_type, temp[k], temp[nt]);
+        }
+    }
+    assert(temp[nt] != NULL);
+    jl_value_t *tu = temp[nt];
+    JL_GC_POP();
+    return tu;
+}
+
 // unionall types -------------------------------------------------------------
 
 JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body)
@@ -530,8 +832,8 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body)
     if (jl_is_vararg(body)) {
         if (jl_options.depwarn) {
             if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
-                jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).");
-            jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\n");
+                jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.");
+            jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.\n");
         }
         jl_vararg_t *vm = (jl_vararg_t*)body;
         int T_has_tv = vm->T && jl_has_typevar(vm->T, v);
@@ -581,8 +883,8 @@ static int typekey_eq(jl_datatype_t *tt, jl_value_t **key, size_t n)
     if (tt->name == jl_type_typename) {
         // for Type{T}, require `typeof(T)` to match also, to avoid incorrect
         // dispatch from changing the type of something.
-        // this should work because `Type`s don't have uids, and aren't the
-        // direct tags of values so we don't rely on pointer equality.
+        // this should work because `Type`s don't need unique pointers, and aren't the
+        // direct tags of values (concrete) so we don't rely on pointer equality.
         jl_value_t *kj = key[0];
         jl_value_t *tj = jl_tparam0(tt);
         return (kj == tj || (jl_typeof(tj) == jl_typeof(kj) && jl_types_equal(tj, kj)));
@@ -591,11 +893,14 @@ static int typekey_eq(jl_datatype_t *tt, jl_value_t **key, size_t n)
         jl_value_t *kj = key[j];
         jl_value_t *tj = jl_svecref(tt->parameters, j);
         if (tj != kj) {
-            // require exact same Type{T}. see e.g. issue #22842
-            if (jl_is_type_type(tj) || jl_is_type_type(kj))
-                return 0;
-            if ((jl_is_concrete_type(tj) || jl_is_concrete_type(kj)) &&
-                jl_type_equality_is_identity(tj, kj))
+            if (tt->name == jl_tuple_typename) {
+                // require exact same Type{T} in covariant context. see e.g. issue #22842
+                // this should work because `Tuple{Type}`s don't need unique pointers, and aren't the
+                // direct tags of values (concrete) so we don't rely on pointer equality.
+                if (jl_is_type_type(tj) || jl_is_type_type(kj))
+                    return 0;
+            }
+            if (jl_type_equality_is_identity(tj, kj))
                 return 0;
             if (!jl_types_equal(tj, kj))
                 return 0;
@@ -729,7 +1034,7 @@ static ssize_t lookup_type_idx_linearvalue(jl_svec_t *cache, jl_value_t *key1, j
 
 static jl_value_t *lookup_type(jl_typename_t *tn JL_PROPAGATES_ROOT, jl_value_t **key, size_t n)
 {
-    JL_TIMING(TYPE_CACHE_LOOKUP);
+    JL_TIMING(TYPE_CACHE_LOOKUP, TYPE_CACHE_LOOKUP);
     if (tn == jl_type_typename) {
         assert(n == 1);
         jl_value_t *uw = jl_unwrap_unionall(key[0]);
@@ -750,7 +1055,7 @@ static jl_value_t *lookup_type(jl_typename_t *tn JL_PROPAGATES_ROOT, jl_value_t
 
 static jl_value_t *lookup_typevalue(jl_typename_t *tn, jl_value_t *key1, jl_value_t **key, size_t n, int leaf)
 {
-    JL_TIMING(TYPE_CACHE_LOOKUP);
+    JL_TIMING(TYPE_CACHE_LOOKUP, TYPE_CACHE_LOOKUP);
     unsigned hv = typekeyvalue_hash(tn, key1, key, n, leaf);
     if (hv) {
         jl_svec_t *cache = jl_atomic_load_relaxed(&tn->cache);
@@ -871,7 +1176,7 @@ static int is_cacheable(jl_datatype_t *type)
 
 void jl_cache_type_(jl_datatype_t *type)
 {
-    JL_TIMING(TYPE_CACHE_INSERT);
+    JL_TIMING(TYPE_CACHE_INSERT, TYPE_CACHE_INSERT);
     assert(is_cacheable(type));
     jl_value_t **key = jl_svec_data(type->parameters);
     int n = jl_svec_len(type->parameters);
@@ -905,16 +1210,88 @@ jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type)
     return (jl_datatype_t*)lookup_type(type->name, key, n);
 }
 
-JL_DLLEXPORT int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2)
+// compute whether kj might actually be a subtype of something in the cache
+// (which otherwise would normally be comparable with pointer-egal)
+static int maybe_subtype_of_cache(jl_value_t *kj, int covariant) JL_NOTSAFEPOINT
 {
-    if (t1 == t2)
+    jl_value_t *uw = jl_is_unionall(kj) ? jl_unwrap_unionall(kj) : kj;
+    if (jl_is_datatype(uw)) {
+        jl_datatype_t *dt = (jl_datatype_t*)uw;
+        return dt->maybe_subtype_of_cache;
+    }
+    else if (jl_is_uniontype(uw)) {
+        int ca = maybe_subtype_of_cache(((jl_uniontype_t*)uw)->a, covariant);
+        int cb = maybe_subtype_of_cache(((jl_uniontype_t*)uw)->b, covariant);
+        return ca && cb;
+    }
+    else if (uw == jl_bottom_type) {
         return 1;
-    if (!jl_is_datatype(t1) || !jl_is_datatype(t2))
-        return 0;
-    jl_datatype_t *dt1 = (jl_datatype_t *) t1;
-    jl_datatype_t *dt2 = (jl_datatype_t *) t2;
+    }
+    else if (jl_is_typevar(uw) && !covariant) { // assume Tuple's bounds are always degenerate
+        // TODO: improve this bound if we can prove that typeintersect(lb,ub) is a leaftype
+        jl_tvar_t *tv = (jl_tvar_t*)uw;
+        return tv->lb == tv->ub ||
+               tv->lb != jl_bottom_type;
+    }
+    return 1;
+}
 
-    return dt1->cached_by_hash == dt2->cached_by_hash;
+// compute whether kj might have a supertype which is actually concrete
+static int has_concrete_supertype(jl_value_t *kj) JL_NOTSAFEPOINT
+{
+    jl_value_t *uw = jl_is_unionall(kj) ? jl_unwrap_unionall(kj) : kj;
+    if (jl_is_datatype(uw)) {
+        jl_datatype_t *dt = (jl_datatype_t*)uw;
+        if (dt->name->abstract && dt->name != jl_type_typename)
+            return 0;
+        if (!dt->maybe_subtype_of_cache)
+            return 0;
+        if (dt->name == jl_tuple_typename) {
+            // check tuple parameters recursively for has_concrete_supertype
+            size_t i, n = jl_nparams(dt);
+            for (i = 0; i < n; i++) {
+                jl_value_t *p = jl_tparam(dt, i);
+                if (jl_is_vararg(p))
+                    p = jl_unwrap_vararg(p);
+                if (!has_concrete_supertype(p))
+                    return 0;
+            }
+        }
+        return 1;
+    }
+    else if (jl_is_uniontype(uw)) {
+        int ca = has_concrete_supertype(((jl_uniontype_t*)uw)->a);
+        int cb = has_concrete_supertype(((jl_uniontype_t*)uw)->b);
+        return ca && cb;
+    }
+    else if (uw == jl_bottom_type) {
+        return 1;
+    }
+    else if (jl_is_typevar(uw)) {
+        jl_tvar_t *tv = (jl_tvar_t*)uw;
+        return has_concrete_supertype(tv->ub);
+    }
+    return 0;
+}
+
+int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT
+{
+    int c1 = jl_is_concrete_type(t1);
+    int c2 = jl_is_concrete_type(t2);
+    if (c1 && c2) {
+        if (((jl_datatype_t*)t1)->name != jl_tuple_typename)
+            return 1;
+        if (((jl_datatype_t*)t2)->name != jl_tuple_typename)
+            return 1;
+        if (((jl_datatype_t*)t1)->has_concrete_subtype && ((jl_datatype_t*)t2)->has_concrete_subtype)
+            return 1;
+        // e.g. Tuple{Union{}} and Tuple{Int} are both concrete!
+    }
+    if (c1 && !has_concrete_supertype(t2))
+        return 1;
+    if (c2 && !has_concrete_supertype(t1))
+        return 1;
+    return 0;
 }
 
 // type instantiation
@@ -940,7 +1317,7 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       jl_typestack_t *stack, jl_typeenv_t *env);
+                                       jl_typestack_t *stack, jl_typeenv_t *env, int check);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
@@ -948,7 +1325,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **
                                      jl_typestack_t *stack, jl_typeenv_t *env, int c)
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1);
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
@@ -958,7 +1335,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **
 jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
 {
     if (tc == (jl_value_t*)jl_anytuple_type)
-        return (jl_value_t*)jl_apply_tuple_type_v(params, n);
+        return jl_apply_tuple_type_v(params, n);
     if (tc == (jl_value_t*)jl_uniontype_type)
         return (jl_value_t*)jl_type_union(params, n);
     size_t i;
@@ -1047,20 +1424,20 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt)
     }
     params[0] = dt;
     params[1] = (jl_value_t*)jl_bool_type;
-    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+    jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2);
     JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
     jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp);
     JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
     return rettyp;
 }
 
-JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
+// used to expand an NTuple to a flat representation
+static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
 {
-    // TODO: replace with just using NTuple
     jl_value_t *p = NULL;
     JL_GC_PUSH1(&p);
     p = (jl_value_t*)jl_svec_fill(n, v);
-    p = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)p);
+    p = jl_apply_tuple_type((jl_svec_t*)p);
     JL_GC_POP();
     return p;
 }
@@ -1079,8 +1456,22 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p
     return inst_type_w_(u->body, &env, NULL, 1);
 }
 
+jl_unionall_t *jl_rename_unionall(jl_unionall_t *u)
+{
+    jl_tvar_t *v = jl_new_typevar(u->var->name, u->var->lb, u->var->ub);
+    jl_value_t *t = NULL;
+    JL_GC_PUSH2(&v, &t);
+    jl_typeenv_t env = { u->var, (jl_value_t *)v, NULL };
+    t = inst_type_w_(u->body, &env, NULL, 0);
+    t = jl_new_struct(jl_unionall_type, v, t);
+    JL_GC_POP();
+    return (jl_unionall_t*)t;
+}
+
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val)
 {
+    if (val == (jl_value_t*)var)
+        return t;
     jl_typeenv_t env = { var, val, NULL };
     return inst_type_w_(t, &env, NULL, 1);
 }
@@ -1147,7 +1538,7 @@ static jl_value_t *lookup_type_stack(jl_typestack_t *stack, jl_datatype_t *tt, s
 }
 
 // stable numbering for types--starts with name->hash, then falls back to objectid
-// sets failed if the hash value isn't stable (if not set on entry)
+// sets *failed if the hash value isn't stable (if this param not set on entry)
 static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT
 {
     jl_value_t *uw = jl_is_unionall(kj) ? jl_unwrap_unionall(kj) : kj;
@@ -1159,32 +1550,21 @@ static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT
                 *failed = 1;
                 return 0;
             }
+            // compute a hash now, only for the parent object we are putting in the cache
             hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), jl_svec_len(dt->parameters), *failed);
         }
         return hash;
     }
     else if (jl_is_typevar(uw)) {
-        if (!*failed) {
-            *failed = 1;
-            return 0;
-        }
         // ignore var and lb, since those might get normalized out in equality testing
         return type_hash(((jl_tvar_t*)uw)->ub, failed);
     }
-    else if (jl_is_vararg(uw)) {
-        if (!*failed) {
-            *failed = 1;
-            return 0;
-        }
-        jl_vararg_t *vm = (jl_vararg_t *)uw;
-        // 0x064eeaab is just a randomly chosen constant
-        return bitmix(type_hash(vm->T ? vm->T : (jl_value_t*)jl_any_type, failed), vm->N ? type_hash(vm->N, failed) : 0x064eeaab);
-    }
     else if (jl_is_uniontype(uw)) {
         if (!*failed) {
             *failed = 1;
             return 0;
         }
+        // compute a hash now, only for the parent object we are putting in the cache
         unsigned hasha = type_hash(((jl_uniontype_t*)uw)->a, failed);
         unsigned hashb = type_hash(((jl_uniontype_t*)uw)->b, failed);
         // use a associative mixing function, with well-defined overflow
@@ -1196,6 +1576,14 @@ static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT
     }
 }
 
+JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT
+{
+    // NOTE: The value of `failed` is purposefully ignored here. The parameter is relevant
+    // for other parts of the internal algorithm but not for exposing to the Julia side.
+    int failed = 0;
+    return type_hash(v, &failed);
+}
+
 static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int nofail) JL_NOTSAFEPOINT
 {
     if (tn == jl_type_typename && key[0] == jl_bottom_type)
@@ -1204,7 +1592,18 @@ static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int
     unsigned hash = 3;
     int failed = nofail;
     for (j = 0; j < n; j++) {
-        hash = bitmix(type_hash(key[j], &failed), hash);
+        jl_value_t *p = key[j];
+        if (jl_is_vararg(p)) {
+            jl_vararg_t *vm = (jl_vararg_t*)p;
+            if (!nofail && vm->N)
+                return 0;
+            // 0x064eeaab is just a randomly chosen constant
+            hash = bitmix(vm->N ? type_hash(vm->N, &failed) : 0x064eeaab, hash);
+            if (failed && !nofail)
+                return 0;
+            p = vm->T ? vm->T : (jl_value_t*)jl_any_type;
+        }
+        hash = bitmix(type_hash(p, &failed), hash);
         if (failed && !nofail)
             return 0;
     }
@@ -1237,6 +1636,7 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
 {
     int istuple = (dt->name == jl_tuple_typename);
     dt->hasfreetypevars = 0;
+    dt->maybe_subtype_of_cache = 1;
     dt->isconcretetype = !dt->name->abstract;
     dt->isdispatchtuple = istuple;
     size_t i, l = jl_nparams(dt);
@@ -1247,30 +1647,38 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
             if (dt->hasfreetypevars)
                 dt->isconcretetype = 0;
         }
-        if (istuple && dt->isconcretetype)
-            dt->isconcretetype = (jl_is_datatype(p) && ((jl_datatype_t*)p)->isconcretetype) || p == jl_bottom_type;
-        if (dt->isdispatchtuple) {
-            dt->isdispatchtuple = jl_is_datatype(p) &&
-                ((!jl_is_kind(p) && ((jl_datatype_t*)p)->isconcretetype) ||
-                 (p == (jl_value_t*)jl_typeofbottom_type) || // == Type{Union{}}, so needs to be consistent
-                 (((jl_datatype_t*)p)->name == jl_type_typename && !((jl_datatype_t*)p)->hasfreetypevars));
+        if (istuple) {
+            if (dt->isconcretetype)
+                dt->isconcretetype = (jl_is_datatype(p) && ((jl_datatype_t*)p)->isconcretetype) || p == jl_bottom_type;
+            if (dt->isdispatchtuple) {
+                dt->isdispatchtuple = jl_is_datatype(p) &&
+                    ((!jl_is_kind(p) && ((jl_datatype_t*)p)->isconcretetype) ||
+                     (p == (jl_value_t*)jl_typeofbottom_type) || // == Type{Union{}}, so needs to be consistent
+                     (((jl_datatype_t*)p)->name == jl_type_typename && !((jl_datatype_t*)p)->hasfreetypevars));
+            }
         }
+        if (jl_is_vararg(p))
+            p = ((jl_vararg_t*)p)->T;
         if (istuple && dt->has_concrete_subtype) {
-            if (jl_is_vararg(p))
-                p = ((jl_vararg_t*)p)->T;
-            // tuple types like Tuple{:x} cannot have instances
+            // tuple types like Tuple{:x} and Tuple{Union{}} cannot have instances
             if (p && !jl_is_type(p) && !jl_is_typevar(p))
                 dt->has_concrete_subtype = 0;
+            if (p == jl_bottom_type)
+                dt->has_concrete_subtype = 0;
+        }
+        if (dt->maybe_subtype_of_cache) {
+            dt->maybe_subtype_of_cache = !p || maybe_subtype_of_cache(p, istuple) || !jl_has_free_typevars(p);
         }
     }
+    assert(dt->isconcretetype || dt->isdispatchtuple ? dt->maybe_subtype_of_cache : 1);
     if (dt->name == jl_type_typename) {
-        cacheable = 0; // the cache for Type ignores parameter normalization, so it can't be used as a regular hash
+        cacheable = 0; // n.b. the cache for Type ignores parameter normalization, so it can't be used to make a stable hash value
         jl_value_t *p = jl_tparam(dt, 0);
         if (!jl_is_type(p) && !jl_is_typevar(p)) // Type{v} has no subtypes, if v is not a Type
             dt->has_concrete_subtype = 0;
+        dt->maybe_subtype_of_cache = 1;
     }
     dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable);
-    dt->cached_by_hash = cacheable ? (typekey_hash(dt->name, jl_svec_data(dt->parameters), l, 0) != 0) : (dt->hash != 0);
 }
 
 static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np)
@@ -1308,7 +1716,7 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
     JL_GC_POP();
 }
 
-static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED
+jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED
 {
     t = jl_unwrap_unionall(t);
     if (jl_is_datatype(t))
@@ -1325,45 +1733,54 @@ static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY
 
 int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT
 {
-    if (v == (jl_value_t*)var) {
-        if (inside_inv) {
-            return 0;
+    while (1) {
+        if (v == (jl_value_t*)var) {
+            if (inside_inv) {
+                return 0;
+            }
+            else {
+                (*cov_count)++;
+                return *cov_count <= 1 || jl_is_concrete_type(var->ub);
+            }
         }
-        else {
-            (*cov_count)++;
-            return *cov_count <= 1 || jl_is_concrete_type(var->ub);
+        while (jl_is_unionall(v)) {
+            jl_unionall_t *ua = (jl_unionall_t*)v;
+            if (ua->var == var)
+                return 1;
+            if (ua->var->lb != jl_bottom_type && !_may_substitute_ub(ua->var->lb, var, inside_inv, cov_count))
+                return 0;
+            if (ua->var->ub != (jl_value_t*)jl_any_type && !_may_substitute_ub(ua->var->ub, var, inside_inv, cov_count))
+                return 0;
+            v = ua->body;
         }
-    }
-    else if (jl_is_uniontype(v)) {
-        return _may_substitute_ub(((jl_uniontype_t*)v)->a, var, inside_inv, cov_count) &&
-            _may_substitute_ub(((jl_uniontype_t*)v)->b, var, inside_inv, cov_count);
-    }
-    else if (jl_is_unionall(v)) {
-        jl_unionall_t *ua = (jl_unionall_t*)v;
-        if (ua->var == var)
+        if (jl_is_datatype(v)) {
+            int invar = inside_inv || !jl_is_tuple_type(v);
+            for (size_t i = 0; i < jl_nparams(v); i++) {
+                if (!_may_substitute_ub(jl_tparam(v, i), var, invar, cov_count))
+                    return 0;
+            }
             return 1;
-        return _may_substitute_ub(ua->var->lb, var, inside_inv, cov_count) &&
-            _may_substitute_ub(ua->var->ub, var, inside_inv, cov_count) &&
-            _may_substitute_ub(ua->body, var, inside_inv, cov_count);
-    }
-    else if (jl_is_datatype(v)) {
-        int invar = inside_inv || !jl_is_tuple_type(v);
-        for (size_t i = 0; i < jl_nparams(v); i++) {
-            if (!_may_substitute_ub(jl_tparam(v,i), var, invar, cov_count))
+        }
+        else if (jl_is_uniontype(v)) {
+            // TODO: is !inside_inv, these don't have to share the changes to cov_count
+            if (!_may_substitute_ub(((jl_uniontype_t*)v)->a, var, inside_inv, cov_count))
                 return 0;
+            v = ((jl_uniontype_t*)v)->b;
+        }
+        else if (jl_is_vararg(v)) {
+            jl_vararg_t *va = (jl_vararg_t*)v;
+            if (!va->T)
+                return 1;
+            if (va->N && !_may_substitute_ub(va->N, var, 1, cov_count))
+                return 0;
+            if (!jl_is_concrete_type(var->ub))
+                inside_inv = 1; // treat as invariant inside vararg, for the sake of this algorithm
+            v = va->T;
+        }
+        else {
+            return 1;
         }
     }
-    else if (jl_is_vararg(v)) {
-        jl_vararg_t *va = (jl_vararg_t*)v;
-        int old_count = *cov_count;
-        if (va->T && !_may_substitute_ub(va->T, var, inside_inv, cov_count))
-            return 0;
-        if (*cov_count > old_count && !jl_is_concrete_type(var->ub))
-            return 0;
-        if (va->N && !_may_substitute_ub(va->N, var, 1, cov_count))
-            return 0;
-    }
-    return 1;
 }
 
 // Check whether `var` may be replaced with its upper bound `ub` in `v where var<:ub`
@@ -1379,7 +1796,6 @@ int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT
 
 jl_value_t *normalize_unionalls(jl_value_t *t)
 {
-    JL_GC_PUSH1(&t);
     if (jl_is_uniontype(t)) {
         jl_uniontype_t *u = (jl_uniontype_t*)t;
         jl_value_t *a = NULL;
@@ -1395,14 +1811,14 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
     else if (jl_is_unionall(t)) {
         jl_unionall_t *u = (jl_unionall_t*)t;
         jl_value_t *body = normalize_unionalls(u->body);
+        JL_GC_PUSH1(&body);
         if (body != u->body) {
-            JL_GC_PUSH1(&body);
             t = jl_new_struct(jl_unionall_type, u->var, body);
-            JL_GC_POP();
             u = (jl_unionall_t*)t;
         }
 
         if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var)) {
+            body = (jl_value_t*)u;
             JL_TRY {
                 t = jl_instantiate_unionall(u, u->var->ub);
             }
@@ -1411,21 +1827,21 @@ jl_value_t *normalize_unionalls(jl_value_t *t)
                 // (may happen for bounds inconsistent with the wrapper's bounds)
             }
         }
+        JL_GC_POP();
     }
-    JL_GC_POP();
     return t;
 }
 
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       jl_typestack_t *stack, jl_typeenv_t *env)
+                                       jl_typestack_t *stack, jl_typeenv_t *env, int check)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
     int istuple = (tn == jl_tuple_typename);
     int isnamedtuple = (tn == jl_namedtuple_typename);
-    if (tn != jl_type_typename) {
+    if (check && tn != jl_type_typename) {
         size_t i;
         for (i = 0; i < ntp; i++)
             iparams[i] = normalize_unionalls(iparams[i]);
@@ -1435,9 +1851,31 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     int cacheable = 1;
     if (istuple) {
         size_t i;
-        for (i = 0; cacheable && i < ntp; i++)
-            if (!jl_is_concrete_type(iparams[i]) && iparams[i] != jl_bottom_type)
+        for (i = 0; i < ntp; i++) {
+            jl_value_t *pi = iparams[i];
+            if (jl_is_vararg(pi) && jl_unwrap_vararg(pi) == jl_bottom_type) {
+                jl_value_t *va1 = jl_unwrap_vararg_num(pi);
+                if (va1 && jl_is_long(va1)) {
+                    ssize_t nt = jl_unbox_long(va1);
+                    if (nt == 0)
+                        va1 = NULL;
+                    else
+                        pi = jl_bottom_type; // trigger errorf below
+                }
+                // This imposes an implicit constraint that va1==0,
+                // so we keep the Vararg if it has a TypeVar
+                if (va1 == NULL) {
+                    p = NULL;
+                    ntp -= 1;
+                    assert(i == ntp);
+                    break;
+                }
+            }
+            if (pi == jl_bottom_type)
+                jl_errorf("Tuple field type cannot be Union{}");
+            if (cacheable && !jl_is_concrete_type(pi))
                 cacheable = 0;
+        }
     }
     else {
         size_t i;
@@ -1482,7 +1920,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     if (stack_lkup)
         return stack_lkup;
 
-    if (!istuple) {
+    if (check && !istuple) {
         // check parameters against bounds in type definition
         check_datatype_parameters(tn, iparams, ntp);
     }
@@ -1492,9 +1930,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     }
 
     jl_datatype_t *ndt = NULL;
-    jl_value_t *last = iparams[ntp - 1];
-    JL_GC_PUSH3(&p, &ndt, &last);
+    JL_GC_PUSH2(&p, &ndt);
 
+    jl_value_t *last = iparams[ntp - 1];
     if (istuple && ntp > 0 && jl_is_vararg(last)) {
         // normalize Tuple{..., Vararg{Int, 3}} to Tuple{..., Int, Int, Int}
         jl_value_t *va = jl_unwrap_unionall(last);
@@ -1519,7 +1957,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                 l = ntp - 1 + nt;
                 for (; i < l; i++)
                     jl_svecset(p, i, va0);
-                jl_value_t *ndt = (jl_value_t*)jl_apply_tuple_type(p);
+                jl_value_t *ndt = jl_apply_tuple_type(p);
                 JL_GC_POP();
                 return ndt;
             }
@@ -1602,7 +2040,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         ndt->super = jl_any_type;
     }
     else if (dt->super) {
-        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, 1);
+        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, check);
         jl_gc_wb(ndt, ndt->super);
     }
     jl_svec_t *ftypes = dt->types;
@@ -1648,17 +2086,17 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     return (jl_value_t*)ndt;
 }
 
-static jl_tupletype_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
+static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
 {
-    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL);
+    return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, 1);
 }
 
-JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params)
+JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params)
 {
     return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params);
 }
 
-JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np)
+JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np)
 {
     return jl_apply_tuple_type_v_(p, np, NULL);
 }
@@ -1689,7 +2127,7 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1);
         JL_GC_POP();
     }
     return tt;
@@ -1744,7 +2182,7 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
             ssize_t nt = jl_unbox_long(N);
             if (nt < 0)
                 jl_errorf("size or dimension is negative: %zd", nt);
-            return (jl_value_t*)jl_tupletype_fill(nt, T);
+            return jl_tupletype_fill(nt, T);
         }
     }
     jl_value_t **iparams;
@@ -1760,14 +2198,14 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
     int i;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
-        jl_value_t *pi = inst_type_w_(elt, env, stack, 0);
+        jl_value_t *pi = inst_type_w_(elt, env, stack, check);
         iparams[i] = pi;
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
     }
     if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env);
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check);
     JL_GC_POP();
     return t;
 }
@@ -1820,8 +2258,14 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         JL_GC_PUSH2(&a, &b);
         b = inst_type_w_(u->b, env, stack, check);
         if (a != u->a || b != u->b) {
-            jl_value_t *uargs[2] = {a, b};
-            t = jl_type_union(uargs, 2);
+            if (check) {
+                jl_value_t *uargs[2] = {a, b};
+                t = jl_type_union(uargs, 2);
+            }
+            else {
+                // fast path for `jl_rename_unionall`.
+                t = jl_new_struct(jl_uniontype_type, a, b);
+            }
         }
         JL_GC_POP();
         return t;
@@ -1863,7 +2307,7 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
     if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env);
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check);
     JL_GC_POP();
     return t;
 }
@@ -1937,6 +2381,7 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n)
     }
     jl_task_t *ct = jl_current_task;
     jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+    jl_set_typetagof(vm, jl_vararg_tag, 0);
     vm->T = t;
     vm->N = n;
     return vm;
@@ -2026,19 +2471,36 @@ static jl_tvar_t *tvar(const char *name)
                           (jl_value_t*)jl_any_type);
 }
 
+void export_small_typeof(void)
+{
+    void *copy;
+#ifdef _OS_WINDOWS_
+    jl_dlsym(jl_libjulia_handle, "small_typeof", &copy, 1);
+#else
+    jl_dlsym(jl_libjulia_internal_handle, "small_typeof", &copy, 1);
+#endif
+    memcpy(copy, &small_typeof, sizeof(small_typeof));
+}
+
+#define XX(name) \
+    small_typeof[(jl_##name##_tag << 4) / sizeof(*small_typeof)] = jl_##name##_type; \
+    jl_##name##_type->smalltag = jl_##name##_tag;
 void jl_init_types(void) JL_GC_DISABLED
 {
     jl_module_t *core = NULL; // will need to be assigned later
 
     // create base objects
     jl_datatype_type = jl_new_uninitialized_datatype();
-    jl_set_typeof(jl_datatype_type, jl_datatype_type);
+    XX(datatype);
     jl_typename_type = jl_new_uninitialized_datatype();
     jl_symbol_type = jl_new_uninitialized_datatype();
+    XX(symbol);
     jl_simplevector_type = jl_new_uninitialized_datatype();
+    XX(simplevector);
     jl_methtable_type = jl_new_uninitialized_datatype();
 
     jl_emptysvec = (jl_svec_t*)jl_gc_permobj(sizeof(void*), jl_simplevector_type);
+    jl_set_typetagof(jl_emptysvec, jl_simplevector_tag, GC_OLD_MARKED);
     jl_svec_set_len_unsafe(jl_emptysvec, 0);
 
     jl_any_type = (jl_datatype_t*)jl_new_abstracttype((jl_value_t*)jl_symbol("Any"), core, NULL, jl_emptysvec);
@@ -2046,8 +2508,9 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_nonfunction_mt = jl_any_type->name->mt;
     jl_any_type->name->mt = NULL;
 
-    jl_type_type = (jl_unionall_t*)jl_new_abstracttype((jl_value_t*)jl_symbol("Type"), core, jl_any_type, jl_emptysvec);
-    jl_type_typename = ((jl_datatype_t*)jl_type_type)->name;
+    jl_datatype_t *type_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Type"), core, jl_any_type, jl_emptysvec);
+    jl_type_type = (jl_unionall_t*)type_type;
+    jl_type_typename = type_type->name;
     jl_type_type_mt = jl_new_method_table(jl_type_typename->name, core);
     jl_type_typename->mt = jl_type_type_mt;
 
@@ -2055,7 +2518,7 @@ void jl_init_types(void) JL_GC_DISABLED
     // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses
     jl_datatype_type->name = jl_new_typename_in(jl_symbol("DataType"), core, 0, 1);
     jl_datatype_type->name->wrapper = (jl_value_t*)jl_datatype_type;
-    jl_datatype_type->super = (jl_datatype_t*)jl_type_type;
+    jl_datatype_type->super = type_type;
     jl_datatype_type->parameters = jl_emptysvec;
     jl_datatype_type->name->n_uninitialized = 8 - 3;
     jl_datatype_type->name->names = jl_perm_symsvec(8,
@@ -2066,7 +2529,7 @@ void jl_init_types(void) JL_GC_DISABLED
             "instance",
             "layout",
             "hash",
-            "flags"); // "hasfreetypevars", "isconcretetype", "isdispatchtuple", "isbitstype", "zeroinit", "has_concrete_subtype", "cached_by_hash"
+            "flags"); // "hasfreetypevars", "isconcretetype", "isdispatchtuple", "isbitstype", "zeroinit", "has_concrete_subtype", "maybe_subtype_of_cache"
     jl_datatype_type->types = jl_svec(8,
             jl_typename_type,
             jl_datatype_type,
@@ -2095,6 +2558,11 @@ void jl_init_types(void) JL_GC_DISABLED
                                                     "hash", "n_uninitialized",
                                                     "flags", // "abstract", "mutable", "mayinlinealloc",
                                                     "max_methods");
+    const static uint32_t typename_constfields[1] = { 0x00003a3f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13)
+    const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8)
+    jl_typename_type->name->constfields = typename_constfields;
+    jl_typename_type->name->atomicfields = typename_atomicfields;
+    jl_precompute_memoized_dt(jl_typename_type, 1);
     jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/,
                                       jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/,
                                       jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type,
@@ -2102,11 +2570,6 @@ void jl_init_types(void) JL_GC_DISABLED
                                       jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/,
                                       jl_any_type /*jl_uint8_type*/,
                                       jl_any_type /*jl_uint8_type*/);
-    const static uint32_t typename_constfields[1] = { 0x00003a3f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13)
-    const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8)
-    jl_typename_type->name->constfields = typename_constfields;
-    jl_typename_type->name->atomicfields = typename_atomicfields;
-    jl_precompute_memoized_dt(jl_typename_type, 1);
 
     jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1);
     jl_methtable_type->name->wrapper = (jl_value_t*)jl_methtable_type;
@@ -2118,16 +2581,16 @@ void jl_init_types(void) JL_GC_DISABLED
                                                      "leafcache", "cache", "max_args",
                                                      "module", "backedges",
                                                      "", "", "offs", "");
-    jl_methtable_type->types = jl_svec(11, jl_symbol_type, jl_any_type, jl_any_type,
-                                       jl_any_type, jl_any_type/*jl_long*/,
-                                       jl_any_type/*module*/, jl_any_type/*any vector*/,
-                                       jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
-                                       jl_any_type/*uint8*/, jl_any_type/*uint8*/);
     const static uint32_t methtable_constfields[1] = { 0x00000020 }; // (1<<5);
     const static uint32_t methtable_atomicfields[1] = { 0x0000001e }; // (1<<1)|(1<<2)|(1<<3)|(1<<4);
     jl_methtable_type->name->constfields = methtable_constfields;
     jl_methtable_type->name->atomicfields = methtable_atomicfields;
     jl_precompute_memoized_dt(jl_methtable_type, 1);
+    jl_methtable_type->types = jl_svec(11, jl_symbol_type, jl_any_type, jl_any_type,
+                                       jl_any_type, jl_any_type/*jl_long*/,
+                                       jl_any_type/*module*/, jl_any_type/*any vector*/,
+                                       jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
+                                       jl_any_type/*uint8*/, jl_any_type/*uint8*/);
 
     jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core, 0, 1);
     jl_symbol_type->name->wrapper = (jl_value_t*)jl_symbol_type;
@@ -2156,36 +2619,51 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_astaggedvalue(jl_nothing)->header = ((uintptr_t)jl_nothing_type) | GC_OLD_MARKED;
     jl_nothing_type->instance = jl_nothing;
 
-    jl_datatype_t *type_type = (jl_datatype_t*)jl_type_type;
-    jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec,
-                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
-    jl_bottom_type = jl_new_struct(jl_typeofbottom_type);
-    jl_typeofbottom_type->instance = jl_bottom_type;
-
-    jl_uniontype_type = jl_new_datatype(jl_symbol("Union"), core, type_type, jl_emptysvec,
-                                        jl_perm_symsvec(2, "a", "b"),
-                                        jl_svec(2, jl_any_type, jl_any_type),
-                                        jl_emptysvec, 0, 0, 2);
-    // It seems like we probably usually end up needing the box for kinds (used in an Any context), so force it to exist
-    jl_uniontype_type->name->mayinlinealloc = 0;
-
     jl_tvar_type = jl_new_datatype(jl_symbol("TypeVar"), core, jl_any_type, jl_emptysvec,
                                    jl_perm_symsvec(3, "name", "lb", "ub"),
                                    jl_svec(3, jl_symbol_type, jl_any_type, jl_any_type),
                                    jl_emptysvec, 0, 1, 3);
+    XX(tvar);
     const static uint32_t tvar_constfields[1] = { 0x00000007 }; // all fields are constant, even though TypeVar itself has identity
     jl_tvar_type->name->constfields = tvar_constfields;
 
+    jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec,
+                                           jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
+    XX(typeofbottom);
+    jl_bottom_type = jl_gc_permobj(0, jl_typeofbottom_type);
+    jl_set_typetagof(jl_bottom_type, jl_typeofbottom_tag, GC_OLD_MARKED);
+    jl_typeofbottom_type->instance = jl_bottom_type;
+
     jl_unionall_type = jl_new_datatype(jl_symbol("UnionAll"), core, type_type, jl_emptysvec,
                                        jl_perm_symsvec(2, "var", "body"),
                                        jl_svec(2, jl_tvar_type, jl_any_type),
                                        jl_emptysvec, 0, 0, 2);
+    XX(unionall);
+    // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist
     jl_unionall_type->name->mayinlinealloc = 0;
 
+    jl_uniontype_type = jl_new_datatype(jl_symbol("Union"), core, type_type, jl_emptysvec,
+                                        jl_perm_symsvec(2, "a", "b"),
+                                        jl_svec(2, jl_any_type, jl_any_type),
+                                        jl_emptysvec, 0, 0, 2);
+    XX(uniontype);
+    // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist
+    jl_uniontype_type->name->mayinlinealloc = 0;
+
+    jl_tvar_t *tttvar = tvar("T");
+    type_type->parameters = jl_svec(1, tttvar);
+    jl_precompute_memoized_dt(type_type, 0); // update the hash value ASAP
+    type_type->hasfreetypevars = 1;
+    type_type->ismutationfree = 1;
+    jl_type_typename->wrapper = jl_new_struct(jl_unionall_type, tttvar, (jl_value_t*)jl_type_type);
+    jl_type_type = (jl_unionall_t*)jl_type_typename->wrapper;
+
     jl_vararg_type = jl_new_datatype(jl_symbol("TypeofVararg"), core, jl_any_type, jl_emptysvec,
                                             jl_perm_symsvec(2, "T", "N"),
                                             jl_svec(2, jl_any_type, jl_any_type),
                                             jl_emptysvec, 0, 0, 0);
+    XX(vararg);
+    // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist
     jl_vararg_type->name->mayinlinealloc = 0;
 
     jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL));
@@ -2195,55 +2673,44 @@ void jl_init_types(void) JL_GC_DISABLED
     // fix some miscomputed values, since we didn't know this was going to be a Tuple in jl_precompute_memoized_dt
     jl_tuple_typename->wrapper = (jl_value_t*)jl_anytuple_type; // remove UnionAll wrappers
     jl_anytuple_type->isconcretetype = 0;
+    jl_anytuple_type->maybe_subtype_of_cache = 0;
     jl_anytuple_type->layout = NULL;
-    jl_anytuple_type->cached_by_hash = 0;
-
-    jl_tvar_t *tttvar = tvar("T");
-    ((jl_datatype_t*)jl_type_type)->parameters = jl_svec(1, tttvar);
-    ((jl_datatype_t*)jl_type_type)->hasfreetypevars = 1;
-    ((jl_datatype_t*)jl_type_type)->cached_by_hash = 0;
-    jl_type_typename->wrapper = jl_new_struct(jl_unionall_type, tttvar, (jl_value_t*)jl_type_type);
-    jl_type_type = (jl_unionall_t*)jl_type_typename->wrapper;
-    ((jl_datatype_t*)jl_type_type->body)->ismutationfree = 1;
 
     jl_typeofbottom_type->super = jl_wrap_Type(jl_bottom_type);
-
-    jl_emptytuple_type = jl_apply_tuple_type(jl_emptysvec);
+    jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec);
     jl_emptytuple = jl_gc_permobj(0, jl_emptytuple_type);
     jl_emptytuple_type->instance = jl_emptytuple;
 
     // non-primitive definitions follow
     jl_int32_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Int32"), core,
                                          jl_any_type, jl_emptysvec, 32);
+    XX(int32);
     jl_int64_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Int64"), core,
                                          jl_any_type, jl_emptysvec, 64);
+    XX(int64);
     jl_uint32_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt32"), core,
                                           jl_any_type, jl_emptysvec, 32);
+    XX(uint32);
     jl_uint64_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt64"), core,
                                           jl_any_type, jl_emptysvec, 64);
+    XX(uint64);
     jl_uint8_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt8"), core,
                                          jl_any_type, jl_emptysvec, 8);
+    XX(uint8);
     jl_uint16_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt16"), core,
                                           jl_any_type, jl_emptysvec, 16);
+    XX(uint16);
 
     jl_ssavalue_type = jl_new_datatype(jl_symbol("SSAValue"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "id"),
                                        jl_svec1(jl_long_type),
                                        jl_emptysvec, 0, 0, 1);
 
-    jl_abstractslot_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Slot"), core, jl_any_type,
-                                               jl_emptysvec);
-
-    jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_abstractslot_type, jl_emptysvec,
+    jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_any_type, jl_emptysvec,
                                          jl_perm_symsvec(1, "id"),
                                          jl_svec1(jl_long_type),
                                          jl_emptysvec, 0, 0, 1);
 
-    jl_typedslot_type = jl_new_datatype(jl_symbol("TypedSlot"), core, jl_abstractslot_type, jl_emptysvec,
-                                        jl_perm_symsvec(2, "id", "typ"),
-                                        jl_svec(2, jl_long_type, jl_any_type),
-                                        jl_emptysvec, 0, 0, 2);
-
     jl_argument_type = jl_new_datatype(jl_symbol("Argument"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "n"),
                                        jl_svec1(jl_long_type),
@@ -2254,14 +2721,16 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_bool_type = NULL;
     jl_bool_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Bool"), core,
                                         jl_any_type, jl_emptysvec, 8);
-    jl_false = jl_permbox8(jl_bool_type, 0);
-    jl_true  = jl_permbox8(jl_bool_type, 1);
+    XX(bool);
+    jl_false = jl_permbox8(jl_bool_type, jl_bool_tag, 0);
+    jl_true  = jl_permbox8(jl_bool_type, jl_bool_tag, 1);
 
     jl_abstractstring_type = jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractString"), core, jl_any_type, jl_emptysvec);
     jl_string_type = jl_new_datatype(jl_symbol("String"), core, jl_abstractstring_type, jl_emptysvec,
                                      jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
+    XX(string);
     jl_string_type->instance = NULL;
-    jl_compute_field_offsets(jl_string_type);
+    jl_compute_field_offsets(jl_string_type); // re-compute now that we assigned jl_string_type
     jl_an_empty_string = jl_pchar_to_string("\0", 1);
     *(size_t*)jl_an_empty_string = 0;
 
@@ -2360,6 +2829,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_module_type =
         jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec,
                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
+    XX(module);
     jl_module_type->instance = NULL;
     jl_compute_field_offsets(jl_module_type);
 
@@ -2433,7 +2903,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_code_info_type =
         jl_new_datatype(jl_symbol("CodeInfo"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(22,
+                        jl_perm_symsvec(21,
                             "code",
                             "codelocs",
                             "ssavaluetypes",
@@ -2450,13 +2920,12 @@ void jl_init_types(void) JL_GC_DISABLED
                             "max_world",
                             "inferred",
                             "propagate_inbounds",
-                            "pure",
                             "has_fcall",
                             "inlining",
                             "constprop",
                             "purity",
                             "inlining_cost"),
-                        jl_svec(22,
+                        jl_svec(21,
                             jl_array_any_type,
                             jl_array_int32_type,
                             jl_any_type,
@@ -2474,7 +2943,6 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_bool_type,
                             jl_uint8_type,
                             jl_uint8_type,
                             jl_uint8_type,
@@ -2511,9 +2979,9 @@ void jl_init_types(void) JL_GC_DISABLED
                             "nospecialize",
                             "nkw",
                             "isva",
-                            "pure",
                             "is_for_opaque_closure",
                             "constprop",
+                            "max_varargs",
                             "purity"),
                         jl_svec(29,
                             jl_symbol_type,
@@ -2523,7 +2991,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_ulong_type,
                             jl_ulong_type,
                             jl_type_type,
-                            jl_simplevector_type,
+                            jl_any_type, // union(jl_simplevector_type, jl_method_instance_type),
                             jl_array_type,
                             jl_string_type,
                             jl_any_type,
@@ -2542,7 +3010,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_int32_type,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_bool_type,
+                            jl_uint8_type,
                             jl_uint8_type,
                             jl_uint8_type),
                         jl_emptysvec,
@@ -2553,7 +3021,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_method_instance_type =
         jl_new_datatype(jl_symbol("MethodInstance"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(9,
+                        jl_perm_symsvec(10,
                             "def",
                             "specTypes",
                             "sparam_vals",
@@ -2562,8 +3030,9 @@ void jl_init_types(void) JL_GC_DISABLED
                             "callbacks",
                             "cache",
                             "inInference",
+                            "cache_with_orig",
                             "precompiled"),
-                        jl_svec(9,
+                        jl_svec(10,
                             jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type),
                             jl_any_type,
                             jl_simplevector_type,
@@ -2572,12 +3041,13 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             jl_any_type,
                             jl_bool_type,
+                            jl_bool_type,
                             jl_bool_type),
                         jl_emptysvec,
                         0, 1, 3);
     // These fields should be constant, but Serialization wants to mutate them in initialization
     //const static uint32_t method_instance_constfields[1] = { 0x00000007 }; // (1<<0)|(1<<1)|(1<<2);
-    const static uint32_t method_instance_atomicfields[1] = { 0x00000148 }; // (1<<3)|(1<<6)|(1<<8);
+    const static uint32_t method_instance_atomicfields[1] = { 0x00000248 }; // (1<<3)|(1<<6)|(1<<9);
     //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. TODO: except inInference
     //jl_method_instance_type->name->constfields = method_instance_constfields;
     jl_method_instance_type->name->atomicfields = method_instance_atomicfields;
@@ -2664,8 +3134,9 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_pointer_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->name;
 
     // LLVMPtr{T, AS} where {T, AS}
-    tv = jl_svec2(tvar("T"), tvar("AS"));
-    jl_svec_t *tv_base = jl_svec1(tvar("T"));
+    jl_tvar_t *elvar = tvar("T");
+    tv = jl_svec2(elvar, tvar("AS"));
+    jl_svec_t *tv_base = jl_svec1(elvar);
     jl_llvmpointer_type = (jl_unionall_t*)
         jl_new_primitivetype((jl_value_t*)jl_symbol("LLVMPtr"), core,
                              (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv_base), 1), tv,
@@ -2693,7 +3164,7 @@ void jl_init_types(void) JL_GC_DISABLED
                         NULL,
                         jl_any_type,
                         jl_emptysvec,
-                        jl_perm_symsvec(15,
+                        jl_perm_symsvec(16,
                                         "next",
                                         "queue",
                                         "storage",
@@ -2705,11 +3176,12 @@ void jl_init_types(void) JL_GC_DISABLED
                                         "rngState1",
                                         "rngState2",
                                         "rngState3",
+                                        "rngState4",
                                         "_state",
                                         "sticky",
                                         "_isexception",
                                         "priority"),
-                        jl_svec(15,
+                        jl_svec(16,
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
@@ -2721,17 +3193,16 @@ void jl_init_types(void) JL_GC_DISABLED
                                 jl_uint64_type,
                                 jl_uint64_type,
                                 jl_uint64_type,
+                                jl_uint64_type,
                                 jl_uint8_type,
                                 jl_bool_type,
                                 jl_bool_type,
                                 jl_uint16_type),
                         jl_emptysvec,
                         0, 1, 6);
+    XX(task);
     jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
     jl_svecset(jl_task_type->types, 0, listt);
-    jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header;
-
-    jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type);
 
     jl_binding_type =
         jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
@@ -2749,8 +3220,12 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_svec(3, jl_module_type, jl_symbol_type, jl_binding_type),
                         jl_emptysvec, 0, 0, 3);
 
+    jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type);
+    jl_voidpointer_type = (jl_datatype_t*)pointer_void;
     tv = jl_svec2(tvar("A"), tvar("R"));
     jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv,
+        // N.B.: OpaqueClosure call code relies on specptr being field 5.
+        // Update that code if you change this.
         jl_perm_symsvec(5, "captures", "world", "source", "invoke", "specptr"),
         jl_svec(5, jl_any_type, jl_long_type, jl_any_type, pointer_void, pointer_void),
         jl_emptysvec, 0, 0, 5)->name->wrapper;
@@ -2763,7 +3238,6 @@ void jl_init_types(void) JL_GC_DISABLED
         jl_emptysvec, 0, 0, 4);
 
     // complete builtin type metadata
-    jl_voidpointer_type = (jl_datatype_t*)pointer_void;
     jl_uint8pointer_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_uint8_type);
     jl_svecset(jl_datatype_type->types, 5, jl_voidpointer_type);
     jl_svecset(jl_datatype_type->types, 6, jl_int32_type);
@@ -2812,13 +3286,103 @@ void jl_init_types(void) JL_GC_DISABLED
     // Technically not ismutationfree, but there's a separate system to deal
     // with mutations for global state.
     jl_module_type->ismutationfree = 1;
+    // Module object identity is determined by its name and parent name.
+    jl_module_type->isidentityfree = 1;
 
     // Array's mutable data is hidden, so we need to override it
     ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->ismutationfree = 0;
+    ((jl_datatype_t*)jl_array_any_type)->ismutationfree = 0;
+    ((jl_datatype_t*)jl_array_symbol_type)->ismutationfree = 0;
+    ((jl_datatype_t*)jl_array_uint8_type)->ismutationfree = 0;
+    ((jl_datatype_t*)jl_array_int32_type)->ismutationfree = 0;
+    ((jl_datatype_t*)jl_array_uint64_type)->ismutationfree = 0;
 
     // override the preferred layout for a couple types
     jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
+    export_small_typeof();
+}
+
+static jl_value_t *core(const char *name)
+{
+    return jl_get_global(jl_core_module, jl_symbol(name));
+}
+
+// fetch references to things defined in boot.jl
+void post_boot_hooks(void)
+{
+    jl_char_type    = (jl_datatype_t*)core("Char");
+    XX(char);
+    jl_int8_type    = (jl_datatype_t*)core("Int8");
+    XX(int8);
+    jl_int16_type   = (jl_datatype_t*)core("Int16");
+    XX(int16);
+    jl_float16_type = (jl_datatype_t*)core("Float16");
+    //XX(float16);
+    jl_float32_type = (jl_datatype_t*)core("Float32");
+    //XX(float32);
+    jl_float64_type = (jl_datatype_t*)core("Float64");
+    //XX(float64);
+    jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat");
+    jl_number_type  = (jl_datatype_t*)core("Number");
+    jl_signed_type  = (jl_datatype_t*)core("Signed");
+    jl_datatype_t *jl_unsigned_type = (jl_datatype_t*)core("Unsigned");
+    jl_datatype_t *jl_integer_type = (jl_datatype_t*)core("Integer");
+
+    jl_bool_type->super = jl_integer_type;
+    jl_uint8_type->super = jl_unsigned_type;
+    jl_uint16_type->super = jl_unsigned_type;
+    jl_uint32_type->super = jl_unsigned_type;
+    jl_uint64_type->super = jl_unsigned_type;
+    jl_int32_type->super = jl_signed_type;
+    jl_int64_type->super = jl_signed_type;
+
+    jl_errorexception_type = (jl_datatype_t*)core("ErrorException");
+    jl_stackovf_exception  = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError"));
+    jl_diverror_exception  = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
+    jl_undefref_exception  = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
+    jl_undefvarerror_type  = (jl_datatype_t*)core("UndefVarError");
+    jl_atomicerror_type    = (jl_datatype_t*)core("ConcurrencyViolationError");
+    jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
+    jl_boundserror_type    = (jl_datatype_t*)core("BoundsError");
+    jl_memory_exception    = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError"));
+    jl_readonlymemory_exception = jl_new_struct_uninit((jl_datatype_t*)core("ReadOnlyMemoryError"));
+    jl_typeerror_type      = (jl_datatype_t*)core("TypeError");
+    jl_argumenterror_type  = (jl_datatype_t*)core("ArgumentError");
+    jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
+    jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
+    jl_initerror_type      = (jl_datatype_t*)core("InitError");
+    jl_pair_type           = core("Pair");
+    jl_kwcall_func         = core("kwcall");
+    jl_kwcall_mt           = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt;
+    jl_atomic_store_relaxed(&jl_kwcall_mt->max_args, 0);
+
+    jl_weakref_type = (jl_datatype_t*)core("WeakRef");
+    jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
+
+    jl_init_box_caches();
+
+    // set module field of primitive types
+    jl_svec_t *bindings = jl_atomic_load_relaxed(&jl_core_module->bindings);
+    jl_value_t **table = jl_svec_data(bindings);
+    for (size_t i = 0; i < jl_svec_len(bindings); i++) {
+        if (table[i] != jl_nothing) {
+            jl_binding_t *b = (jl_binding_t*)table[i];
+            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+            if (v) {
+                if (jl_is_unionall(v))
+                    v = jl_unwrap_unionall(v);
+                if (jl_is_datatype(v)) {
+                    jl_datatype_t *tt = (jl_datatype_t*)v;
+                    tt->name->module = jl_core_module;
+                    if (tt->name->mt)
+                        tt->name->mt->module = jl_core_module;
+                }
+            }
+        }
+    }
+    export_small_typeof();
 }
+#undef XX
 
 #ifdef __cplusplus
 }
diff --git a/src/julia-parser.scm b/src/julia-parser.scm
index 710ddc2f3bdd5..210ba8f0ae07b 100644
--- a/src/julia-parser.scm
+++ b/src/julia-parser.scm
@@ -10,7 +10,7 @@
 ;; comma - higher than assignment outside parentheses, lower when inside
 (define prec-pair (add-dots '(=>)))
 (define prec-conditional '(?))
-(define prec-arrow       (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->)))
+(define prec-arrow       (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⥷ ⭄ ⥺ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->)))
 (define prec-lazy-or     (add-dots '(|\|\||)))
 (define prec-lazy-and    (add-dots '(&&)))
 (define prec-comparison
@@ -20,7 +20,7 @@
 (define prec-pipe>       '(|.\|>| |\|>|))
 (define prec-colon       (append! '(: |..|) (add-dots '(… ⁝ ⋮ ⋱ ⋰ ⋯))))
 (define prec-plus        (append! '($)
-                          (add-dots '(+ - − ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣))))
+                          (add-dots '(+ - − ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⟇ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣))))
 (define prec-times       (add-dots '(* / ⌿ ÷ % & · · ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟)))
 (define prec-rational    (add-dots '(//)))
 (define prec-bitshift    (add-dots '(<< >> >>>)))
@@ -49,7 +49,7 @@
         ((not (length> l 8))
          (eval `(lambda (x)
                   (not (not (,(if (every symbol? l) 'memq 'memv) x (quote ,l)))))))
-        ((and (every symbol? l) (not (length> l 20)))
+        ((and (not (length> l 20)) (every symbol? l))
          (eval `(lambda (x)
                   (not (not (memq x (quote ,l)))))))
         (else
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index 4a0407e019432..cac8c7b5228b9 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -382,13 +382,8 @@
                                          `((meta generated
                                                  (new (core GeneratedFunctionStub)
                                                       ,gname
-                                                      ,(cons 'list anames)
-                                                      ,(if (null? sparams)
-                                                           'nothing
-                                                           (cons 'list (map car sparams)))
-                                                      ,(cadr loc)
-                                                      (inert ,(caddr loc))
-                                                      (false))))))
+                                                      (call (core svec) ,@(map quotify anames))
+                                                      (call (core svec) ,@(map quotify names)))))))
                              (list gf))
                            '()))
             (types (llist-types argl))
@@ -512,6 +507,7 @@
                               positional-sparams)))
                   sparams))
          (kw      (gensy))
+         (kwdecl  `(|::| ,kw (core NamedTuple)))
          (rkw     (if (null? restkw) (make-ssavalue) (symbol (string (car restkw) "..."))))
          (restkw  (map (lambda (v) `(|::| ,v (call (top pairs) (core NamedTuple)))) restkw))
          (mangled (let ((und (and name (undot-name name))))
@@ -560,7 +556,7 @@
           `((|::|
              ;; if there are optional positional args, we need to be able to reference the function name
              ,(if (any kwarg? pargl) (gensy) UNUSED)
-             (call (core kwftype) ,ftype)) ,kw ,@pargl ,@vararg)
+             (call (core kwftype) ,ftype)) ,kwdecl ,@pargl ,@vararg)
           `(block
             ;; propagate method metadata to keyword sorter
             ,@(map propagate-method-meta (filter meta? prologue))
@@ -756,8 +752,18 @@
 
 (define (default-inner-ctors name field-names field-types params bounds locs)
   (let* ((field-names (safe-field-names field-names field-types))
-         (any-ctor
+         (all-ctor (if (null? params)
+          ;; definition with exact types for all arguments
+          `(function (call ,name
+                          ,@(map make-decl field-names field-types))
+                    (block
+                     ,@locs
+                     (new (outerref ,name) ,@field-names)))
+          #f))
+         (any-ctor (if (or (not all-ctor) (any (lambda (t) (not (equal? t '(core Any))))
+                                 field-types))
           ;; definition with Any for all arguments
+          ;; only if any field type is not Any, checked at runtime
           `(function (call (|::| |#ctor-self#|
                             ,(with-wheres
                               `(curly (core Type) ,(if (pair? params)
@@ -767,23 +773,18 @@
                            ,@field-names)
                      (block
                       ,@locs
-                      (call new ,@field-names)))))
-    (if (and (null? params) (any (lambda (t) (not (equal? t '(core Any))))
-                                 field-types))
-        (list
-         ;; definition with field types for all arguments
-         ;; only if any field type is not Any, checked at runtime
-         `(if ,(foldl (lambda (t u)
-                        `(&& ,u (call (core ===) (core Any) ,t)))
-                      `(call (core ===) (core Any) ,(car field-types))
-                      (cdr field-types))
-            (block)
-            (function (call ,name
-                            ,@(map make-decl field-names field-types))
-                      (block
-                       ,@locs
-                       (new (outerref ,name) ,@field-names))))
-         any-ctor)
+                      (call new ,@field-names))) ; this will add convert calls later
+          #f)))
+    (if all-ctor
+        (if any-ctor
+            (list all-ctor
+                  `(if ,(foldl (lambda (t u)
+                           `(&& ,u (call (core ===) (core Any) ,t)))
+                         `(call (core ===) (core Any) ,(car field-types))
+                         (cdr field-types))
+                       '(block)
+                       ,any-ctor))
+            (list all-ctor))
         (list any-ctor))))
 
 (define (default-outer-ctor name field-names field-types params bounds locs)
@@ -793,7 +794,7 @@
                  (map (lambda (b) (cons 'var-bounds b)) bounds))
                (block
                 ,@locs
-                (call (curly ,name ,@params) ,@field-names)))))
+                (new (curly ,name ,@params) ,@field-names)))))
 
 (define (num-non-varargs args)
   (count (lambda (a) (not (vararg? a))) args))
@@ -818,12 +819,14 @@
          (field-convert (lambda (fld fty val)
                           (if (equal? fty '(core Any))
                               val
-                              `(call (top convert)
-                                     ,(if (and (not selftype?) (equal? type-params params) (memq fty params) (memq fty sparams))
-                                          fty ; the field type is a simple parameter, the usage here is of a
-                                              ; local variable (currently just handles sparam) for the bijection of params to type-params
-                                          `(call (core fieldtype) ,tn ,(+ fld 1)))
-                                     ,val)))))
+                              (convert-for-type-decl val
+                                                     ; for ty, usually use the fieldtype, not the fty expression
+                                                     (if (and (not selftype?) (equal? type-params params) (memq fty params) (memq fty sparams))
+                                                      fty ; the field type is a simple parameter, the usage here is of a
+                                                          ; local variable (currently just handles sparam) for the bijection of params to type-params
+                                                      `(call (core fieldtype) ,tn ,(+ fld 1)))
+                                                      #f
+                                                      #f)))))
     (cond ((> (num-non-varargs args) (length field-names))
            `(call (core throw) (call (top ArgumentError)
                                      ,(string "new: too many arguments (expected " (length field-names) ")"))))
@@ -3253,8 +3256,9 @@
         ((and (pair? e) (eq? (car e) 'with-static-parameters)) (free-vars- (cadr e) tab))
         ((or (atom? e) (quoted? e)) tab)
         ((eq? (car e) 'lambda)
-         (let ((bound (lambda-all-vars e)))
-           (for-each (lambda (v) (if (not (memq v bound)) (put! tab v #t)))
+         (let ((bound (table)))
+           (for-each (lambda (b) (put! bound b #t)) (lambda-all-vars e))
+           (for-each (lambda (v) (if (not (has? bound v)) (put! tab v #t)))
                      (free-vars (lam:body e))))
          tab)
         (else
@@ -3472,13 +3476,13 @@ f(x) = yt(x)
 
 (define (convert-lambda lam fname interp capt-sp opaq)
   (let ((body (add-box-inits-to-body
-               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq))))
+               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam)))))))
     `(lambda ,(lam:args lam)
        (,(clear-capture-bits (car (lam:vinfo lam)))
         ()
         ,(caddr (lam:vinfo lam))
         ,(delete-duplicates (append (lam:sp lam) capt-sp)))
-      ,body)))
+       ,body)))
 
 ;; renumber ssavalues assigned in an expr, allowing it to be repeated
 (define (renumber-assigned-ssavalues e)
@@ -3498,26 +3502,34 @@ f(x) = yt(x)
                     (cons (car x)
                           (map do-replace (cdr x))))))))))
 
-(define (convert-for-type-decl rhs t)
+(define (convert-for-type-decl rhs t assert lam)
   (if (equal? t '(core Any))
       rhs
-      (let* ((temp (if (or (atom? t) (ssavalue? t) (quoted? t))
+      (let* ((new-mutable-var
+               (lambda () (let ((g (gensy)))
+                               (if lam (set-car! (lam:vinfo lam) (append (car (lam:vinfo lam)) `((,g Any 10)))))
+                               g)))
+             (left (if (or (atom? t) (ssavalue? t) (quoted? t))
                        #f
                        (make-ssavalue)))
-             (ty   (or temp t))
-             (ex   `(call (core typeassert)
-                          (call (top convert) ,ty ,rhs)
-                          ,ty)))
-        (if temp
-            `(block (= ,temp ,(renumber-assigned-ssavalues t)) ,ex)
-            ex))))
+             (temp (new-mutable-var)) ; use a slot to permit union-splitting this in inference
+             (ty   (or left t))
+             (ex   `(call (top convert) ,ty ,temp))
+             (ex   (if assert `(call (core typeassert) ,ex ,ty) ex))
+             (ex   `(= ,temp ,ex))
+             (ex   `(if (call (core isa) ,temp ,ty) (null) ,ex))
+             (t    (if left (renumber-assigned-ssavalues t) t))
+             (ex   `((= ,temp ,rhs) ,ex ,temp))
+             (ex   (if left (cons `(= ,left ,t) ex) ex))
+             (ex   (if lam ex (cons `(local-def ,temp) ex))))
+        (cons 'block ex))))
 
 (define (capt-var-access var fname opaq)
   (if opaq
       `(call (core getfield) ,fname ,(get opaq var))
       `(call (core getfield) ,fname (inert ,var))))
 
-(define (convert-global-assignment var rhs0 globals)
+(define (convert-global-assignment var rhs0 globals lam)
   (let* ((rhs1 (if (or (simple-atom? rhs0)
                        (equal? rhs0 '(the_exception)))
                    rhs0
@@ -3525,7 +3537,7 @@ f(x) = yt(x)
          (ref   (binding-to-globalref var))
          (ty   `(call (core get_binding_type) ,(cadr ref) (inert ,(caddr ref))))
          (rhs  (if (get globals ref #t) ;; no type declaration for constants
-                   (convert-for-type-decl rhs1 ty)
+                   (convert-for-type-decl rhs1 ty #f lam)
                    rhs1))
          (ex   `(= ,var ,rhs)))
     (if (eq? rhs1 rhs0)
@@ -3539,10 +3551,10 @@ f(x) = yt(x)
 ;; declared types.
 ;; when doing this, the original value needs to be preserved, to
 ;; ensure the expression `a=b` always returns exactly `b`.
-(define (convert-assignment var rhs0 fname lam interp opaq globals)
+(define (convert-assignment var rhs0 fname lam interp opaq globals locals)
   (cond
     ((symbol? var)
-     (let* ((vi (assq var (car  (lam:vinfo lam))))
+     (let* ((vi (get locals var #f))
             (cv (assq var (cadr (lam:vinfo lam))))
             (vt  (or (and vi (vinfo:type vi))
                      (and cv (vinfo:type cv))
@@ -3552,14 +3564,12 @@ f(x) = yt(x)
        (if (and (not closed) (not capt) (equal? vt '(core Any)))
            (if (or (local-in? var lam) (underscore-symbol? var))
                `(= ,var ,rhs0)
-               (convert-global-assignment var rhs0 globals))
+               (convert-global-assignment var rhs0 globals lam))
            (let* ((rhs1 (if (or (simple-atom? rhs0)
                                 (equal? rhs0 '(the_exception)))
                             rhs0
                             (make-ssavalue)))
-                  (rhs  (if (equal? vt '(core Any))
-                            rhs1
-                            (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq))))
+                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam))
                   (ex (cond (closed `(call (core setfield!)
                                            ,(if interp
                                                 `($ ,var)
@@ -3574,7 +3584,7 @@ f(x) = yt(x)
                          ,ex
                          ,rhs1))))))
      ((or (outerref? var) (globalref? var))
-      (convert-global-assignment var rhs0 globals))
+      (convert-global-assignment var rhs0 globals lam))
      ((ssavalue? var)
       `(= ,var ,rhs0))
      (else
@@ -3678,8 +3688,9 @@ f(x) = yt(x)
          const atomic null true false ssavalue isdefined toplevel module lambda
          error gc_preserve_begin gc_preserve_end import using export inline noinline)))
 
-(define (local-in? s lam)
-  (or (assq s (car  (lam:vinfo lam)))
+(define (local-in? s lam (tab #f))
+  (or (and tab (has? tab s))
+      (assq s (car  (lam:vinfo lam)))
       (assq s (cadr (lam:vinfo lam)))))
 
 ;; Try to identify never-undef variables, and then clear the `captured` flag for single-assigned,
@@ -3834,17 +3845,17 @@ f(x) = yt(x)
 (define (toplevel-preserving? e)
   (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse))))
 
-(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)))
+(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
   (if toplevel
       (map (lambda (x)
              (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined
                                                   (and toplevel (toplevel-preserving? x))
-                                                  interp opaq globals))))
+                                                  interp opaq globals locals))))
                (if (null? (cdr tl))
                    (car tl)
                    `(block ,@(cdr tl) ,(car tl)))))
            exprs)
-      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals)) exprs)))
+      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs)))
 
 (define (prepare-lambda! lam)
   ;; mark all non-arguments as assigned, since locals that are never assigned
@@ -3853,11 +3864,11 @@ f(x) = yt(x)
             (list-tail (car (lam:vinfo lam)) (length (lam:args lam))))
   (lambda-optimize-vars! lam))
 
-(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)))
+(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
   (if (and (not lam)
            (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure)))))
       (if (atom? e) e
-          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals)))
+          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))
       (cond
        ((symbol? e)
         (define (new-undef-var name)
@@ -3876,12 +3887,12 @@ f(x) = yt(x)
                  (val (if (equal? typ '(core Any))
                           val
                           `(call (core typeassert) ,val
-                                 ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals)))))
+                                 ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals)))))
             `(block
                ,@(if (eq? box access) '() `((= ,access ,box)))
                ,undefcheck
                ,val)))
-        (let ((vi (assq e (car  (lam:vinfo lam))))
+        (let ((vi (get locals e #f))
               (cv (assq e (cadr (lam:vinfo lam)))))
           (cond ((eq? e fname) e)
                 ((memq e (lam:sp lam)) e)
@@ -3908,15 +3919,15 @@ f(x) = yt(x)
            e)
           ((=)
            (let ((var (cadr e))
-                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals)))
-             (convert-assignment var rhs fname lam interp opaq globals)))
+                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals)))
+             (convert-assignment var rhs fname lam interp opaq globals locals)))
           ((local-def) ;; make new Box for local declaration of defined variable
-           (let ((vi (assq (cadr e) (car (lam:vinfo lam)))))
+           (let ((vi (get locals (cadr e) #f)))
              (if (and vi (vinfo:asgn vi) (vinfo:capt vi))
                  `(= ,(cadr e) (call (core Box)))
                  '(null))))
           ((local) ;; convert local declarations to newvar statements
-           (let ((vi (assq (cadr e) (car (lam:vinfo lam)))))
+           (let ((vi (get locals (cadr e) #f)))
              (if (and vi (vinfo:asgn vi) (vinfo:capt vi))
                  `(= ,(cadr e) (call (core Box)))
                  (if (vinfo:never-undef vi)
@@ -3927,12 +3938,12 @@ f(x) = yt(x)
            e)
           ((atomic) e)
           ((const-if-global)
-           (if (local-in? (cadr e) lam)
+           (if (local-in? (cadr e) lam locals)
                '(null)
                `(const ,(cadr e))))
           ((isdefined) ;; convert isdefined expr to function for closure converted variables
            (let* ((sym (cadr e))
-                  (vi (and (symbol? sym) (assq sym (car  (lam:vinfo lam)))))
+                  (vi (and (symbol? sym) (get locals sym #f)))
                   (cv (and (symbol? sym) (assq sym (cadr (lam:vinfo lam))))))
              (cond ((eq? sym fname) e)
                    ((memq sym (lam:sp lam)) e)
@@ -3972,13 +3983,13 @@ f(x) = yt(x)
                   (lam2  (if short #f (cadddr e)))
                   (vis   (if short '(() () ()) (lam:vinfo lam2)))
                   (cvs   (map car (cadr vis)))
-                  (local? (lambda (s) (and lam (symbol? s) (local-in? s lam))))
+                  (local? (lambda (s) (and lam (symbol? s) (local-in? s lam locals))))
                   (local (and (not (outerref? (cadr e))) (local? name)))
                   (sig      (and (not short) (caddr e)))
                   (sp-inits (if (or short (not (eq? (car sig) 'block)))
                                 '()
                                 (map-cl-convert (butlast (cdr sig))
-                                                fname lam namemap defined toplevel interp opaq globals)))
+                                                fname lam namemap defined toplevel interp opaq globals locals)))
                   (sig      (and sig (if (eq? (car sig) 'block)
                                          (last sig)
                                          sig))))
@@ -4005,10 +4016,11 @@ f(x) = yt(x)
                                           ;; anonymous functions with keyword args generate global
                                           ;; functions that refer to the type of a local function
                                           (rename-sig-types sig namemap)
-                                          fname lam namemap defined toplevel interp opaq globals)
+                                          fname lam namemap defined toplevel interp opaq globals locals)
                                   ,(let ((body (add-box-inits-to-body
                                                 lam2
-                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq))))
+                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table)
+                                                            (vinfo-to-table (car (lam:vinfo lam2)))))))
                                      `(lambda ,(cadr lam2)
                                         (,(clear-capture-bits (car vis))
                                          ,@(cdr vis))
@@ -4019,7 +4031,7 @@ f(x) = yt(x)
                                (newlam    (compact-and-renumber (linearize (car exprs)) 'none 0)))
                           `(toplevel-butfirst
                             (block ,@sp-inits
-                                   (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals)
+                                   (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals)
                                            ,(julia-bq-macro newlam)))
                             ,@top-stmts))))
 
@@ -4122,7 +4134,7 @@ f(x) = yt(x)
                                (append (map (lambda (gs tvar)
                                               (make-assignment gs `(call (core TypeVar) ',tvar (core Any))))
                                             closure-param-syms closure-param-names)
-                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals)
+                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals)
                                                  ,(convert-lambda lam2
                                                                   (if iskw
                                                                       (caddr (lam:args lam2))
@@ -4161,7 +4173,7 @@ f(x) = yt(x)
                        (begin
                          (put! defined name #t)
                          `(toplevel-butfirst
-                           ,(convert-assignment name mk-closure fname lam interp opaq globals)
+                           ,(convert-assignment name mk-closure fname lam interp opaq globals locals)
                            ,@typedef
                            ,@(map (lambda (v) `(moved-local ,v)) moved-vars)
                            ,@sp-inits
@@ -4169,42 +4181,43 @@ f(x) = yt(x)
           ((lambda)  ;; happens inside (thunk ...) and generated function bodies
            (for-each (lambda (vi) (vinfo:set-asgn! vi #t))
                      (list-tail (car (lam:vinfo e)) (length (lam:args e))))
+           (lambda-optimize-vars! e)
            (let ((body (map-cl-convert (cdr (lam:body e)) 'anon
-                                       (lambda-optimize-vars! e)
+                                       e
                                        (table)
                                        (table)
                                        (null? (cadr e)) ;; only toplevel thunks have 0 args
-                                       interp opaq globals)))
+                                       interp opaq globals (vinfo-to-table (car (lam:vinfo e))))))
              `(lambda ,(cadr e)
                 (,(clear-capture-bits (car (lam:vinfo e)))
                  () ,@(cddr (lam:vinfo e)))
                 (block ,@body))))
           ;; remaining `::` expressions are type assertions
           ((|::|)
-           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals))
+           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals))
           ;; remaining `decl` expressions are only type assertions if the
           ;; argument is global or a non-symbol.
           ((decl)
            (cond ((and (symbol? (cadr e))
-                       (local-in? (cadr e) lam))
+                       (local-in? (cadr e) lam locals))
                   '(null))
                  (else
                   (cl-convert
-                    (let ((ref (binding-to-globalref (cadr e))))
-                      (if ref
-                          (begin
-                            (put! globals ref #t)
-                            `(block
-                               (toplevel-only set_binding_type! ,(cadr e))
-                               (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e))))
-                          `(call (core typeassert) ,@(cdr e))))
-                    fname lam namemap defined toplevel interp opaq globals))))
+                   (let ((ref (binding-to-globalref (cadr e))))
+                     (if ref
+                         (begin
+                           (put! globals ref #t)
+                           `(block
+                             (toplevel-only set_binding_type! ,(cadr e))
+                             (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e))))
+                         `(call (core typeassert) ,@(cdr e))))
+                   fname lam namemap defined toplevel interp opaq globals locals))))
           ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars
           ((with-static-parameters)
-           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals))
+           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals))
           (else
            (cons (car e)
-                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals))))))))
+                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))))))))
 
 (define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f))
 
@@ -4248,6 +4261,7 @@ f(x) = yt(x)
         (current-loc #f)
         (rett #f)
         (global-const-error #f)
+        (vinfo-table (vinfo-to-table (car (lam:vinfo lam))))
         (arg-map #f)          ;; map arguments to new names if they are assigned
         (label-counter 0)     ;; counter for generating label addresses
         (label-map (table))   ;; maps label names to generated addresses
@@ -4260,6 +4274,7 @@ f(x) = yt(x)
         (handler-level 0)     ;; exception handler nesting depth
         (catch-token-stack '())) ;; tokens identifying handler enter for current catch blocks
     (define (emit c)
+      (or c (raise "missing value in IR"))
       (set! code (cons c code))
       c)
     (define (make-label)
@@ -4315,7 +4330,7 @@ f(x) = yt(x)
               x)))
       (define (actually-return x)
         (let* ((x (if rett
-                      (compile (convert-for-type-decl (emit- x) rett) '() #t #f)
+                      (compile (convert-for-type-decl (emit- x) rett #t lam) '() #t #f)
                       x))
                (x (emit- x)))
           (let ((pexc (pop-exc-expr catch-token-stack '())))
@@ -4434,7 +4449,8 @@ f(x) = yt(x)
               (emit `(= ,lhs ,rhs))
               (let ((rr (make-ssavalue)))
                 (emit `(= ,rr ,rhs))
-                (emit `(= ,lhs ,rr)))))
+                (emit `(= ,lhs ,rr))))
+          (emit `(= ,lhs (null)))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved
       #f)
     ;; the interpreter loop. `break-labels` keeps track of the labels to jump to
     ;; for all currently closing break-blocks.
@@ -4553,7 +4569,7 @@ f(x) = yt(x)
                                  (emit '(meta pop_loc))
                                  (emit `(return ,retv)))
                                (emit '(meta pop_loc))))
-                          ((and value (not (simple-atom? v)))
+                          ((and v value (not (simple-atom? v)))
                            (let ((tmp (make-ssavalue)))
                              (emit `(= ,tmp ,v))
                              (set! v tmp)
@@ -4682,7 +4698,7 @@ f(x) = yt(x)
                      (begin (mark-label els)
                             (let ((v3 (compile (cadddr e) break-labels value tail))) ;; emit else block code
                               (if val (emit-assignment val v3)))
-                            (emit `(goto ,endl))))
+                            (if endl (emit `(goto ,endl)))))
                  ;; emit either catch or finally block
                  (mark-label catch)
                  (emit `(leave 1))
@@ -4723,7 +4739,7 @@ f(x) = yt(x)
              ;; avoid duplicate newvar nodes
              (if (and (not (and (pair? code) (equal? (car code) e)))
                       ;; exclude deleted vars
-                      (assq (cadr e) (car (lam:vinfo lam))))
+                      (has? vinfo-table (cadr e)))
                  (emit e)
                  #f))
             ((global) ; keep global declarations as statements
@@ -4938,22 +4954,20 @@ f(x) = yt(x)
         (linetable    '(list))
         (labltable    (table))
         (ssavtable    (table))
-        (reachable    #t)
         (current-loc  0)
         (current-file file)
         (current-line line)
         (locstack     '())
         (i            1))
     (define (emit e)
+      (or e (raise "missing value in IR"))
       (if (and (null? (cdr linetable))
                (not (and (pair? e) (eq? (car e) 'meta))))
           (begin (set! linetable (cons (make-lineinfo name file line) linetable))
                  (set! current-loc 1)))
-      (if (or reachable
-              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline))))
-          (begin (set! code (cons e code))
-                 (set! i (+ i 1))
-                 (set! locs (cons current-loc locs)))))
+      (set! code (cons e code))
+      (set! i (+ i 1))
+      (set! locs (cons current-loc locs)))
     (let loop ((stmts (cdr body)))
       (if (pair? stmts)
           (let ((e (car stmts)))
@@ -4985,7 +4999,6 @@ f(x) = yt(x)
                      (set! current-line (cadr l))
                      (set! current-file (caddr l))))
                   ((eq? (car e) 'label)
-                   (set! reachable #t)
                    (put! labltable (cadr e) i))
                   ((and (assignment? e) (ssavalue? (cadr e)))
                    (let ((idx (and (ssavalue? (caddr e)) (get ssavtable (cadr (caddr e)) #f))))
@@ -4996,9 +5009,7 @@ f(x) = yt(x)
                            (put! ssavtable (cadr (cadr e)) i)
                            (emit (caddr e))))))
                   (else
-                   (emit e)
-                   (if (or (eq? (car e) 'goto) (eq? (car e) 'return))
-                       (set! reachable #f))))
+                   (emit e)))
             (loop (cdr stmts)))))
     (vector (reverse code) (reverse locs) (reverse linetable) ssavtable labltable)))
 
@@ -5032,8 +5043,8 @@ f(x) = yt(x)
             ((or (atom? e) (quoted? e) (eq? (car e) 'global))
              e)
             ((ssavalue? e)
-             (let ((idx (or (get ssavalue-table (cadr e) #f)
-                            (error "ssavalue with no def"))))
+             (let ((idx (get ssavalue-table (cadr e) #f)))
+               (if (not idx) (begin (prn e) (prn lam) (error "ssavalue with no def")))
                `(ssavalue ,idx)))
             ((memq (car e) '(goto enter))
              (list* (car e) (get label-table (cadr e)) (cddr e)))
diff --git a/src/julia.expmap b/src/julia.expmap
index 35cc5eac48b6a..94b955e95981f 100644
--- a/src/julia.expmap
+++ b/src/julia.expmap
@@ -7,6 +7,7 @@
     ios_*;
     arraylist_grow;
     small_arraylist_grow;
+    small_typeof;
     jl_*;
     ijl_*;
     _jl_mutex_*;
@@ -18,10 +19,7 @@
     memhash32;
     memhash32_seed;
     memhash_seed;
-    restore_arg_area_loc;
     restore_signals;
-    rl_clear_input;
-    save_arg_area_loc;
     u8_*;
     uv_*;
     add_library_mapping;
@@ -35,6 +33,9 @@
     LLVMExtra*;
     llvmGetPassPluginInfo;
 
+    /* Make visible so that linker will merge duplicate definitions across DSO boundaries */
+    _ZN4llvm3Any6TypeId*;
+
     /* freebsd */
     environ;
     __progname;
diff --git a/src/julia.h b/src/julia.h
index 75ebab99dbbf7..7950eca3e0f1d 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -3,7 +3,10 @@
 #ifndef JULIA_H
 #define JULIA_H
 
-#ifdef LIBRARY_EXPORTS
+#if defined(JL_LIBRARY_EXPORTS_INTERNAL) || defined(JL_LIBRARY_EXPORTS_CODEGEN)
+#define JL_LIBRARY_EXPORTS
+#endif
+#ifdef JL_LIBRARY_EXPORTS
 // Generated file, needs to be searched in include paths so that the builddir
 // retains priority
 #include <jl_internal_funcs.inc>
@@ -70,7 +73,7 @@
 typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
 typedef struct _jl_tls_states_t *jl_ptls_t;
 
-#ifdef LIBRARY_EXPORTS
+#ifdef JL_LIBRARY_EXPORTS
 #include "uv.h"
 #endif
 #include "julia_atomics.h"
@@ -91,6 +94,13 @@ typedef struct _jl_value_t jl_value_t;
 
 struct _jl_taggedvalue_bits {
     uintptr_t gc:2;
+    uintptr_t in_image:1;
+    uintptr_t unused:1;
+#ifdef _P64
+    uintptr_t tag:60;
+#else
+    uintptr_t tag:28;
+#endif
 };
 
 JL_EXTENSION struct _jl_taggedvalue_t {
@@ -103,6 +113,7 @@ JL_EXTENSION struct _jl_taggedvalue_t {
     // jl_value_t value;
 };
 
+static inline jl_value_t *jl_to_typeof(uintptr_t t) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #ifdef __clang_gcanalyzer__
 JL_DLLEXPORT jl_taggedvalue_t *_jl_astaggedvalue(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 #define jl_astaggedvalue(v) _jl_astaggedvalue((jl_value_t*)(v))
@@ -113,10 +124,10 @@ JL_DLLEXPORT jl_value_t *_jl_typeof(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFE
 #else
 #define jl_astaggedvalue(v)                                             \
     ((jl_taggedvalue_t*)((char*)(v) - sizeof(jl_taggedvalue_t)))
-#define jl_valueof(v)                                           \
+#define jl_valueof(v)                                                   \
     ((jl_value_t*)((char*)(v) + sizeof(jl_taggedvalue_t)))
 #define jl_typeof(v)                                                    \
-    ((jl_value_t*)(jl_astaggedvalue(v)->header & ~(uintptr_t)15))
+    jl_to_typeof(jl_typetagof(v))
 #endif
 static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
 {
@@ -124,7 +135,11 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
     jl_taggedvalue_t *tag = jl_astaggedvalue(v);
     jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t);
 }
+#define jl_typetagof(v)                                                 \
+    ((jl_astaggedvalue(v)->header) & ~(uintptr_t)15)
 #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t))
+#define jl_typetagis(v,t) (jl_typetagof(v)==(uintptr_t)(t))
+#define jl_set_typetagof(v,t,gc) (jl_set_typeof((v), (void*)(((uintptr_t)(t) << 4) | (gc))))
 
 // Symbols are interned strings (hash-consed) stored as an invasive binary tree.
 // The string data is nul-terminated and hangs off the end of the struct.
@@ -216,22 +231,24 @@ typedef jl_call_t *jl_callptr_t;
 // "speccall" calling convention signatures.
 // This describes some of the special ABI used by compiled julia functions.
 extern jl_call_t jl_fptr_args;
-JL_DLLEXPORT extern jl_callptr_t jl_fptr_args_addr;
+JL_DLLEXPORT extern const jl_callptr_t jl_fptr_args_addr;
 typedef jl_value_t *(*jl_fptr_args_t)(jl_value_t*, jl_value_t**, uint32_t);
 
 extern jl_call_t jl_fptr_const_return;
-JL_DLLEXPORT extern jl_callptr_t jl_fptr_const_return_addr;
+JL_DLLEXPORT extern const jl_callptr_t jl_fptr_const_return_addr;
 
 extern jl_call_t jl_fptr_sparam;
-JL_DLLEXPORT extern jl_callptr_t jl_fptr_sparam_addr;
+JL_DLLEXPORT extern const jl_callptr_t jl_fptr_sparam_addr;
 typedef jl_value_t *(*jl_fptr_sparam_t)(jl_value_t*, jl_value_t**, uint32_t, jl_svec_t*);
 
 extern jl_call_t jl_fptr_interpret_call;
-JL_DLLEXPORT extern jl_callptr_t jl_fptr_interpret_call_addr;
+JL_DLLEXPORT extern const jl_callptr_t jl_fptr_interpret_call_addr;
+
+JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr;
 
 typedef struct _jl_line_info_node_t {
     struct _jl_module_t *module;
-    jl_value_t *method;
+    jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t
     jl_sym_t *file;
     int32_t line;
     int32_t inlined_at;
@@ -284,7 +301,6 @@ typedef struct _jl_code_info_t {
     // various boolean properties:
     uint8_t inferred;
     uint8_t propagate_inbounds;
-    uint8_t pure;
     uint8_t has_fcall;
     // uint8 settings
     uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline
@@ -309,7 +325,7 @@ typedef struct _jl_method_t {
     jl_value_t *sig;
 
     // table of all jl_method_instance_t specializations we have
-    _Atomic(jl_svec_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....]
+    _Atomic(jl_value_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....], or a single item
     _Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations
 
     jl_value_t *slot_syms; // compacted list of slot names (String)
@@ -342,10 +358,11 @@ typedef struct _jl_method_t {
                             // of another method.
     // various boolean properties
     uint8_t isva;
-    uint8_t pure;
     uint8_t is_for_opaque_closure;
     // uint8 settings
-    uint8_t constprop;     // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none
+    uint8_t constprop;      // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none
+    uint8_t max_varargs;    // 0xFF = use heuristic; otherwise, max # of args to expand
+                            // varargs when specializing.
 
     // Override the conclusions of inter-procedural effect analysis,
     // forcing the conclusion to always true.
@@ -373,6 +390,7 @@ struct _jl_method_instance_t {
     jl_array_t *callbacks; // list of callback functions to inform external caches about invalidations
     _Atomic(struct _jl_code_instance_t*) cache;
     uint8_t inInference; // flags to tell if inference is running on this object
+    uint8_t cache_with_orig; // !cache_with_specTypes
     _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call
 };
 
@@ -399,7 +417,7 @@ typedef struct _jl_code_instance_t {
     // inference state cache
     jl_value_t *rettype; // return type for fptr
     jl_value_t *rettype_const; // inferred constant return value, or null
-    _Atomic(jl_value_t *) inferred; // inferred jl_code_info_t, or jl_nothing, or null
+    _Atomic(jl_value_t *) inferred; // inferred jl_code_info_t (may be compressed), or jl_nothing, or null
     //TODO: jl_array_t *edges; // stored information about edges from this object
     //TODO: uint8_t absolute_max; // whether true max world is unknown
 
@@ -426,8 +444,9 @@ typedef struct _jl_code_instance_t {
     jl_value_t *argescapes; // escape information of call arguments
 
     // compilation state cache
-    // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with the invoke pointers.
-    uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype
+    _Atomic(uint8_t) specsigflags; // & 0b001 == specptr is a specialized function signature for specTypes->rettype
+                                   // & 0b010 == invokeptr matches specptr
+                                   // & 0b100 == From image
     _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
     uint8_t relocatability;  // nonzero if all roots are built into sysimg or tagged by module key
     _Atomic(jl_callptr_t) invoke; // jlcall entry point
@@ -548,10 +567,11 @@ typedef struct _jl_datatype_t {
     uint16_t isbitstype:1; // relevant query for C-api and type-parameters
     uint16_t zeroinit:1; // if one or more fields requires zero-initialization
     uint16_t has_concrete_subtype:1; // If clear, no value will have this datatype
-    uint16_t cached_by_hash:1; // stored in hash-based set cache (instead of linear cache)
+    uint16_t maybe_subtype_of_cache:1; // Computational bit for has_concrete_supertype. See description in jltypes.c.
     uint16_t isprimitivetype:1; // whether this is declared with 'primitive type' keyword (sized, no fields, and immutable)
     uint16_t ismutationfree:1; // whether any mutable memory is reachable through this type (in the type or via fields)
     uint16_t isidentityfree:1; // whether this type or any object reachable through its fields has non-content-based identity
+    uint16_t smalltag:6; // whether this type has a small-tag optimization
 } jl_datatype_t;
 
 typedef struct _jl_vararg_t {
@@ -576,6 +596,7 @@ typedef struct _jl_binding_t {
     uint8_t imported:1;
     uint8_t usingfailed:1;
     uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
+    uint8_t padding:2;
 } jl_binding_t;
 
 typedef struct {
@@ -639,10 +660,10 @@ typedef struct _jl_typemap_level_t {
     // next split may be on Type{T} as LeafTypes then TypeName's parents up to Any
     // next split may be on LeafType
     // next split may be on TypeName
-    _Atomic(jl_array_t*) arg1; // contains LeafType
-    _Atomic(jl_array_t*) targ; // contains Type{LeafType}
-    _Atomic(jl_array_t*) name1; // contains non-abstract TypeName, for parents up to (excluding) Any
-    _Atomic(jl_array_t*) tname; // contains a dict of Type{TypeName}, for parents up to Any
+    _Atomic(jl_array_t*) arg1; // contains LeafType (in a map of non-abstract TypeName)
+    _Atomic(jl_array_t*) targ; // contains Type{LeafType} (in a map of non-abstract TypeName)
+    _Atomic(jl_array_t*) name1; // a map for a map for TypeName, for parents up to (excluding) Any
+    _Atomic(jl_array_t*) tname; // a map for Type{TypeName}, for parents up to (including) Any
     // next a linear list of things too complicated at this level for analysis (no more levels)
     _Atomic(jl_typemap_entry_t*) linear;
     // finally, start a new level if the type at offs is Any
@@ -682,6 +703,59 @@ typedef struct {
 
 // constants and type objects -------------------------------------------------
 
+#define JL_SMALL_TYPEOF(XX) \
+    /* kinds */ \
+    XX(typeofbottom) \
+    XX(datatype) \
+    XX(unionall) \
+    XX(uniontype) \
+    /* type parameter objects */ \
+    XX(vararg) \
+    XX(tvar) \
+    XX(symbol) \
+    XX(module) \
+    /* special GC objects */ \
+    XX(simplevector) \
+    XX(string) \
+    XX(task) \
+    /* bits types with special allocators */ \
+    XX(bool) \
+    XX(char) \
+    /*XX(float16)*/ \
+    /*XX(float32)*/ \
+    /*XX(float64)*/ \
+    XX(int16) \
+    XX(int32) \
+    XX(int64) \
+    XX(int8) \
+    XX(uint16) \
+    XX(uint32) \
+    XX(uint64) \
+    XX(uint8) \
+    /* AST objects */ \
+    /* XX(argument) */ \
+    /* XX(newvarnode) */ \
+    /* XX(slotnumber) */ \
+    /* XX(ssavalue) */ \
+    /* end of JL_SMALL_TYPEOF */
+enum jlsmall_typeof_tags {
+    jl_null_tag = 0,
+#define XX(name) jl_##name##_tag,
+    JL_SMALL_TYPEOF(XX)
+#undef XX
+    jl_tags_count,
+    jl_bitstags_first = jl_char_tag, // n.b. bool is not considered a bitstype, since it can be compared by pointer
+    jl_max_tags = 64
+};
+extern jl_datatype_t *small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
+static inline jl_value_t *jl_to_typeof(uintptr_t t)
+{
+    if (t < (jl_max_tags << 4))
+        return (jl_value_t*)small_typeof[t / sizeof(*small_typeof)];
+    return (jl_value_t*)t;
+}
+
+
 // kinds
 extern JL_DLLIMPORT jl_datatype_t *jl_typeofbottom_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_datatype_type JL_GLOBALLY_ROOTED;
@@ -695,9 +769,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_typename_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_typename_t *jl_type_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_symbol_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_ssavalue_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_abstractslot_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_slotnumber_type JL_GLOBALLY_ROOTED;
-extern JL_DLLIMPORT jl_datatype_t *jl_typedslot_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_argument_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_const_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_partial_struct_type JL_GLOBALLY_ROOTED;
@@ -923,6 +995,7 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
 JL_DLLEXPORT void jl_gc_use(jl_value_t *a);
 // Set GC memory trigger in bytes for greedy memory collecting
 JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
 
 JL_DLLEXPORT void jl_clear_malloc_data(void);
 
@@ -998,7 +1071,7 @@ STATIC_INLINE jl_value_t *jl_svecset(
 #else
 STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
 {
-    assert(jl_typeis(t,jl_simplevector_type));
+    assert(jl_typetagis(t,jl_simplevector_tag << 4));
     assert(i < jl_svec_len(t));
     // while svec is supposedly immutable, in practice we sometimes publish it first
     // and set the values lazily
@@ -1008,7 +1081,7 @@ STATIC_INLINE jl_value_t *jl_svecset(
     void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT,
     size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT
 {
-    assert(jl_typeis(t,jl_simplevector_type));
+    assert(jl_typetagis(t,jl_simplevector_tag << 4));
     assert(i < jl_svec_len(t));
     // while svec is supposedly immutable, in practice we sometimes publish it
     // first and set the values lazily. Those users occasionally might need to
@@ -1064,13 +1137,13 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set(
 STATIC_INLINE uint8_t jl_array_uint8_ref(void *a, size_t i) JL_NOTSAFEPOINT
 {
     assert(i < jl_array_len(a));
-    assert(jl_typeis(a, jl_array_uint8_type));
+    assert(jl_typetagis(a, jl_array_uint8_type));
     return ((uint8_t*)(jl_array_data(a)))[i];
 }
 STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPOINT
 {
     assert(i < jl_array_len(a));
-    assert(jl_typeis(a, jl_array_uint8_type));
+    assert(jl_typetagis(a, jl_array_uint8_type));
     ((uint8_t*)(jl_array_data(a)))[i] = x;
 }
 
@@ -1239,56 +1312,57 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_nothing(v)     (((jl_value_t*)(v)) == ((jl_value_t*)jl_nothing))
 #define jl_is_tuple(v)       (((jl_datatype_t*)jl_typeof(v))->name == jl_tuple_typename)
 #define jl_is_namedtuple(v)  (((jl_datatype_t*)jl_typeof(v))->name == jl_namedtuple_typename)
-#define jl_is_svec(v)        jl_typeis(v,jl_simplevector_type)
+#define jl_is_svec(v)        jl_typetagis(v,jl_simplevector_tag<<4)
 #define jl_is_simplevector(v) jl_is_svec(v)
-#define jl_is_datatype(v)    jl_typeis(v,jl_datatype_type)
+#define jl_is_datatype(v)    jl_typetagis(v,jl_datatype_tag<<4)
 #define jl_is_mutable(t)     (((jl_datatype_t*)t)->name->mutabl)
 #define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->name->mutabl))
 #define jl_is_immutable(t)   (!((jl_datatype_t*)t)->name->mutabl)
 #define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->name->mutabl))
-#define jl_is_uniontype(v)   jl_typeis(v,jl_uniontype_type)
-#define jl_is_typevar(v)     jl_typeis(v,jl_tvar_type)
-#define jl_is_unionall(v)    jl_typeis(v,jl_unionall_type)
-#define jl_is_typename(v)    jl_typeis(v,jl_typename_type)
-#define jl_is_int8(v)        jl_typeis(v,jl_int8_type)
-#define jl_is_int16(v)       jl_typeis(v,jl_int16_type)
-#define jl_is_int32(v)       jl_typeis(v,jl_int32_type)
-#define jl_is_int64(v)       jl_typeis(v,jl_int64_type)
-#define jl_is_uint8(v)       jl_typeis(v,jl_uint8_type)
-#define jl_is_uint16(v)      jl_typeis(v,jl_uint16_type)
-#define jl_is_uint32(v)      jl_typeis(v,jl_uint32_type)
-#define jl_is_uint64(v)      jl_typeis(v,jl_uint64_type)
-#define jl_is_bool(v)        jl_typeis(v,jl_bool_type)
-#define jl_is_symbol(v)      jl_typeis(v,jl_symbol_type)
-#define jl_is_ssavalue(v)    jl_typeis(v,jl_ssavalue_type)
-#define jl_is_slot(v)        (jl_typeis(v,jl_slotnumber_type) || jl_typeis(v,jl_typedslot_type))
-#define jl_is_expr(v)        jl_typeis(v,jl_expr_type)
-#define jl_is_binding(v)     jl_typeis(v,jl_binding_type)
-#define jl_is_globalref(v)   jl_typeis(v,jl_globalref_type)
-#define jl_is_gotonode(v)    jl_typeis(v,jl_gotonode_type)
-#define jl_is_gotoifnot(v)   jl_typeis(v,jl_gotoifnot_type)
-#define jl_is_returnnode(v)  jl_typeis(v,jl_returnnode_type)
-#define jl_is_argument(v)    jl_typeis(v,jl_argument_type)
-#define jl_is_pinode(v)      jl_typeis(v,jl_pinode_type)
-#define jl_is_phinode(v)     jl_typeis(v,jl_phinode_type)
-#define jl_is_phicnode(v)    jl_typeis(v,jl_phicnode_type)
-#define jl_is_upsilonnode(v) jl_typeis(v,jl_upsilonnode_type)
-#define jl_is_quotenode(v)   jl_typeis(v,jl_quotenode_type)
-#define jl_is_newvarnode(v)  jl_typeis(v,jl_newvarnode_type)
-#define jl_is_linenode(v)    jl_typeis(v,jl_linenumbernode_type)
-#define jl_is_method_instance(v) jl_typeis(v,jl_method_instance_type)
-#define jl_is_code_instance(v) jl_typeis(v,jl_code_instance_type)
-#define jl_is_code_info(v)   jl_typeis(v,jl_code_info_type)
-#define jl_is_method(v)      jl_typeis(v,jl_method_type)
-#define jl_is_module(v)      jl_typeis(v,jl_module_type)
-#define jl_is_mtable(v)      jl_typeis(v,jl_methtable_type)
-#define jl_is_task(v)        jl_typeis(v,jl_task_type)
-#define jl_is_string(v)      jl_typeis(v,jl_string_type)
+#define jl_is_uniontype(v)   jl_typetagis(v,jl_uniontype_tag<<4)
+#define jl_is_typevar(v)     jl_typetagis(v,jl_tvar_tag<<4)
+#define jl_is_unionall(v)    jl_typetagis(v,jl_unionall_tag<<4)
+#define jl_is_vararg(v)      jl_typetagis(v,jl_vararg_tag<<4)
+#define jl_is_typename(v)    jl_typetagis(v,jl_typename_type)
+#define jl_is_int8(v)        jl_typetagis(v,jl_int8_tag<<4)
+#define jl_is_int16(v)       jl_typetagis(v,jl_int16_tag<<4)
+#define jl_is_int32(v)       jl_typetagis(v,jl_int32_tag<<4)
+#define jl_is_int64(v)       jl_typetagis(v,jl_int64_tag<<4)
+#define jl_is_uint8(v)       jl_typetagis(v,jl_uint8_tag<<4)
+#define jl_is_uint16(v)      jl_typetagis(v,jl_uint16_tag<<4)
+#define jl_is_uint32(v)      jl_typetagis(v,jl_uint32_tag<<4)
+#define jl_is_uint64(v)      jl_typetagis(v,jl_uint64_tag<<4)
+#define jl_is_bool(v)        jl_typetagis(v,jl_bool_tag<<4)
+#define jl_is_symbol(v)      jl_typetagis(v,jl_symbol_tag<<4)
+#define jl_is_ssavalue(v)    jl_typetagis(v,jl_ssavalue_type)
+#define jl_is_slotnumber(v)  jl_typetagis(v,jl_slotnumber_type)
+#define jl_is_expr(v)        jl_typetagis(v,jl_expr_type)
+#define jl_is_binding(v)     jl_typetagis(v,jl_binding_type)
+#define jl_is_globalref(v)   jl_typetagis(v,jl_globalref_type)
+#define jl_is_gotonode(v)    jl_typetagis(v,jl_gotonode_type)
+#define jl_is_gotoifnot(v)   jl_typetagis(v,jl_gotoifnot_type)
+#define jl_is_returnnode(v)  jl_typetagis(v,jl_returnnode_type)
+#define jl_is_argument(v)    jl_typetagis(v,jl_argument_type)
+#define jl_is_pinode(v)      jl_typetagis(v,jl_pinode_type)
+#define jl_is_phinode(v)     jl_typetagis(v,jl_phinode_type)
+#define jl_is_phicnode(v)    jl_typetagis(v,jl_phicnode_type)
+#define jl_is_upsilonnode(v) jl_typetagis(v,jl_upsilonnode_type)
+#define jl_is_quotenode(v)   jl_typetagis(v,jl_quotenode_type)
+#define jl_is_newvarnode(v)  jl_typetagis(v,jl_newvarnode_type)
+#define jl_is_linenode(v)    jl_typetagis(v,jl_linenumbernode_type)
+#define jl_is_method_instance(v) jl_typetagis(v,jl_method_instance_type)
+#define jl_is_code_instance(v) jl_typetagis(v,jl_code_instance_type)
+#define jl_is_code_info(v)   jl_typetagis(v,jl_code_info_type)
+#define jl_is_method(v)      jl_typetagis(v,jl_method_type)
+#define jl_is_module(v)      jl_typetagis(v,jl_module_tag<<4)
+#define jl_is_mtable(v)      jl_typetagis(v,jl_methtable_type)
+#define jl_is_task(v)        jl_typetagis(v,jl_task_tag<<4)
+#define jl_is_string(v)      jl_typetagis(v,jl_string_tag<<4)
 #define jl_is_cpointer(v)    jl_is_cpointer_type(jl_typeof(v))
 #define jl_is_pointer(v)     jl_is_cpointer_type(jl_typeof(v))
-#define jl_is_uint8pointer(v)jl_typeis(v,jl_uint8pointer_type)
+#define jl_is_uint8pointer(v)jl_typetagis(v,jl_uint8pointer_type)
 #define jl_is_llvmpointer(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_llvmpointer_typename)
-#define jl_is_intrinsic(v)   jl_typeis(v,jl_intrinsic_type)
+#define jl_is_intrinsic(v)   jl_typetagis(v,jl_intrinsic_type)
 #define jl_array_isbitsunion(a) (!(((jl_array_t*)(a))->flags.ptrarray) && jl_is_uniontype(jl_tparam0(jl_typeof(a))))
 
 JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b);
@@ -1299,9 +1373,16 @@ STATIC_INLINE int jl_is_kind(jl_value_t *v) JL_NOTSAFEPOINT
             v==(jl_value_t*)jl_unionall_type || v==(jl_value_t*)jl_typeofbottom_type);
 }
 
+STATIC_INLINE int jl_is_kindtag(uintptr_t t) JL_NOTSAFEPOINT
+{
+    t >>= 4;
+    return (t==(uintptr_t)jl_uniontype_tag || t==(uintptr_t)jl_datatype_tag ||
+            t==(uintptr_t)jl_unionall_tag || t==(uintptr_t)jl_typeofbottom_tag);
+}
+
 STATIC_INLINE int jl_is_type(jl_value_t *v) JL_NOTSAFEPOINT
 {
-    return jl_is_kind(jl_typeof(v));
+    return jl_is_kindtag(jl_typetagof(v));
 }
 
 STATIC_INLINE int jl_is_primitivetype(void *v) JL_NOTSAFEPOINT
@@ -1417,33 +1498,34 @@ STATIC_INLINE int jl_is_array_zeroinit(jl_array_t *a) JL_NOTSAFEPOINT
 // object identity
 JL_DLLEXPORT int jl_egal(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT;
 
-STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT
 {
-    if (dt->name->mutabl) {
-        if (dt == jl_simplevector_type || dt == jl_string_type || dt == jl_datatype_type)
-            return jl_egal__special(a, b, dt);
-        return 0;
+    if (dtag < jl_max_tags << 4) {
+        if (dtag == jl_symbol_tag << 4 || dtag == jl_bool_tag << 4)
+            return 0;
     }
-    return jl_egal__bits(a, b, dt);
+    else if (((jl_datatype_t*)dtag)->name->mutabl)
+        return 0;
+    return jl_egal__bitstag(a, b, dtag);
 }
 
 STATIC_INLINE int jl_egal_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
 {
     if (a == b)
         return 1;
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
-    if (dt != (jl_datatype_t*)jl_typeof(b))
+    uintptr_t dtag = jl_typetagof(a);
+    if (dtag != jl_typetagof(b))
         return 0;
-    return jl_egal__unboxed_(a, b, dt);
+    return jl_egal__unboxed_(a, b, dtag);
 }
 #define jl_egal(a, b) jl_egal_((a), (b))
 
 // type predicates and basic operations
-JL_DLLEXPORT int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_has_typevar(jl_value_t *t, jl_tvar_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua);
@@ -1481,8 +1563,8 @@ JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1);
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2);
 JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt);
 JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt);
-JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params);
-JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
+JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params);
+JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name,
                                             jl_module_t *module,
                                             jl_datatype_t *super,
@@ -1516,7 +1598,6 @@ JL_DLLEXPORT jl_svec_t *jl_alloc_svec(size_t n);
 JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n);
 JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a);
 JL_DLLEXPORT jl_svec_t *jl_svec_fill(size_t n, jl_value_t *x);
-JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v);
 JL_DLLEXPORT jl_sym_t *jl_symbol(const char *str) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_symbol_lookup(const char *str) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT;
@@ -1528,7 +1609,7 @@ JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
                                                  _Atomic(jl_value_t*) *bp,
                                                  jl_binding_t *bnd);
 JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
-JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo);
+JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_NOTSAFEPOINT;
@@ -1641,10 +1722,10 @@ JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d);
 JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s);
 
 // modules and global variables
-extern JL_DLLEXPORT jl_module_t *jl_main_module JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_module_t *jl_core_module JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_module_t *jl_base_module JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_module_t *jl_top_module JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_module_t *jl_main_module JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_module_t *jl_core_module JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_module_t *jl_base_module JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_module_t *jl_top_module JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent);
 JL_DLLEXPORT void jl_set_module_nospecialize(jl_module_t *self, int on);
 JL_DLLEXPORT void jl_set_module_optlevel(jl_module_t *self, int lvl);
@@ -1707,9 +1788,10 @@ JL_DLLEXPORT long jl_getallocationgranularity(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_UNAME(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT;
+JL_DLLIMPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT;
 extern JL_DLLIMPORT int jl_n_threadpools;
 extern JL_DLLIMPORT _Atomic(int) jl_n_threads;
+extern JL_DLLIMPORT int jl_n_gcthreads;
 extern JL_DLLIMPORT int *jl_n_threads_per_pool;
 
 // environment entries
@@ -1780,7 +1862,7 @@ typedef enum {
     //JL_IMAGE_LIBJULIA = 2,
 } JL_IMAGE_SEARCH;
 
-JL_DLLEXPORT const char *jl_get_libdir(void);
+JL_DLLIMPORT const char *jl_get_libdir(void);
 JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel);
 JL_DLLEXPORT void jl_init(void);
 JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir,
@@ -1801,7 +1883,7 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle);
 JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos);
 JL_DLLEXPORT void jl_restore_system_image(const char *fname);
 JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
-JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete);
+JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage);
 
 JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
 JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci);
@@ -1867,15 +1949,14 @@ JL_DLLEXPORT void jl_register_newmeth_tracer(void (*callback)(jl_method_t *trace
 JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr JL_MAYBE_UNROOTED);
 
 // IR representation
-JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code);
-JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_array_t *data);
-JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_array_t *data) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_array_t *data) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_array_t *data) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_array_t *data) JL_NOTSAFEPOINT;
-JL_DLLEXPORT ssize_t jl_ir_nslots(jl_array_t *data) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_array_t *data, size_t i) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code);
+JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_value_t *data);
+JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_value_t *data) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_value_t *data) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_value_t *data) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data) JL_NOTSAFEPOINT;
+JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_value_t *data, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms);
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms);
 JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
@@ -1941,6 +2022,8 @@ typedef struct _jl_handler_t {
     size_t world_age;
 } jl_handler_t;
 
+#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1
+
 typedef struct _jl_task_t {
     JL_DATA_TYPE
     jl_value_t *next; // invasive linked list for scheduler
@@ -1950,18 +2033,34 @@ typedef struct _jl_task_t {
     jl_value_t *result;
     jl_value_t *logstate;
     jl_function_t *start;
-    uint64_t rngState[4];
+    // 4 byte padding on 32-bit systems
+    // uint32_t padding0;
+    uint64_t rngState[JL_RNG_SIZE];
     _Atomic(uint8_t) _state;
     uint8_t sticky; // record whether this Task can be migrated to a new thread
     _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
+    // 1 byte padding
+    // uint8_t padding1;
     // multiqueue priority
     uint16_t priority;
 
 // hidden state:
+
+#ifdef USE_TRACY
+    const char *name;
+#endif
     // id of owning thread - does not need to be defined until the task runs
     _Atomic(int16_t) tid;
     // threadpool id
     int8_t threadpoolid;
+    // Reentrancy bits
+    // Bit 0: 1 if we are currently running inference/codegen
+    // Bit 1-2: 0-3 counter of how many times we've reentered inference
+    // Bit 3: 1 if we are writing the image and inference is illegal
+    uint8_t reentrant_timing;
+    // 2 bytes of padding on 32-bit, 6 bytes on 64-bit
+    // uint16_t padding2_32;
+    // uint48_t padding2_64;
     // saved gc stack top for context switches
     jl_gcframe_t *gcstack;
     size_t world_age;
@@ -1975,9 +2074,6 @@ typedef struct _jl_task_t {
     jl_ucontext_t ctx;
     void *stkbuf; // malloc'd memory (either copybuf or stack)
     size_t bufsz; // actual sizeof stkbuf
-    uint64_t inference_start_time; // time when inference started
-    uint16_t reentrant_inference; // How many times we've reentered inference
-    uint16_t reentrant_timing; // How many times we've reentered timing
     unsigned int copy_stack:31; // sizeof stack for copybuf
     unsigned int started:1;
 } jl_task_t;
@@ -1998,6 +2094,9 @@ JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct);
 JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
 
+extern JL_DLLIMPORT int jl_task_gcstack_offset;
+extern JL_DLLIMPORT int jl_task_ptls_offset;
+
 #include "julia_locks.h"   // requires jl_task_t definition
 
 JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh);
@@ -2018,7 +2117,7 @@ void (jl_longjmp)(jmp_buf _Buf, int _Value);
 JL_DLLEXPORT int (ijl_setjmp)(jmp_buf _Buf);
 void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #endif
-#ifdef LIBRARY_EXPORTS
+#ifdef JL_LIBRARY_EXPORTS
 #define jl_setjmp_f ijl_setjmp
 #define jl_setjmp_name "ijl_setjmp"
 #define jl_setjmp(a,b) ijl_setjmp(a)
@@ -2046,7 +2145,7 @@ void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #define jl_setjmp(a,b) sigsetjmp(a,b)
 #if defined(_COMPILER_ASAN_ENABLED_) && __GLIBC__
 // Bypass the ASAN longjmp wrapper - we're unpoisoning the stack ourselves.
-extern int __attribute__ ((nothrow)) (__libc_siglongjmp)(jl_jmp_buf buf, int val);
+JL_DLLIMPORT int __attribute__ ((nothrow)) (__libc_siglongjmp)(jl_jmp_buf buf, int val);
 #define jl_longjmp(a,b) __libc_siglongjmp(a,b)
 #else
 #define jl_longjmp(a,b) siglongjmp(a,b)
@@ -2284,7 +2383,7 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind;
 extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
 extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
 extern const uint8_t MMTK_NEEDS_WRITE_BARRIER;
-extern const uint8_t OBJECT_BARRIER;
+extern const uint8_t MMTK_OBJECT_BARRIER;
 extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
 
 // Directly call into MMTk for write barrier (debugging only)
@@ -2298,7 +2397,7 @@ STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSA
 // Inlined fastpath
 STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
-    if (MMTK_NEEDS_WRITE_BARRIER == OBJECT_BARRIER) {
+    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
         intptr_t addr = (intptr_t) (void*) parent;
         uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
         intptr_t shift = (addr >> 3) & 0b111;
diff --git a/src/julia_atomics.h b/src/julia_atomics.h
index cb14e535cd010..c4488f774c987 100644
--- a/src/julia_atomics.h
+++ b/src/julia_atomics.h
@@ -73,7 +73,18 @@ enum jl_memory_order {
  * are). We also need to access these atomic variables from the LLVM JIT code
  * which is very hard unless the layout of the object is fully specified.
  */
-#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
+
+/**
+ * On modern Intel and AMD platforms `lock orq` on the SP is faster than
+ * `mfence`. GCC 11 did switch to this representation. See #48123
+ */
+#if defined(_CPU_X86_64_) && \
+    ((defined(__GNUC__) && __GNUC__ < 11) || \
+     (defined(__clang__)))
+    #define jl_fence() __asm__ volatile("lock orq $0 , (%rsp)")
+#else
+    #define jl_fence() atomic_thread_fence(memory_order_seq_cst)
+#endif
 #define jl_fence_release() atomic_thread_fence(memory_order_release)
 #define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)
 
@@ -149,6 +160,11 @@ bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::me
 {
      return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, order, order);
 }
+template<class T, class S>
+bool jl_atomic_cmpswap_acqrel(std::atomic<T> *ptr, T *expected, S val)
+{
+     return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, memory_order_acq_rel, memory_order_acquire);
+}
 #define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
 template<class T, class S>
 T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
@@ -180,6 +196,8 @@ extern "C" {
     atomic_compare_exchange_strong(obj, expected, desired)
 #  define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
     atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
+#define jl_atomic_cmpswap_acqrel(obj, expected, desired) \
+    atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_acq_rel, memory_order_acquire)
 // TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
 #  define jl_atomic_exchange(obj, desired)       \
     atomic_exchange(obj, desired)
@@ -240,6 +258,7 @@ extern "C" {
 #define jl_atomic_exchange_relaxed jl_atomic_exchange
 
 #undef jl_atomic_cmpswap
+#undef jl_atomic_cmpswap_acqrel
 #undef jl_atomic_cmpswap_relaxed
 #define jl_atomic_cmpswap(obj, expected, desired) \
     (__extension__({ \
@@ -253,6 +272,7 @@ extern "C" {
                 *x__analyzer__ = temp__analyzer__; \
             eq__analyzer__; \
         }))
+#define jl_atomic_cmpswap_acqrel jl_atomic_cmpswap
 #define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
 
 #undef jl_atomic_store
diff --git a/src/julia_internal.h b/src/julia_internal.h
index fb939e81b4a69..5e5b0ebb76e41 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -4,6 +4,7 @@
 #define JL_INTERNAL_H
 
 #include "options.h"
+#include "julia_assert.h"
 #include "julia_locks.h"
 #include "julia_threads.h"
 #include "support/utils.h"
@@ -24,6 +25,9 @@
 #include <sys/time.h>
 #endif
 
+// pragma visibility is more useful than -fvisibility
+#pragma GCC visibility push(hidden)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -47,9 +51,9 @@ static inline uintptr_t jmpbuf_sp(jl_jmp_buf *buf)
 #else
 #error Need to implement jmpbuf_sp for this architecture
 #endif
-void __sanitizer_start_switch_fiber(void**, const void*, size_t);
-void __sanitizer_finish_switch_fiber(void*, const void**, size_t*);
-extern void __asan_unpoison_stack_memory(uintptr_t addr, size_t size);
+JL_DLLIMPORT void __sanitizer_start_switch_fiber(void**, const void*, size_t);
+JL_DLLIMPORT void __sanitizer_finish_switch_fiber(void*, const void**, size_t*);
+JL_DLLIMPORT void __asan_unpoison_stack_memory(uintptr_t addr, size_t size);
 static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf)
 {
     if (!ct)
@@ -69,9 +73,9 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf) JL_N
 static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) JL_NOTSAFEPOINT {}
 #endif
 #ifdef _COMPILER_MSAN_ENABLED_
-void __msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFEPOINT;
-void __msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT;
-void __msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT;
+JL_DLLIMPORT void __msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFEPOINT;
+JL_DLLIMPORT void __msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT;
+JL_DLLIMPORT void __msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT;
 static inline void msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT {
     __msan_allocated_memory(a, size);
 }
@@ -87,10 +91,10 @@ static inline void msan_allocated_memory(const volatile void *a, size_t size) JL
 static inline void msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT {}
 #endif
 #ifdef _COMPILER_TSAN_ENABLED_
-void *__tsan_create_fiber(unsigned flags);
-void *__tsan_get_current_fiber(void);
-void __tsan_destroy_fiber(void *fiber);
-void __tsan_switch_to_fiber(void *fiber, unsigned flags);
+JL_DLLIMPORT void *__tsan_create_fiber(unsigned flags);
+JL_DLLIMPORT void *__tsan_get_current_fiber(void);
+JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber);
+JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags);
 #endif
 #ifdef __cplusplus
 }
@@ -301,11 +305,13 @@ static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOT
 #define GC_MARKED 1 // reachable and young
 #define GC_OLD    2 // if it is reachable it will be marked as old
 #define GC_OLD_MARKED (GC_OLD | GC_MARKED) // reachable and old
+#define GC_IN_IMAGE 4
 
 // useful constants
-extern jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED;
-extern jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED;
 extern jl_methtable_t *jl_kwcall_mt JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT jl_method_t *jl_opaque_closure_method JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter;
 
 typedef void (*tracer_cb)(jl_value_t *tracee);
@@ -313,9 +319,10 @@ extern tracer_cb jl_newmeth_tracer;
 void jl_call_tracer(tracer_cb callback, jl_value_t *tracee);
 void print_func_loc(JL_STREAM *s, jl_method_t *m);
 extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
-extern arraylist_t jl_linkage_blobs;                        // external linkage: sysimg/pkgimages
-extern jl_array_t *jl_build_ids JL_GLOBALLY_ROOTED;         // external linkage: corresponding build_ids
-extern arraylist_t jl_image_relocs;                        // external linkage: sysimg/pkgimages
+JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages
+JL_DLLEXPORT extern arraylist_t jl_image_relocs;  // external linkage: sysimg/pkgimages
+extern arraylist_t eytzinger_image_tree;
+extern arraylist_t eytzinger_idxs;
 
 extern JL_DLLEXPORT size_t jl_page_size;
 extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED;
@@ -326,15 +333,16 @@ extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT extern int jl_lineno;
 JL_DLLEXPORT extern const char *jl_filename;
 
-void enable_collection(void);
-void disable_collection(void);
+extern void enable_collection(void);
+extern void disable_collection(void);
 jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
                                    int osize);
 jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
 #ifdef MMTK_GC
-JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty);
-JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
-extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator);
+JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty);
+JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
+JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator);
+JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls);
 #endif // MMTK_GC
 JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
 extern uv_mutex_t gc_perm_lock;
@@ -542,14 +550,12 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
     o->header = tag | GC_OLD_MARKED;
 #ifdef MMTK_GC
     jl_ptls_t ptls = jl_current_task->ptls;
-    post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1);
+    mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1);
 #endif
     return jl_valueof(o);
 }
-jl_value_t *jl_permbox8(jl_datatype_t *t, int8_t x);
-jl_value_t *jl_permbox16(jl_datatype_t *t, int16_t x);
-jl_value_t *jl_permbox32(jl_datatype_t *t, int32_t x);
-jl_value_t *jl_permbox64(jl_datatype_t *t, int64_t x);
+jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x);
+jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x);
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
 
 // this sizeof(__VA_ARGS__) trick can't be computed until C11, but that only matters to Clang in some situations
@@ -636,6 +642,9 @@ void jl_gc_reset_alloc_count(void);
 uint32_t jl_get_gs_ctr(void);
 void jl_set_gs_ctr(uint32_t ctr);
 
+typedef struct _jl_static_show_config_t { uint8_t quiet; } jl_static_show_config_t;
+size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_config_t ctx) JL_NOTSAFEPOINT;
+
 STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT
 {
      if (dt->layout->first_ptr >= 0) {
@@ -651,7 +660,6 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO
 typedef struct {
     uint8_t inferred:1;
     uint8_t propagate_inbounds:1;
-    uint8_t pure:1;
     uint8_t has_fcall:1;
     uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none
     uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
@@ -666,8 +674,6 @@ typedef union {
 
 JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
 JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
-jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
-void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec);
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
         size_t min_world, size_t max_world);
@@ -676,12 +682,12 @@ jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
 
 JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world);
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types);
-jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT);
+jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
 int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile);
 jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ast);
 JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void);
-void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
-                              int binding_effects);
+JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
+                                           int binding_effects);
 
 int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT;
 int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller);
@@ -715,7 +721,7 @@ void jl_install_thread_signal_handler(jl_ptls_t ptls);
 JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b);
 
 extern uv_loop_t *jl_io_loop;
-void jl_uv_flush(uv_stream_t *stream);
+JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream);
 
 typedef struct jl_typeenv_t {
     jl_tvar_t *var;
@@ -748,6 +754,7 @@ JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b);
 jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n);
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals);
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val);
+jl_unionall_t *jl_rename_unionall(jl_unionall_t *u);
 JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u);
@@ -777,6 +784,7 @@ void jl_init_main_module(void);
 JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
 jl_array_t *jl_get_loaded_modules(void);
 JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
+int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT;
 
 void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type);
 jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded);
@@ -808,7 +816,6 @@ jl_methtable_t *jl_kwmethod_table_for(
     jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_methtable_t *jl_method_get_table(
     jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
-jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT);
 
 JL_DLLEXPORT int jl_pointer_egal(jl_value_t *t);
 JL_DLLEXPORT jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
@@ -816,7 +823,7 @@ void jl_compute_field_offsets(jl_datatype_t *st);
 jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
                                              int isunboxed, int hasptr, int isunion, int elsz);
 void jl_module_run_initializer(jl_module_t *m);
-jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc);
+JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc);
 JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *sym, jl_binding_t *b);
 extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED;
 extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
@@ -824,7 +831,6 @@ extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTE
 extern jl_array_t *jl_global_roots_table JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val JL_MAYBE_UNROOTED);
-int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
 
 jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
     jl_value_t *source,  jl_value_t **env, size_t nenv, int do_compile);
@@ -842,11 +848,6 @@ typedef enum {
     JL_VARARG_UNBOUND = 3
 } jl_vararg_kind_t;
 
-STATIC_INLINE int jl_is_vararg(jl_value_t *v) JL_NOTSAFEPOINT
-{
-    return jl_typeof(v) == (jl_value_t*)jl_vararg_type;
-}
-
 STATIC_INLINE jl_value_t *jl_unwrap_vararg(jl_vararg_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
     assert(jl_is_vararg((jl_value_t*)v));
@@ -907,7 +908,6 @@ void jl_init_flisp(void);
 void jl_init_common_symbols(void);
 void jl_init_primitives(void) JL_GC_DISABLED;
 void jl_init_llvm(void);
-void jl_init_codegen(void);
 void jl_init_runtime_ccall(void);
 void jl_init_intrinsic_functions(void);
 void jl_init_intrinsic_properties(void);
@@ -921,15 +921,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT;
 void jl_init_int32_int64_cache(void);
 JL_DLLEXPORT void jl_init_options(void);
 
-void jl_teardown_codegen(void) JL_NOTSAFEPOINT;
-
 void jl_set_base_ctx(char *__stk);
 
 extern JL_DLLEXPORT ssize_t jl_tls_offset;
 extern JL_DLLEXPORT const int jl_tls_elf_support;
 void jl_init_threading(void);
 void jl_start_threads(void);
-int jl_effective_threads(void);
 
 // Whether the GC is running
 extern char *jl_safepoint_pages;
@@ -1005,55 +1002,12 @@ static inline void jl_set_gc_and_wait(void)
 // Query if a Julia object is if a permalloc region (due to part of a sys- pkg-image)
 STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
 {
-    if (!jl_build_ids)
-        return 0;
-    assert(jl_is_array(jl_build_ids));
-    return jl_array_len(jl_build_ids);
-}
-
-// TODO: Makes this a binary search
-STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT {
-    size_t i, nblobs = n_linkage_blobs();
-    assert(jl_linkage_blobs.len == 2*nblobs);
-    for (i = 0; i < nblobs; i++) {
-        uintptr_t left = (uintptr_t)jl_linkage_blobs.items[2*i];
-        uintptr_t right = (uintptr_t)jl_linkage_blobs.items[2*i + 1];
-        if (left < (uintptr_t)v && (uintptr_t)v <= right) {
-            // the last object may be a singleton (v is shifted by a type tag, so we use exclusive bounds here)
-            break;
-        }
-    }
-    return i;
-}
-
-STATIC_INLINE uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT {
-    size_t blob = external_blob_index(v);
-    if (blob == n_linkage_blobs()) {
-        return 0;
-    }
-    return 1;
+    return jl_image_relocs.len;
 }
 
-typedef struct {
-    LLVMOrcThreadSafeModuleRef TSM;
-    LLVMValueRef F;
-} jl_llvmf_dump_t;
-
-JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
-        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
-JL_DLLEXPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
-JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
-JL_DLLEXPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo);
-JL_DLLEXPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
+size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;
 
-void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache);
-void jl_dump_native(void *native_code,
-        const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
-        const char *sysimg_data, size_t sysimg_len, ios_t *s);
-void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs);
-void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs);
-JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
-        int32_t *func_idx, int32_t *specfunc_idx);
+uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;
 
 // the first argument to jl_idtable_rehash is used to return a value
 // make sure it is rooted if it is used after the function returns
@@ -1066,8 +1020,8 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo
 JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache);
 jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp);
 JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world);
-JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
-JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world);
+JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_value_t *type, size_t world);
 JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(
     jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams);
 jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins);
@@ -1075,14 +1029,15 @@ JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee,
 JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller);
 JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
                                      jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+JL_DLLEXPORT extern jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_NOTSAFEPOINT;
 
 uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
 jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs);
 
 JL_DLLEXPORT int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
 
-jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                     size_t lineno, size_t offset, jl_value_t *options);
+JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
+                                  size_t lineno, size_t offset, jl_value_t *options);
 
 //--------------------------------------------------
 // Backtraces
@@ -1199,8 +1154,6 @@ typedef struct {
     int inlined;
 } jl_frame_t;
 
-// Might be called from unmanaged thread
-uint64_t jl_getUnwindInfo(uint64_t dwBase);
 #ifdef _OS_WINDOWS_
 #include <dbghelp.h>
 JL_DLLEXPORT EXCEPTION_DISPOSITION NTAPI __julia_personality(
@@ -1219,7 +1172,9 @@ extern JL_DLLEXPORT uv_mutex_t jl_in_stackwalk;
 #elif !defined(JL_DISABLE_LIBUNWIND)
 // This gives unwind only local unwinding options ==> faster code
 #  define UNW_LOCAL_ONLY
+#pragma GCC visibility push(default)
 #  include <libunwind.h>
+#pragma GCC visibility pop
 typedef unw_context_t bt_context_t;
 typedef unw_cursor_t bt_cursor_t;
 #  if (!defined(SYSTEM_LIBUNWIND) || UNW_VERSION_MAJOR > 1 ||   \
@@ -1244,7 +1199,6 @@ size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_cont
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
 void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *ct);
 JL_DLLEXPORT void jl_raise_debugger(void) JL_NOTSAFEPOINT;
-int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;
 void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT;
 void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_data) JL_NOTSAFEPOINT;
@@ -1328,11 +1282,12 @@ JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_init_rand(void);
 
+JL_DLLEXPORT extern void *jl_exe_handle;
+JL_DLLEXPORT extern void *jl_libjulia_handle;
 JL_DLLEXPORT extern void *jl_libjulia_internal_handle;
 JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle;
+
 #if defined(_OS_WINDOWS_)
-JL_DLLEXPORT extern void *jl_exe_handle;
-JL_DLLEXPORT extern void *jl_libjulia_handle;
 JL_DLLEXPORT extern const char *jl_crtdll_basename;
 extern void *jl_ntdll_handle;
 extern void *jl_kernel32_handle;
@@ -1342,6 +1297,7 @@ void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT;
 #endif
 
 JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err);
+void *jl_find_dynamic_library_by_addr(void *symbol);
 #define jl_get_library(f_lib) jl_get_library_(f_lib, 1)
 JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd);
 JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name);
@@ -1351,11 +1307,11 @@ JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
     jl_unionall_t *env, jl_value_t **vals);
 
 
-// Windows only
+// Special filenames used to refer to internal julia libraries
 #define JL_EXE_LIBNAME                  ((const char*)1)
 #define JL_LIBJULIA_DL_LIBNAME          ((const char*)2)
 #define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)3)
-JL_DLLEXPORT const char *jl_dlfind_win32(const char *name);
+JL_DLLEXPORT const char *jl_dlfind(const char *name);
 
 // libuv wrappers:
 JL_DLLEXPORT int jl_fs_rename(const char *src_path, const char *dst_path);
@@ -1397,7 +1353,6 @@ JL_DLLEXPORT jl_value_t *jl_add_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_sub_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_mul_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_div_float(jl_value_t *a, jl_value_t *b);
-JL_DLLEXPORT jl_value_t *jl_rem_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_fma_float(jl_value_t *a, jl_value_t *b, jl_value_t *c);
 JL_DLLEXPORT jl_value_t *jl_muladd_float(jl_value_t *a, jl_value_t *b, jl_value_t *c);
 
@@ -1534,12 +1489,16 @@ struct typemap_intersection_env {
     jl_typemap_intersection_visitor_fptr const fptr; // fptr to call on a match
     jl_value_t *const type; // type to match
     jl_value_t *const va; // the tparam0 for the vararg in type, if applicable (or NULL)
+    size_t search_slurp;
     // output values
+    size_t min_valid;
+    size_t max_valid;
     jl_value_t *ti; // intersection type
     jl_svec_t *env; // intersection env (initialize to null to perform intersection without an environment)
     int issubty;    // if `a <: b` is true in `intersect(a,b)`
 };
 int jl_typemap_intersection_visitor(jl_typemap_t *a, int offs, struct typemap_intersection_env *closure);
+void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure);
 
 // -- simplevector.c -- //
 
@@ -1547,6 +1506,12 @@ int jl_typemap_intersection_visitor(jl_typemap_t *a, int offs, struct typemap_in
 JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i);
 
+// check whether the specified number of arguments is compatible with the
+// specified number of parameters of the tuple type
+JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0);
+JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0);
 
 JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
@@ -1624,7 +1589,6 @@ extern JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
-extern JL_DLLEXPORT jl_sym_t *jl_pure_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_meta_sym;
 extern JL_DLLEXPORT jl_sym_t *jl_inert_sym;
@@ -1676,8 +1640,6 @@ JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, c
 
 struct _jl_image_fptrs_t;
 
-void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs,
-                       jl_method_instance_t **linfos, size_t n);
 void jl_write_coverage_data(const char*);
 void jl_write_malloc_log(void);
 
@@ -1734,10 +1696,49 @@ JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len);
 
+// -- exports from codegen -- //
+
+JL_DLLIMPORT jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
+JL_DLLIMPORT void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec);
+JL_DLLIMPORT void jl_generate_fptr_for_oc_wrapper(jl_code_instance_t *unspec);
+JL_DLLIMPORT int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
+
+typedef struct {
+    LLVMOrcThreadSafeModuleRef TSM;
+    LLVMValueRef F;
+} jl_llvmf_dump_t;
+
+JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
+JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
+JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
+JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo);
+JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
+
+JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world);
+JL_DLLIMPORT void jl_dump_native(void *native_code,
+        const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
+        const char *sysimg_data, size_t sysimg_len, ios_t *s);
+JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs);
+JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs);
+JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
+        int32_t *func_idx, int32_t *specfunc_idx);
+JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs,
+                                    jl_method_instance_t **linfos, size_t n);
+
+JL_DLLIMPORT void jl_init_codegen(void);
+JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT;
+JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
+// n.b. this might be called from unmanaged thread:
+JL_DLLIMPORT uint64_t jl_getUnwindInfo(uint64_t dwBase);
+
 #ifdef __cplusplus
 }
 #endif
 
+#pragma GCC visibility pop
+
+
 #ifdef USE_DTRACE
 // Generated file, needs to be searched in include paths so that the builddir
 // retains priority
diff --git a/src/julia_locks.h b/src/julia_locks.h
index 7db37b03f0bed..47e258f69aab2 100644
--- a/src/julia_locks.h
+++ b/src/julia_locks.h
@@ -3,8 +3,6 @@
 #ifndef JL_LOCKS_H
 #define JL_LOCKS_H
 
-#include "julia_assert.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -17,6 +15,7 @@ extern "C" {
 // The JL_LOCK* and JL_UNLOCK* macros are no-op for non-threading build
 // while the jl_mutex_* functions are always locking and unlocking the locks.
 
+JL_DLLEXPORT void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint);
 JL_DLLEXPORT void _jl_mutex_lock(jl_task_t *self, jl_mutex_t *lock);
 JL_DLLEXPORT int _jl_mutex_trylock_nogc(jl_task_t *self, jl_mutex_t *lock) JL_NOTSAFEPOINT;
@@ -86,13 +85,12 @@ static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT JL_NOT
     _jl_mutex_unlock_nogc(lock);
 }
 
-static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT
+static inline void jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT
 {
-    jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL);
-    lock->count = 0;
+    _jl_mutex_init(lock, name);
 }
 
-#define JL_MUTEX_INIT(m) jl_mutex_init(m)
+#define JL_MUTEX_INIT(m, name) jl_mutex_init(m, name)
 #define JL_LOCK(m) jl_mutex_lock(m)
 #define JL_UNLOCK(m) jl_mutex_unlock(m)
 #define JL_LOCK_NOGC(m) jl_mutex_lock_nogc(m)
diff --git a/src/julia_threads.h b/src/julia_threads.h
index 17d9d0857dc39..46ad724b71aa0 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -8,6 +8,7 @@
 #include "mmtkMutator.h"
 #endif
 
+#include "work-stealing-queue.h"
 #include "julia_atomics.h"
 #ifndef _OS_WINDOWS_
 #include "pthread.h"
@@ -83,6 +84,7 @@ typedef struct {
     void *stacktop;
 } _jl_ucontext_t;
 #endif
+#pragma GCC visibility push(default)
 #if defined(JL_HAVE_UNW_CONTEXT)
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
@@ -92,6 +94,7 @@ typedef unw_context_t _jl_ucontext_t;
 #include <ucontext.h>
 typedef ucontext_t _jl_ucontext_t;
 #endif
+#pragma GCC visibility pop
 #endif
 
 typedef struct {
@@ -175,12 +178,9 @@ typedef struct {
 } jl_thread_heap_t;
 
 typedef struct {
-    struct _jl_gc_chunk_t *chunk_start;
-    struct _jl_gc_chunk_t *current_chunk;
-    struct _jl_gc_chunk_t *chunk_end;
-    struct _jl_value_t **start;
-    struct _jl_value_t **current;
-    struct _jl_value_t **end;
+    ws_queue_t chunk_queue;
+    ws_queue_t ptr_queue;
+    arraylist_t reclaim_set;
 } jl_gc_markqueue_t;
 
 typedef struct {
@@ -288,13 +288,13 @@ typedef struct _jl_tls_states_t {
 #endif
 
     // some hidden state (usually just because we don't have the type's size declaration)
-#ifdef LIBRARY_EXPORTS
+#ifdef JL_LIBRARY_EXPORTS
     uv_mutex_t sleep_lock;
     uv_cond_t wake_signal;
 #endif
 } jl_tls_states_t;
 
-#ifndef LIBRARY_EXPORTS
+#ifndef JL_LIBRARY_EXPORTS
 // deprecated (only for external consumers)
 JL_DLLEXPORT void *jl_get_ptls_states(void);
 #endif
@@ -302,23 +302,28 @@ JL_DLLEXPORT void *jl_get_ptls_states(void);
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
 #  define jl_cpu_pause() _mm_delay_64(100)
+#  define jl_cpu_suspend() _mm_delay_64(100)
 #  define jl_cpu_wake() ((void)0)
 #  define JL_CPU_WAKE_NOOP 1
 #elif defined(_CPU_X86_64_) || defined(_CPU_X86_)  /* !__MIC__ */
 #  define jl_cpu_pause() _mm_pause()
+#  define jl_cpu_suspend() _mm_pause()
 #  define jl_cpu_wake() ((void)0)
 #  define JL_CPU_WAKE_NOOP 1
 #elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7)
-#  define jl_cpu_pause() __asm__ volatile ("wfe" ::: "memory")
+#  define jl_cpu_pause() __asm__ volatile ("isb" ::: "memory")
+#  define jl_cpu_suspend() __asm__ volatile ("wfe" ::: "memory")
 #  define jl_cpu_wake() __asm__ volatile ("sev" ::: "memory")
 #  define JL_CPU_WAKE_NOOP 0
 #else
 #  define jl_cpu_pause() ((void)0)
+#  define jl_cpu_suspend() ((void)0)
 #  define jl_cpu_wake() ((void)0)
 #  define JL_CPU_WAKE_NOOP 1
 #endif
 
 JL_DLLEXPORT void (jl_cpu_pause)(void);
+JL_DLLEXPORT void (jl_cpu_suspend)(void);
 JL_DLLEXPORT void (jl_cpu_wake)(void);
 
 #ifdef __clang_gcanalyzer__
@@ -375,6 +380,7 @@ JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void);
 JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void);
 JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct);
 extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers;
+JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void);
 
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
 
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index 6ba0c73c14785..d24c08b4b4930 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -7,6 +7,8 @@
 
 #include <llvm/IR/IntrinsicInst.h>
 
+#define DEBUG_TYPE "escape-analysis"
+
 using namespace llvm;
 using namespace jl_alloc;
 
@@ -110,40 +112,58 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
     return true;
 }
 
-JL_USED_FUNC void AllocUseInfo::dump()
+JL_USED_FUNC void AllocUseInfo::dump(llvm::raw_ostream &OS)
 {
-    jl_safe_printf("escaped: %d\n", escaped);
-    jl_safe_printf("addrescaped: %d\n", addrescaped);
-    jl_safe_printf("returned: %d\n", returned);
-    jl_safe_printf("haserror: %d\n", haserror);
-    jl_safe_printf("hasload: %d\n", hasload);
-    jl_safe_printf("haspreserve: %d\n", haspreserve);
-    jl_safe_printf("hasunknownmem: %d\n", hasunknownmem);
-    jl_safe_printf("hastypeof: %d\n", hastypeof);
-    jl_safe_printf("refload: %d\n", refload);
-    jl_safe_printf("refstore: %d\n", refstore);
-    jl_safe_printf("Uses: %d\n", (unsigned)uses.size());
+    OS << "AllocUseInfo:\n";
+    OS << "escaped: " << escaped << '\n';
+    OS << "addrescaped: " << addrescaped << '\n';
+    OS << "returned: " << returned << '\n';
+    OS << "haserror: " << haserror << '\n';
+    OS << "hasload: " << hasload << '\n';
+    OS << "haspreserve: " << haspreserve << '\n';
+    OS << "hasunknownmem: " << hasunknownmem << '\n';
+    OS << "hastypeof: " << hastypeof << '\n';
+    OS << "refload: " << refload << '\n';
+    OS << "refstore: " << refstore << '\n';
+    OS << "Uses: " << uses.size() << '\n';
     for (auto inst: uses)
-        llvm_dump(inst);
+        inst->print(OS);
     if (!preserves.empty()) {
-        jl_safe_printf("Preserves: %d\n", (unsigned)preserves.size());
-        for (auto inst: preserves) {
-            llvm_dump(inst);
-        }
+        OS << "Preserves: " << preserves.size() << '\n';
+        for (auto inst: preserves)
+            inst->print(OS);
     }
-    if (!memops.empty()) {
-        jl_safe_printf("Memops: %d\n", (unsigned)memops.size());
-        for (auto &field: memops) {
-            jl_safe_printf("  Field %d @ %d\n", field.second.size, field.first);
-            jl_safe_printf("    Accesses:\n");
-            for (auto memop: field.second.accesses) {
-                jl_safe_printf("    ");
-                llvm_dump(memop.inst);
-            }
+    OS << "MemOps: " << memops.size() << '\n';
+    for (auto &field: memops) {
+        OS << "  offset: " << field.first << '\n';
+        OS << "  size: " << field.second.size << '\n';
+        OS << "  hasobjref: " << field.second.hasobjref << '\n';
+        OS << "  hasload: " << field.second.hasload << '\n';
+        OS << "  hasaggr: " << field.second.hasaggr << '\n';
+        OS << "  accesses: " << field.second.accesses.size() << '\n';
+        for (auto &memop: field.second.accesses) {
+            OS << "    ";
+            memop.inst->print(OS);
+            OS << '\n';
+            OS << "    " << (memop.isaggr ? "aggr" : "scalar") << '\n';
+            OS << "    " << (memop.isobjref ? "objref" : "bits") << '\n';
+            OS << "    " << memop.offset << '\n';
+            OS << "    " << memop.size << '\n';
         }
     }
 }
 
+JL_USED_FUNC void AllocUseInfo::dump()
+{
+    dump(dbgs());
+}
+
+#ifndef __clang_gcanalyzer__
+#define REMARK(remark) if (options.ORE) options.ORE->emit(remark)
+#else
+#define REMARK(remark)
+#endif
+
 void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) {
     required.use_info.reset();
     if (I->use_empty())
@@ -161,9 +181,11 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
     };
 
     auto check_inst = [&] (Instruction *inst, Use *use) {
+        LLVM_DEBUG(dbgs() << "Checking: " << *inst << "\n");
         if (isa<LoadInst>(inst)) {
             required.use_info.hasload = true;
             if (cur.offset == UINT32_MAX) {
+                LLVM_DEBUG(dbgs() << "Load inst has unknown offset\n");
                 auto elty = inst->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
@@ -186,13 +208,16 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                             !isa<ConstantInt>(call->getArgOperand(2)) ||
                             !isa<ConstantInt>(call->getArgOperand(1)) ||
                             (cast<ConstantInt>(call->getArgOperand(2))->getLimitedValue() >=
-                             UINT32_MAX - cur.offset))
+                             UINT32_MAX - cur.offset)) {
+                            LLVM_DEBUG(dbgs() << "Memset inst has unknown offset\n");
                             required.use_info.hasunknownmem = true;
+                        }
                         return true;
                     }
                     if (id == Intrinsic::lifetime_start || id == Intrinsic::lifetime_end ||
                         isa<DbgInfoIntrinsic>(II))
                         return true;
+                    LLVM_DEBUG(dbgs() << "Unknown intrinsic, marking addrescape\n");
                     required.use_info.addrescaped = true;
                     return true;
                 }
@@ -220,23 +245,38 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
             if (!call->isBundleOperand(opno) ||
                 call->getOperandBundleForOperand(opno).getTagName() != "jl_roots") {
                 if (isa<UnreachableInst>(call->getParent()->getTerminator())) {
+                    LLVM_DEBUG(dbgs() << "Detected use of allocation in block terminating with unreachable, likely error function\n");
                     required.use_info.haserror = true;
                     return true;
                 }
+                LLVM_DEBUG(dbgs() << "Unknown call, marking escape\n");
+                REMARK([&]() {
+                    return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownCall",
+                                                    inst)
+                           << "Unknown call, marking escape (" << ore::NV("Call", inst) << ")";
+                });
                 required.use_info.escaped = true;
                 return false;
             }
+            LLVM_DEBUG(dbgs() << "Call is in jl_roots bundle, marking haspreserve\n");
             required.use_info.haspreserve = true;
             return true;
         }
         if (auto store = dyn_cast<StoreInst>(inst)) {
             // Only store value count
             if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) {
+                LLVM_DEBUG(dbgs() << "Object address is stored somewhere, marking escape\n");
+                REMARK([&]() {
+                    return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr",
+                                                    inst)
+                           << "Object address is stored somewhere, marking escape (" << ore::NV("Store", inst) << ")";
+                });
                 required.use_info.escaped = true;
                 return false;
             }
             auto storev = store->getValueOperand();
             if (cur.offset == UINT32_MAX) {
+                LLVM_DEBUG(dbgs() << "Store inst has unknown offset\n");
                 auto elty = storev->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
@@ -250,6 +290,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
         if (isa<AtomicCmpXchgInst>(inst) || isa<AtomicRMWInst>(inst)) {
             // Only store value count
             if (use->getOperandNo() != isa<AtomicCmpXchgInst>(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex()) {
+                LLVM_DEBUG(dbgs() << "Object address is cmpxchg/rmw-ed somewhere, marking escape\n");
+                REMARK([&]() {
+                    return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr",
+                                                    inst)
+                           << "Object address is cmpxchg/rmw-ed somewhere, marking escape (" << ore::NV("Store", inst) << ")";
+                });
                 required.use_info.escaped = true;
                 return false;
             }
@@ -257,8 +303,10 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
             auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
             if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
                                                                cur.offset, storev->getType(),
-                                                               true, required.DL))
+                                                               true, required.DL)) {
+                LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n");
                 required.use_info.hasunknownmem = true;
+            }
             required.use_info.refload = true;
             return true;
         }
@@ -272,10 +320,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
                 APInt apoffset(sizeof(void*) * 8, cur.offset, true);
                 if (!gep->accumulateConstantOffset(required.DL, apoffset) || apoffset.isNegative()) {
                     next_offset = UINT32_MAX;
+                    LLVM_DEBUG(dbgs() << "GEP inst has unknown offset\n");
                 }
                 else {
                     next_offset = apoffset.getLimitedValue();
                     if (next_offset > UINT32_MAX) {
+                        LLVM_DEBUG(dbgs() << "GEP inst exceeeds 32-bit offset\n");
                         next_offset = UINT32_MAX;
                     }
                 }
@@ -285,9 +335,16 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
             return true;
         }
         if (isa<ReturnInst>(inst)) {
+            LLVM_DEBUG(dbgs() << "Allocation is returned\n");
             required.use_info.returned = true;
             return true;
         }
+        LLVM_DEBUG(dbgs() << "Unknown instruction, marking escape\n");
+        REMARK([&]() {
+            return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownInst",
+                                            inst)
+                   << "Unknown instruction, marking escape (" << ore::NV("Inst", inst) << ")";
+        });
         required.use_info.escaped = true;
         return false;
     };
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
index 38a0b2ba181ce..3bd80704a0888 100644
--- a/src/llvm-alloc-helpers.h
+++ b/src/llvm-alloc-helpers.h
@@ -6,6 +6,7 @@
 
 #include <llvm/ADT/SmallSet.h>
 #include <llvm/ADT/SmallVector.h>
+#include <llvm/Analysis/OptimizationRemarkEmitter.h>
 #include <llvm/IR/Instructions.h>
 
 #include <utility>
@@ -110,6 +111,7 @@ namespace jl_alloc {
             preserves.clear();
             memops.clear();
         }
+        void dump(llvm::raw_ostream &OS);
         void dump();
         bool addMemOp(llvm::Instruction *inst, unsigned opno, uint32_t offset, llvm::Type *elty,
                       bool isstore, const llvm::DataLayout &DL);
@@ -136,6 +138,7 @@ namespace jl_alloc {
         //will not be considered. Defaults to nullptr, which means all uses of the allocation
         //are considered
         const llvm::SmallPtrSetImpl<const llvm::BasicBlock*> *valid_set;
+        llvm::OptimizationRemarkEmitter *ORE = nullptr;
 
         EscapeAnalysisOptionalArgs() = default;
 
@@ -143,6 +146,11 @@ namespace jl_alloc {
             this->valid_set = valid_set;
             return *this;
         }
+
+        EscapeAnalysisOptionalArgs &with_optimization_remark_emitter(decltype(ORE) ORE) {
+            this->ORE = ORE;
+            return *this;
+        }
     };
 
     void runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs());
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index f6a2593724f57..acb2d673d6760 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -10,6 +10,7 @@
 #include <llvm/ADT/SmallVector.h>
 #include <llvm/ADT/SetVector.h>
 #include <llvm/ADT/Statistic.h>
+#include <llvm/Analysis/OptimizationRemarkEmitter.h>
 #include <llvm/IR/Value.h>
 #include <llvm/IR/CFG.h>
 #include <llvm/IR/LegacyPassManager.h>
@@ -37,7 +38,7 @@
 #include <map>
 #include <set>
 
-#define DEBUG_TYPE "alloc_opt"
+#define DEBUG_TYPE "alloc-opt"
 #include "julia_assert.h"
 
 using namespace llvm;
@@ -98,6 +99,11 @@ static void removeGCPreserve(CallInst *call, Instruction *val)
  * * Handle jl_box*
  */
 
+#ifndef __clang_gcanalyzer__
+#define REMARK(remark) ORE.emit(remark)
+#else
+#define REMARK(remark) (void) 0;
+#endif
 struct AllocOpt : public JuliaPassContext {
 
     const DataLayout *DL;
@@ -112,6 +118,7 @@ struct AllocOpt : public JuliaPassContext {
 struct Optimizer {
     Optimizer(Function &F, AllocOpt &pass, function_ref<DominatorTree&()> GetDT)
         : F(F),
+          ORE(&F),
           pass(pass),
           GetDT(std::move(GetDT))
     {}
@@ -139,6 +146,7 @@ struct Optimizer {
     void optimizeTag(CallInst *orig_inst);
 
     Function &F;
+    OptimizationRemarkEmitter ORE;
     AllocOpt &pass;
     DominatorTree *_DT = nullptr;
     function_ref<DominatorTree &()> GetDT;
@@ -215,17 +223,29 @@ void Optimizer::optimizeAll()
         size_t sz = item.second;
         checkInst(orig);
         if (use_info.escaped) {
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation escaped " << ore::NV("GC Allocation", orig);
+            });
             if (use_info.hastypeof)
                 optimizeTag(orig);
             continue;
         }
         if (use_info.haserror || use_info.returned) {
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation has error or was returned " << ore::NV("GC Allocation", orig);
+            });
             if (use_info.hastypeof)
                 optimizeTag(orig);
             continue;
         }
         if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
                                                            !use_info.refstore)) {
+            REMARK([&]() {
+                return OptimizationRemark(DEBUG_TYPE, "Dead Allocation", orig)
+                    << "GC allocation removed " << ore::NV("GC Allocation", orig);
+            });
             // No one took the address, no one reads anything and there's no meaningful
             // preserve of fields (either no preserve/ccall or no object reference fields)
             // We can just delete all the uses.
@@ -246,16 +266,28 @@ void Optimizer::optimizeAll()
                 }
             }
         }
-        if (!use_info.hasunknownmem && !use_info.addrescaped && !has_refaggr) {
-            // No one actually care about the memory layout of this object, split it.
-            splitOnStack(orig);
-            continue;
-        }
         if (has_refaggr) {
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation has unusual object reference, unable to move to stack " << ore::NV("GC Allocation", orig);
+            });
             if (use_info.hastypeof)
                 optimizeTag(orig);
             continue;
         }
+        if (!use_info.hasunknownmem && !use_info.addrescaped) {
+            REMARK([&](){
+                return OptimizationRemark(DEBUG_TYPE, "Stack Split Allocation", orig)
+                    << "GC allocation split on stack " << ore::NV("GC Allocation", orig);
+            });
+            // No one actually care about the memory layout of this object, split it.
+            splitOnStack(orig);
+            continue;
+        }
+        REMARK([&](){
+            return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig)
+                << "GC allocation moved to stack " << ore::NV("GC Allocation", orig);
+        });
         // The object has no fields with mix reference access
         moveToStack(orig, sz, has_ref);
     }
@@ -324,8 +356,15 @@ ssize_t Optimizer::getGCAllocSize(Instruction *I)
 
 void Optimizer::checkInst(Instruction *I)
 {
+    LLVM_DEBUG(dbgs() << "Running escape analysis on " << *I << "\n");
     jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, pass, *pass.DL};
-    jl_alloc::runEscapeAnalysis(I, required);
+    jl_alloc::runEscapeAnalysis(I, required, jl_alloc::EscapeAnalysisOptionalArgs().with_optimization_remark_emitter(&ORE));
+    REMARK([&](){
+        std::string suse_info;
+        llvm::raw_string_ostream osuse_info(suse_info);
+        use_info.dump(osuse_info);
+        return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", I) << "escape analysis for " << ore::NV("GC Allocation", I) << "\n" << ore::NV("UseInfo", osuse_info.str());
+    });
 }
 
 void Optimizer::insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert)
@@ -615,8 +654,10 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
         }
         return false;
     };
-    if (simple_replace(orig_inst, new_inst))
+    if (simple_replace(orig_inst, new_inst)) {
+        LLVM_DEBUG(dbgs() << "Simple replace of allocation was successful in stack move\n");
         return;
+    }
     assert(replace_stack.empty());
     ReplaceUses::Frame cur{orig_inst, new_inst};
     auto finish_cur = [&] () {
@@ -731,8 +772,10 @@ void Optimizer::removeAlloc(CallInst *orig_inst)
         }
         return false;
     };
-    if (simple_remove(orig_inst))
+    if (simple_remove(orig_inst)) {
+        LLVM_DEBUG(dbgs() << "Simple remove of allocation was successful in removeAlloc\n");
         return;
+    }
     assert(replace_stack.empty());
     ReplaceUses::Frame cur{orig_inst, nullptr};
     auto finish_cur = [&] () {
@@ -818,6 +861,10 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
             auto callee = call->getCalledOperand();
             if (pass.typeof_func == callee) {
                 ++RemovedTypeofs;
+                REMARK([&](){
+                    return OptimizationRemark(DEBUG_TYPE, "typeof", call)
+                        << "removed typeof call for GC allocation " << ore::NV("Alloc", orig_inst);
+                });
                 call->replaceAllUsesWith(tag);
                 // Push to the removed instructions to trigger `finalize` to
                 // return the correct result.
@@ -894,8 +941,10 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         }
         return false;
     };
-    if (simple_replace(orig_inst))
+    if (simple_replace(orig_inst)) {
+        LLVM_DEBUG(dbgs() << "Simple replace of allocation was successful in stack split\n");
         return;
+    }
     assert(replace_stack.empty());
     ReplaceUses::Frame cur{orig_inst, uint32_t(0)};
     auto finish_cur = [&] () {
@@ -975,7 +1024,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 assert(slot.offset == offset);
                 auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
                 if (!isa<PointerType>(store_ty)) {
-                    store_val = builder.CreateBitCast(store_val, getSizeTy(builder.getContext()));
+                    store_val = builder.CreateBitCast(store_val, pass.DL->getIntPtrType(builder.getContext(), T_pjlvalue->getAddressSpace()));
                     store_val = builder.CreateIntToPtr(store_val, T_pjlvalue);
                     store_ty = T_pjlvalue;
                 }
@@ -1038,7 +1087,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                                 else {
                                     uint64_t intval;
                                     memset(&intval, val, 8);
-                                    Constant *val = ConstantInt::get(getSizeTy(builder.getContext()), intval);
+                                    Constant *val = ConstantInt::get(pass.DL->getIntPtrType(builder.getContext(), pass.T_prjlvalue->getAddressSpace()), intval);
                                     val = ConstantExpr::getIntToPtr(val, JuliaType::get_pjlvalue_ty(builder.getContext()));
                                     ptr = ConstantExpr::getAddrSpaceCast(val, pass.T_prjlvalue);
                                 }
@@ -1089,9 +1138,12 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                     ref->setOrdering(AtomicOrdering::NotAtomic);
                     operands.push_back(ref);
                 }
+#ifndef __clang_analyzer__
+                // FIXME: SA finds "Called C++ object pointer is null" inside the LLVM code.
                 auto new_call = builder.CreateCall(pass.gc_preserve_begin_func, operands);
                 new_call->takeName(call);
                 call->replaceAllUsesWith(new_call);
+#endif
                 call->eraseFromParent();
                 return;
             }
@@ -1175,8 +1227,10 @@ bool AllocOpt::doInitialization(Module &M)
 
 bool AllocOpt::runOnFunction(Function &F, function_ref<DominatorTree&()> GetDT)
 {
-    if (!alloc_obj_func)
+    if (!alloc_obj_func) {
+        LLVM_DEBUG(dbgs() << "AllocOpt: no alloc_obj function found, skipping pass\n");
         return false;
+    }
     Optimizer optimizer(F, *this, std::move(GetDT));
     optimizer.initialize();
     optimizer.optimizeAll();
@@ -1235,7 +1289,8 @@ PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) {
     }
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createAllocOptPass());
 }
diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h
index e0edb792d7645..0ab140b42b8b7 100644
--- a/src/llvm-codegen-shared.h
+++ b/src/llvm-codegen-shared.h
@@ -22,15 +22,6 @@ enum AddressSpace {
     LastSpecial = Loaded,
 };
 
-static inline auto getSizeTy(llvm::LLVMContext &ctxt) {
-    //return M.getDataLayout().getIntPtrType(M.getContext());
-    if (sizeof(size_t) > sizeof(uint32_t)) {
-        return llvm::Type::getInt64Ty(ctxt);
-    } else {
-        return llvm::Type::getInt32Ty(ctxt);
-    }
-}
-
 namespace JuliaType {
     static inline llvm::StructType* get_jlvalue_ty(llvm::LLVMContext &C) {
         return llvm::StructType::get(C);
@@ -179,7 +170,7 @@ static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder,
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack)
+static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack)
 {
     using namespace llvm;
     auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
@@ -187,17 +178,16 @@ static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &bui
     const int pgcstack_offset = offsetof(jl_task_t, gcstack);
     return builder.CreateInBoundsGEP(
             T_pjlvalue, emit_bitcast_with_builder(builder, pgcstack, T_ppjlvalue),
-            ConstantInt::get(getSizeTy(builder.getContext()), -(pgcstack_offset / sizeof(void *))),
+            ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))),
             "current_task");
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa)
 {
     using namespace llvm;
     auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
     auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
-    auto T_size = getSizeTy(builder.getContext());
     const int ptls_offset = offsetof(jl_task_t, ptls);
     llvm::Value *pptls = builder.CreateInBoundsGEP(
             T_pjlvalue, current_task,
@@ -211,11 +201,10 @@ static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder
 }
 
 // Get signal page through current task.
-static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa)
 {
     using namespace llvm;
     // return builder.CreateCall(prepare_call(reuse_signal_page_func));
-    auto T_size = getSizeTy(builder.getContext());
     auto T_psize = T_size->getPointerTo();
     auto T_ppsize = T_psize->getPointerTo();
     int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
@@ -234,22 +223,20 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
     builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread);
 }
 
-static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
+static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
 {
     using namespace llvm;
-    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa);
+    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa);
     emit_signal_fence(builder);
     Module *M = builder.GetInsertBlock()->getModule();
     LLVMContext &C = builder.getContext();
     // inline jlsafepoint_func->realize(M)
     if (final) {
-        auto T_size = getSizeTy(builder.getContext());
         builder.CreateLoad(T_size, signal_page, true);
     }
     else {
         Function *F = M->getFunction("julia.safepoint");
         if (!F) {
-            auto T_size = getSizeTy(builder.getContext());
             auto T_psize = T_size->getPointerTo();
             FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false);
             F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M);
@@ -260,7 +247,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *pt
     emit_signal_fence(builder);
 }
 
-static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final)
+static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final)
 {
     using namespace llvm;
     Type *T_int8 = state->getType();
@@ -286,38 +273,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::V
                          passBB, exitBB);
     builder.SetInsertPoint(passBB);
     MDNode *tbaa = get_tbaa_const(builder.getContext());
-    emit_gc_safepoint(builder, ptls, tbaa, final);
+    emit_gc_safepoint(builder, T_size, ptls, tbaa, final);
     builder.CreateBr(exitBB);
     builder.SetInsertPoint(exitBB);
     return old_state;
 }
 
-static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final)
+static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final)
 {
     using namespace llvm;
     Value *state = builder.getInt8(0);
-    return emit_gc_state_set(builder, ptls, state, nullptr, final);
+    return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final);
 }
 
-static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final)
+static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
 {
     using namespace llvm;
     Value *old_state = builder.getInt8(0);
-    return emit_gc_state_set(builder, ptls, state, old_state, final);
+    return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
 }
 
-static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final)
+static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final)
 {
     using namespace llvm;
     Value *state = builder.getInt8(JL_GC_STATE_SAFE);
-    return emit_gc_state_set(builder, ptls, state, nullptr, final);
+    return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final);
 }
 
-static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final)
+static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
 {
     using namespace llvm;
     Value *old_state = builder.getInt8(JL_GC_STATE_SAFE);
-    return emit_gc_state_set(builder, ptls, state, old_state, final);
+    return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
 }
 
 // Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14.
@@ -329,124 +316,224 @@ using namespace llvm;
 
 inline void addFnAttr(CallInst *Target, Attribute::AttrKind Attr)
 {
-#if JL_LLVM_VERSION >= 140000
     Target->addFnAttr(Attr);
-#else
-    Target->addAttribute(AttributeList::FunctionIndex, Attr);
-#endif
 }
 
 template<class T, class A>
 inline void addRetAttr(T *Target, A Attr)
 {
-#if JL_LLVM_VERSION >= 140000
     Target->addRetAttr(Attr);
-#else
-    Target->addAttribute(AttributeList::ReturnIndex, Attr);
-#endif
 }
 
 inline void addAttributeAtIndex(Function *F, unsigned Index, Attribute Attr)
 {
-#if JL_LLVM_VERSION >= 140000
     F->addAttributeAtIndex(Index, Attr);
-#else
-    F->addAttribute(Index, Attr);
-#endif
 }
 
 inline AttributeSet getFnAttrs(const AttributeList &Attrs)
 {
-#if JL_LLVM_VERSION >= 140000
     return Attrs.getFnAttrs();
-#else
-    return Attrs.getFnAttributes();
-#endif
 }
 
 inline AttributeSet getRetAttrs(const AttributeList &Attrs)
 {
-#if JL_LLVM_VERSION >= 140000
     return Attrs.getRetAttrs();
-#else
-    return Attrs.getRetAttributes();
-#endif
 }
 
 inline bool hasFnAttr(const AttributeList &L, Attribute::AttrKind Kind)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.hasFnAttr(Kind);
-#else
-    return L.hasAttribute(AttributeList::FunctionIndex, Kind);
-#endif
 }
 
 inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C,
                                          unsigned Index, Attribute::AttrKind Kind)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.addAttributeAtIndex(C, Index, Kind);
-#else
-    return L.addAttribute(C, Index, Kind);
-#endif
 }
 
 inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C,
                                          unsigned Index, Attribute Attr)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.addAttributeAtIndex(C, Index, Attr);
-#else
-    return L.addAttribute(C, Index, Attr);
-#endif
 }
 
 inline AttributeList addAttributesAtIndex(const AttributeList &L, LLVMContext &C,
                                           unsigned Index, const AttrBuilder &Builder)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.addAttributesAtIndex(C, Index, Builder);
-#else
-    return L.addAttributes(C, Index, Builder);
-#endif
 }
 
 inline AttributeList addFnAttribute(const AttributeList &L, LLVMContext &C,
                                     Attribute::AttrKind Kind)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.addFnAttribute(C, Kind);
-#else
-    return L.addAttribute(C, AttributeList::FunctionIndex, Kind);
-#endif
 }
 
 inline AttributeList addRetAttribute(const AttributeList &L, LLVMContext &C,
                                      Attribute::AttrKind Kind)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.addRetAttribute(C, Kind);
-#else
-    return L.addAttribute(C, AttributeList::ReturnIndex, Kind);
-#endif
 }
 
 inline bool hasAttributesAtIndex(const AttributeList &L, unsigned Index)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.hasAttributesAtIndex(Index);
-#else
-    return L.hasAttributes(Index);
-#endif
 }
 
 inline Attribute getAttributeAtIndex(const AttributeList &L, unsigned Index, Attribute::AttrKind Kind)
 {
-#if JL_LLVM_VERSION >= 140000
     return L.getAttributeAtIndex(Index, Kind);
-#else
-    return L.getAttribute(Index, Kind);
-#endif
+}
+
+// Iterate through uses of a particular type.
+// Recursively scan through `ConstantExpr` and `ConstantAggregate` use.
+template<typename U>
+struct ConstantUses {
+    template<typename T>
+    struct Info {
+        llvm::Use *use;
+        T *val;
+        // If `samebits == true`, the offset the original value appears in the constant.
+        size_t offset;
+        // This specify whether the original value appears in the current value in exactly
+        // the same bit pattern (with possibly an offset determined by `offset`).
+        bool samebits;
+        Info(llvm::Use *use, T *val, size_t offset, bool samebits) :
+            use(use),
+            val(val),
+            offset(offset),
+            samebits(samebits)
+        {
+        }
+        Info(llvm::Use *use, size_t offset, bool samebits) :
+            use(use),
+            val(cast<T>(use->getUser())),
+            offset(offset),
+            samebits(samebits)
+        {
+        }
+    };
+    using UseInfo = Info<U>;
+    struct Frame : Info<llvm::Constant> {
+        template<typename... Args>
+        Frame(Args &&... args) :
+            Info<llvm::Constant>(std::forward<Args>(args)...),
+            cur(this->val->use_empty() ? nullptr : &*this->val->use_begin()),
+            _next(cur ? cur->getNext() : nullptr)
+        {
+        }
+    private:
+        void next()
+        {
+            cur = _next;
+            if (!cur)
+                return;
+            _next = cur->getNext();
+        }
+        llvm::Use *cur;
+        llvm::Use *_next;
+        friend struct ConstantUses;
+    };
+    ConstantUses(llvm::Constant *c, llvm::Module &M)
+        : stack{Frame(nullptr, c, 0u, true)},
+          M(M)
+    {
+        forward();
+    }
+    UseInfo get_info() const
+    {
+        auto &top = stack.back();
+        return UseInfo(top.cur, top.offset, top.samebits);
+    }
+    const auto &get_stack() const
+    {
+        return stack;
+    }
+    void next()
+    {
+        stack.back().next();
+        forward();
+    }
+    bool done()
+    {
+        return stack.empty();
+    }
+private:
+    void forward();
+    llvm::SmallVector<Frame, 4> stack;
+    llvm::Module &M;
+};
+
+template<typename U>
+void ConstantUses<U>::forward()
+{
+    assert(!stack.empty());
+    auto frame = &stack.back();
+    const auto &DL = M.getDataLayout();
+    auto pop = [&] {
+        stack.pop_back();
+        if (stack.empty()) {
+            return false;
+        }
+        frame = &stack.back();
+        return true;
+    };
+    auto push = [&] (llvm::Use *use, llvm::Constant *c, size_t offset, bool samebits) {
+        stack.emplace_back(use, c, offset, samebits);
+        frame = &stack.back();
+    };
+    auto handle_constaggr = [&] (llvm::Use *use, llvm::ConstantAggregate *aggr) {
+        if (!frame->samebits) {
+            push(use, aggr, 0, false);
+            return;
+        }
+        if (auto strct = dyn_cast<llvm::ConstantStruct>(aggr)) {
+            auto layout = DL.getStructLayout(strct->getType());
+            push(use, strct, frame->offset + layout->getElementOffset(use->getOperandNo()), true);
+        }
+        else if (auto ary = dyn_cast<llvm::ConstantArray>(aggr)) {
+            auto elty = ary->getType()->getElementType();
+            push(use, ary, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true);
+        }
+        else if (auto vec = dyn_cast<llvm::ConstantVector>(aggr)) {
+            auto elty = vec->getType()->getElementType();
+            push(use, vec, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true);
+        }
+        else {
+            abort();
+        }
+    };
+    auto handle_constexpr = [&] (llvm::Use *use, llvm::ConstantExpr *expr) {
+        if (!frame->samebits) {
+            push(use, expr, 0, false);
+            return;
+        }
+        auto opcode = expr->getOpcode();
+        if (opcode == llvm::Instruction::PtrToInt || opcode == llvm::Instruction::IntToPtr ||
+            opcode == llvm::Instruction::AddrSpaceCast || opcode == llvm::Instruction::BitCast) {
+            push(use, expr, frame->offset, true);
+        }
+        else {
+            push(use, expr, 0, false);
+        }
+    };
+    while (true) {
+        auto use = frame->cur;
+        if (!use) {
+            if (!pop())
+                return;
+            continue;
+        }
+        auto user = use->getUser();
+        if (isa<U>(user))
+            return;
+        frame->next();
+        if (auto aggr = dyn_cast<llvm::ConstantAggregate>(user)) {
+            handle_constaggr(use, aggr);
+        }
+        else if (auto expr = dyn_cast<llvm::ConstantExpr>(user)) {
+            handle_constexpr(use, expr);
+        }
+    }
 }
 }
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
index 0f39bf06d2101..45637a4c567f6 100644
--- a/src/llvm-cpufeatures.cpp
+++ b/src/llvm-cpufeatures.cpp
@@ -38,20 +38,18 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false");
 extern JuliaOJIT *jl_ExecutionEngine;
 
 // whether this platform unconditionally (i.e. without needing multiversioning) supports FMA
-Optional<bool> always_have_fma(Function &intr) JL_NOTSAFEPOINT {
-    auto intr_name = intr.getName();
-    auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
-
-#if defined(_CPU_AARCH64_)
-    return typ == "f32" || typ == "f64";
-#else
-    (void)typ;
-    return {};
-#endif
+Optional<bool> always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT {
+    if (TT.isAArch64()) {
+        auto intr_name = intr.getName();
+        auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
+        return typ == "f32" || typ == "f64";
+    } else {
+        return {};
+    }
 }
 
-bool have_fma(Function &intr, Function &caller) JL_NOTSAFEPOINT {
-    auto unconditional = always_have_fma(intr);
+static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTSAFEPOINT {
+    auto unconditional = always_have_fma(intr, TT);
     if (unconditional.hasValue())
         return unconditional.getValue();
 
@@ -65,21 +63,21 @@ bool have_fma(Function &intr, Function &caller) JL_NOTSAFEPOINT {
     SmallVector<StringRef, 6> Features;
     FS.split(Features, ',');
     for (StringRef Feature : Features)
-#if defined _CPU_ARM_
+    if (TT.isARM()) {
       if (Feature == "+vfp4")
-        return typ == "f32" || typ == "f64";lowerCPUFeatures
+        return typ == "f32" || typ == "f64";
       else if (Feature == "+vfp4sp")
         return typ == "f32";
-#else
+    } else {
       if (Feature == "+fma" || Feature == "+fma4")
         return typ == "f32" || typ == "f64";
-#endif
+    }
 
     return false;
 }
 
-void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) JL_NOTSAFEPOINT {
-    if (have_fma(intr, caller)) {
+void lowerHaveFMA(Function &intr, Function &caller, const Triple &TT, CallInst *I) JL_NOTSAFEPOINT {
+    if (have_fma(intr, caller, TT)) {
         ++LoweredWithFMA;
         I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
     } else {
@@ -91,6 +89,7 @@ void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) JL_NOTSAFEPOINT
 
 bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
 {
+    auto TT = Triple(M.getTargetTriple());
     SmallVector<Instruction*,6> Materialized;
 
     for (auto &F: M.functions()) {
@@ -100,7 +99,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
             for (Use &U: F.uses()) {
                 User *RU = U.getUser();
                 CallInst *I = cast<CallInst>(RU);
-                lowerHaveFMA(F, *I->getParent()->getParent(), I);
+                lowerHaveFMA(F, *I->getParent()->getParent(), TT, I);
                 Materialized.push_back(I);
             }
         }
@@ -151,7 +150,8 @@ Pass *createCPUFeaturesPass()
     return new CPUFeaturesLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createCPUFeaturesPass());
 }
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
index 57ec30ca57947..6ff7feaa550c8 100644
--- a/src/llvm-demote-float16.cpp
+++ b/src/llvm-demote-float16.cpp
@@ -47,41 +47,28 @@ INST_STATISTIC(FCmp);
 
 extern JuliaOJIT *jl_ExecutionEngine;
 
-Optional<bool> always_have_fp16() {
-#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
-    // x86 doesn't support fp16
-    // TODO: update for sapphire rapids when it comes out
-    return false;
-#else
-    return {};
-#endif
-}
-
 namespace {
 
-bool have_fp16(Function &caller) {
-    auto unconditional = always_have_fp16();
-    if (unconditional.hasValue())
-        return unconditional.getValue();
-
+static bool have_fp16(Function &caller, const Triple &TT) {
     Attribute FSAttr = caller.getFnAttribute("target-features");
     StringRef FS =
         FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
-#if defined(_CPU_AARCH64_)
-    if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
-        return true;
-    }
-#else
-    if (FS.find("+avx512fp16") != llvm::StringRef::npos){
-        return true;
+    if (TT.isAArch64()) {
+        if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
+            return true;
+        }
+    } else if (TT.getArch() == Triple::x86_64) {
+        if (FS.find("+avx512fp16") != llvm::StringRef::npos){
+            return true;
+        }
     }
-#endif
     return false;
 }
 
 static bool demoteFloat16(Function &F)
 {
-    if (have_fp16(F))
+    auto TT = Triple(F.getParent()->getTargetTriple());
+    if (have_fp16(F, TT))
         return false;
 
     auto &ctx = F.getContext();
@@ -233,7 +220,8 @@ Pass *createDemoteFloat16Pass()
     return new DemoteFloat16Legacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createDemoteFloat16Pass());
 }
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index d60a8e181177b..6ad46f1eb01d4 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -48,6 +48,7 @@ struct FinalLowerGC: private JuliaPassContext {
     Function *queueRootFunc;
     Function *poolAllocFunc;
     Function *bigAllocFunc;
+    Function *allocTypedFunc;
 #ifdef MMTK_GC
     Function *writeBarrier1Func;
     Function *writeBarrier2Func;
@@ -55,6 +56,7 @@ struct FinalLowerGC: private JuliaPassContext {
     Function *writeBarrier2SlowFunc;
 #endif
     Instruction *pgcstack;
+    Type *T_size;
 
     // Lowers a `julia.new_gc_frame` intrinsic.
     Value *lowerNewGCFrame(CallInst *target, Function &F);
@@ -137,10 +139,10 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
     IRBuilder<> builder(target->getContext());
     builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
     StoreInst *inst = builder.CreateAlignedStore(
-                ConstantInt::get(getSizeTy(F.getContext()), JL_GC_ENCODE_PUSHARGS(nRoots)),
+                ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
                 builder.CreateBitCast(
                         builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0),
-                        getSizeTy(F.getContext())->getPointerTo()),
+                        T_size->getPointerTo()),
                 Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(F.getContext());
@@ -211,7 +213,6 @@ Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
     assert(target->arg_size() == 1);
     IRBuilder<> builder(target->getContext());
     builder.SetInsertPoint(target);
-    auto T_size = getSizeTy(builder.getContext());
     Value* signal_page = target->getOperand(0);
     Value* load = builder.CreateLoad(T_size, signal_page, true);
     return load;
@@ -252,95 +253,105 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 {
     ++GCAllocBytesCount;
     assert(target->arg_size() == 2);
-    auto sz = (size_t)cast<ConstantInt>(target->getArgOperand(1))->getZExtValue();
-    // This is strongly architecture and OS dependent
-    int osize;
-    int offset = jl_gc_classify_pools(sz, &osize);
+    CallInst *newI;
+
     IRBuilder<> builder(target);
     builder.SetCurrentDebugLocation(target->getDebugLoc());
     auto ptls = target->getArgOperand(0);
-    CallInst *newI;
     Attribute derefAttr;
-    if (offset < 0) {
-        newI = builder.CreateCall(
-            bigAllocFunc,
-            { ptls, ConstantInt::get(getSizeTy(F.getContext()), sz + sizeof(void*)) });
-        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*));
-    }
-    else {
-#ifndef MMTK_GC
-        auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
-        auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-        newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
-        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
-#else // MMTK_GC
-        auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-        auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
-        auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor));
-        auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()),  offsetof(jl_tls_states_t, limit));
-
-        auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
-        auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
-        auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
-
-
-        auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
-        auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
-        auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
-        auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
-        auto result = builder.CreateNSWAdd(cursor, delta, "result");
-
-        auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
-
-        auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
-        auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
-        auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
-
-        auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
-
-        auto current_block = target->getParent();
-        builder.SetInsertPoint(target->getNextNode());
-        auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
-        auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");
-
-        auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
-        auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);
-
-        auto next_br = current_block->getTerminator();
-        next_br->eraseFromParent();
-        builder.SetInsertPoint(current_block);
-        builder.CreateCondBr(gt_limit, slowpath, fastpath);
-
-        // slowpath
-        builder.SetInsertPoint(slowpath);
-        auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-        auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
-        new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
-        builder.CreateBr(top_cont);
-
-        // // fastpath
-        builder.SetInsertPoint(fastpath);
-        builder.CreateStore(new_cursor, cursor_ptr);
-
-        // ptls->gc_num.allocd += osize;
-        auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
-        auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
-        auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
-        auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
-        auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
-        builder.CreateStore(pool_allocd_total, pool_alloc_tls);
-
-        auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
-        auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
-        builder.CreateBr(top_cont);
-
-        phiNode->addIncoming(new_call, slowpath);
-        phiNode->addIncoming(v_as_ptr, fastpath);
-        phiNode->takeName(target);
-
-        return phiNode;
-#endif // MMTK_GC
+
+    if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
+        size_t sz = (size_t)CI->getZExtValue();
+        // This is strongly architecture and OS dependent
+        int osize;
+        int offset = jl_gc_classify_pools(sz, &osize);
+        if (offset < 0) {
+            newI = builder.CreateCall(
+                bigAllocFunc,
+                { ptls, ConstantInt::get(T_size, sz + sizeof(void*)) });
+            derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*));
+        }
+        else {
+        #ifndef MMTK_GC
+            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
+            auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+            newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
+            derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
+        #else // MMTK_GC
+            auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+            auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
+            auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor));
+            auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()),  offsetof(jl_tls_states_t, limit));
+
+            auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
+            auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
+            auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
+
+
+            auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
+            auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
+            auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
+            auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
+            auto result = builder.CreateNSWAdd(cursor, delta, "result");
+
+            auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
+
+            auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
+            auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
+            auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
+
+            auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
+
+            auto current_block = target->getParent();
+            builder.SetInsertPoint(target->getNextNode());
+            auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
+            auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");
+
+            auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
+            auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);
+
+            auto next_br = current_block->getTerminator();
+            next_br->eraseFromParent();
+            builder.SetInsertPoint(current_block);
+            builder.CreateCondBr(gt_limit, slowpath, fastpath);
+
+            // slowpath
+            builder.SetInsertPoint(slowpath);
+            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
+            auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
+            new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
+            builder.CreateBr(top_cont);
+
+            // // fastpath
+            builder.SetInsertPoint(fastpath);
+            builder.CreateStore(new_cursor, cursor_ptr);
+
+            // ptls->gc_num.allocd += osize;
+            auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
+            auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
+            auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
+            auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
+            auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
+            builder.CreateStore(pool_allocd_total, pool_alloc_tls);
+
+            auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
+            auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
+            builder.CreateBr(top_cont);
+
+            phiNode->addIncoming(new_call, slowpath);
+            phiNode->addIncoming(v_as_ptr, fastpath);
+            phiNode->takeName(target);
+
+            return phiNode;
+        #endif // MMTK_GC
+        }
+    } else {
+        auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
+        size = builder.CreateAdd(size, ConstantInt::get(T_size, sizeof(void*)));
+        newI = builder.CreateCall(allocTypedFunc, { ptls, size, ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext())) });
+        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sizeof(void*));
     }
+
     newI->setAttributes(newI->getCalledFunction()->getAttributes());
     newI->addRetAttr(derefAttr);
     newI->takeName(target);
@@ -355,6 +366,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
     queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
     poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
     bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
+    allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
+    T_size = M.getDataLayout().getIntPtrType(M.getContext());
 #ifdef MMTK_GC
     writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1);
     writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2);
@@ -362,7 +375,7 @@ bool FinalLowerGC::doInitialization(Module &M) {
     writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
     GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
 #else
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
+    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
 #endif
     unsigned j = 0;
     for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
@@ -384,7 +397,7 @@ bool FinalLowerGC::doFinalization(Module &M)
     queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr;
 #else
     GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
-    queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr;
+    queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
 #endif
     auto used = M.getGlobalVariable("llvm.compiler.used");
     if (!used)
@@ -583,7 +596,8 @@ Pass *createFinalLowerGCPass()
     return new FinalLowerGCLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createFinalLowerGCPass());
 }
diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp
index af9a1862089e4..26288dc09379d 100644
--- a/src/llvm-gc-invariant-verifier.cpp
+++ b/src/llvm-gc-invariant-verifier.cpp
@@ -222,7 +222,8 @@ Pass *createGCInvariantVerifierPass(bool Strong) {
     return new GCInvariantVerifierLegacy(Strong);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong)
 {
     unwrap(PM)->add(createGCInvariantVerifierPass(Strong));
 }
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp
index 553d091ef4c6f..fc867252318c5 100644
--- a/src/llvm-julia-licm.cpp
+++ b/src/llvm-julia-licm.cpp
@@ -8,6 +8,7 @@
 #include <llvm/Analysis/LoopIterator.h>
 #include <llvm/Analysis/MemorySSA.h>
 #include <llvm/Analysis/MemorySSAUpdater.h>
+#include <llvm/Analysis/OptimizationRemarkEmitter.h>
 #include <llvm/Analysis/ValueTracking.h>
 #include <llvm/Analysis/ScalarEvolution.h>
 #include <llvm/ADT/Statistic.h>
@@ -38,6 +39,12 @@ STATISTIC(HoistedAllocation, "Number of allocations hoisted out of a loop");
  * loop context as well but it is inside a loop where they matter the most.
  */
 
+#ifndef __clang_gcanalyzer__
+#define REMARK(remark) ORE.emit(remark)
+#else
+#define REMARK(remark) (void) 0;
+#endif
+
 namespace {
 
 //Stolen and modified from LICM.cpp
@@ -51,13 +58,13 @@ static void eraseInstruction(Instruction &I,
 //Stolen and modified from LICM.cpp
 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                   MemorySSAUpdater &MSSAU,
-                                  ScalarEvolution *SE) {
+                                  ScalarEvolution *SE,
+                                  MemorySSA::InsertionPlace Place = MemorySSA::BeforeTerminator) {
   I.moveBefore(&Dest);
   if (MSSAU.getMemorySSA())
     if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
             MSSAU.getMemorySSA()->getMemoryAccess(&I)))
-      MSSAU.moveToPlace(OldMemAcc, Dest.getParent(),
-                         MemorySSA::BeforeTerminator);
+      MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), Place);
   if (SE)
     SE->forgetValue(&I);
 }
@@ -127,7 +134,6 @@ struct JuliaLICMPassLegacy : public LoopPass {
             getLoopAnalysisUsage(AU);
         }
 };
-
 struct JuliaLICM : public JuliaPassContext {
     function_ref<DominatorTree &()> GetDT;
     function_ref<LoopInfo &()> GetLI;
@@ -142,7 +148,7 @@ struct JuliaLICM : public JuliaPassContext {
                 GetMSSA(GetMSSA),
                 GetSE(GetSE) {}
 
-    bool runOnLoop(Loop *L)
+    bool runOnLoop(Loop *L, OptimizationRemarkEmitter &ORE)
     {
         // Get the preheader block to move instructions into,
         // required to run this pass.
@@ -157,8 +163,10 @@ struct JuliaLICM : public JuliaPassContext {
         // `gc_preserve_end_func` must be from `gc_preserve_begin_func`.
         // We also hoist write barriers here, so we don't exit if write_barrier_func exists
         if (!gc_preserve_begin_func && !write_barrier_func &&
-            !alloc_obj_func)
+            !alloc_obj_func) {
+            LLVM_DEBUG(dbgs() << "No gc_preserve_begin_func or write_barrier_func or alloc_obj_func found, skipping JuliaLICM\n");
             return false;
+        }
         auto LI = &GetLI();
         auto DT = &GetDT();
         auto MSSA = GetMSSA();
@@ -214,6 +222,11 @@ struct JuliaLICM : public JuliaPassContext {
                         continue;
                     ++HoistedPreserveBegin;
                     moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
+                    LLVM_DEBUG(dbgs() << "Hoisted gc_preserve_begin: " << *call << "\n");
+                    REMARK([&](){
+                        return OptimizationRemark(DEBUG_TYPE, "Hoisted", call)
+                            << "hoisting preserve begin " << ore::NV("PreserveBegin", call);
+                    });
                     changed = true;
                 }
                 else if (callee == gc_preserve_end_func) {
@@ -228,11 +241,21 @@ struct JuliaLICM : public JuliaPassContext {
                         continue;
                     }
                     ++SunkPreserveEnd;
-                    moveInstructionBefore(*call, *exit_pts[0], MSSAU, SE);
+                    moveInstructionBefore(*call, *exit_pts[0], MSSAU, SE, MemorySSA::Beginning);
+                    LLVM_DEBUG(dbgs() << "Sunk gc_preserve_end: " << *call << "\n");
+                    REMARK([&](){
+                        return OptimizationRemark(DEBUG_TYPE, "Sunk", call)
+                            << "sinking preserve end " << ore::NV("PreserveEnd", call);
+                    });
                     for (unsigned i = 1; i < exit_pts.size(); i++) {
                         // Clone exit
                         auto CI = CallInst::Create(call, {}, exit_pts[i]);
                         createNewInstruction(CI, call, MSSAU);
+                        LLVM_DEBUG(dbgs() << "Cloned and sunk gc_preserve_end: " << *CI << "\n");
+                        REMARK([&](){
+                            return OptimizationRemark(DEBUG_TYPE, "Sunk", call)
+                                << "cloning and sinking preserve end" << ore::NV("PreserveEnd", call);
+                        });
                     }
                 }
                 else if (callee == write_barrier_func) {
@@ -242,41 +265,93 @@ struct JuliaLICM : public JuliaPassContext {
                             changed, preheader->getTerminator(),
                             MSSAU, SE)) {
                             valid = false;
+                            LLVM_DEBUG(dbgs() << "Failed to hoist write barrier argument: " << *call->getArgOperand(i) << "\n");
                             break;
                         }
                     }
-                    if (valid) {
-                        ++HoistedWriteBarrier;
-                        moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
-                        changed = true;
+                    if (!valid) {
+                        LLVM_DEBUG(dbgs() << "Failed to hoist write barrier: " << *call << "\n");
+                        continue;
                     }
+                    ++HoistedWriteBarrier;
+                    moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
+                    changed = true;
+                    REMARK([&](){
+                        return OptimizationRemark(DEBUG_TYPE, "Hoist", call)
+                            << "hoisting write barrier " << ore::NV("GC Write Barrier", call);
+                    });
                 }
                 else if (callee == alloc_obj_func) {
-                    jl_alloc::AllocUseInfo use_info;
-                    jl_alloc::CheckInst::Stack check_stack;
-                    jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, *this, DL};
-                    jl_alloc::runEscapeAnalysis(call, required, jl_alloc::EscapeAnalysisOptionalArgs().with_valid_set(&L->getBlocksSet()));
-                    if (use_info.escaped || use_info.addrescaped) {
-                        continue;
-                    }
                     bool valid = true;
                     for (std::size_t i = 0; i < call->arg_size(); i++) {
                         if (!makeLoopInvariant(L, call->getArgOperand(i), changed,
                             preheader->getTerminator(), MSSAU, SE)) {
                             valid = false;
+                            LLVM_DEBUG(dbgs() << "Failed to hoist alloc_obj argument: " << *call->getArgOperand(i) << "\n");
                             break;
                         }
                     }
+                    if (!valid) {
+                        LLVM_DEBUG(dbgs() << "Failed to hoist alloc_obj: " << *call << "\n");
+                        continue;
+                    }
+                    LLVM_DEBUG(dbgs() << "Running escape analysis for " << *call << "\n");
+                    jl_alloc::AllocUseInfo use_info;
+                    jl_alloc::CheckInst::Stack check_stack;
+                    jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, *this, DL};
+                    jl_alloc::runEscapeAnalysis(call, required, jl_alloc::EscapeAnalysisOptionalArgs().with_valid_set(&L->getBlocksSet()).with_optimization_remark_emitter(&ORE));
+                    REMARK([&](){
+                        std::string suse_info;
+                        llvm::raw_string_ostream osuse_info(suse_info);
+                        use_info.dump(osuse_info);
+                        return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", call) << "escape analysis for " << ore::NV("GC Allocation", call) << "\n" << ore::NV("UseInfo", osuse_info.str());
+                    });
+                    if (use_info.escaped) {
+                        REMARK([&](){
+                            return OptimizationRemarkMissed(DEBUG_TYPE, "Escape", call)
+                                << "not hoisting gc allocation " << ore::NV("GC Allocation", call)
+                                << " because it may escape";
+                        });
+                        continue;
+                    }
+                    if (use_info.addrescaped) {
+                        REMARK([&](){
+                            return OptimizationRemarkMissed(DEBUG_TYPE, "Escape", call)
+                                << "not hoisting gc allocation " << ore::NV("GC Allocation", call)
+                                << " because its address may escape";
+                        });
+                        continue;
+                    }
                     if (use_info.refstore) {
                         // We need to add write barriers to any stores
                         // that may start crossing generations
+                        REMARK([&](){
+                            return OptimizationRemarkMissed(DEBUG_TYPE, "Escape", call)
+                                << "not hoisting gc allocation " << ore::NV("GC Allocation", call)
+                                << " because it may have an object stored to it";
+                        });
                         continue;
                     }
-                    if (valid) {
-                        ++HoistedAllocation;
-                        moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
-                        changed = true;
+                    REMARK([&](){
+                        return OptimizationRemark(DEBUG_TYPE, "Hoist", call)
+                            << "hoisting gc allocation " << ore::NV("GC Allocation", call);
+                    });
+                    ++HoistedAllocation;
+                    moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE);
+                    IRBuilder<> builder(preheader->getTerminator());
+                    builder.SetCurrentDebugLocation(call->getDebugLoc());
+                    auto obj_i8 = builder.CreateBitCast(call, Type::getInt8PtrTy(call->getContext(), call->getType()->getPointerAddressSpace()));
+                    // Note that this alignment is assuming the GC allocates at least pointer-aligned memory
+                    auto align = Align(DL.getPointerSize(0));
+                    auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align);
+                    if (MSSAU.getMemorySSA()) {
+                        auto alloc_mdef = MSSAU.getMemorySSA()->getMemoryAccess(call);
+                        assert(isa<MemoryDef>(alloc_mdef) && "Expected alloc to be associated with a memory def!");
+                        auto clear_mdef = MSSAU.createMemoryAccessAfter(clear_obj, nullptr, alloc_mdef);
+                        assert(isa<MemoryDef>(clear_mdef) && "Expected memset to be associated with a memory def!");
+                        (void) clear_mdef;
                     }
+                    changed = true;
                 }
             }
         }
@@ -291,6 +366,7 @@ struct JuliaLICM : public JuliaPassContext {
 };
 
 bool JuliaLICMPassLegacy::runOnLoop(Loop *L, LPPassManager &LPM) {
+    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
     auto GetDT = [this]() -> DominatorTree & {
         return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     };
@@ -304,7 +380,7 @@ bool JuliaLICMPassLegacy::runOnLoop(Loop *L, LPPassManager &LPM) {
         return nullptr;
     };
     auto juliaLICM = JuliaLICM(GetDT, GetLI, GetMSSA, GetSE);
-    return juliaLICM.runOnLoop(L);
+    return juliaLICM.runOnLoop(L, ORE);
 }
 
 char JuliaLICMPassLegacy::ID = 0;
@@ -316,6 +392,7 @@ static RegisterPass<JuliaLICMPassLegacy>
 PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM,
                           LoopStandardAnalysisResults &AR, LPMUpdater &U)
 {
+    OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
     auto GetDT = [&AR]() -> DominatorTree & {
         return AR.DT;
     };
@@ -329,7 +406,11 @@ PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM,
         return &AR.SE;
     };
     auto juliaLICM = JuliaLICM(GetDT, GetLI, GetMSSA, GetSE);
-    if (juliaLICM.runOnLoop(&L)) {
+    if (juliaLICM.runOnLoop(&L, ORE)) {
+#ifdef JL_DEBUG_BUILD
+        if (AR.MSSA)
+            AR.MSSA->verifyMemorySSA();
+#endif
         auto preserved = getLoopPassPreservedAnalyses();
         preserved.preserveSet<CFGAnalyses>();
         preserved.preserve<MemorySSAAnalysis>();
@@ -343,7 +424,8 @@ Pass *createJuliaLICMPass()
     return new JuliaLICMPassLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createJuliaLICMPass());
 }
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index eec21c0c64010..2bf340be13b62 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -371,8 +371,8 @@ struct LateLowerGCFrame:  private JuliaPassContext {
     SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
     void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
     void RefineLiveSet(LargeSparseBitVector &LS, State &S, const std::vector<int> &CalleeRoots);
-    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V);
-    Value *EmitLoadTag(IRBuilder<> &builder, Value *V);
+    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
+    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
 };
 
 static unsigned getValueAddrSpace(Value *V) {
@@ -1262,6 +1262,7 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen)
     // We only emit single slot GV in codegen
     // but LLVM global merging can change the pointer operands to GEPs/bitcasts
     auto load_base = LI->getPointerOperand()->stripInBoundsOffsets();
+    assert(load_base); // Static analyzer
     auto gv = dyn_cast<GlobalVariable>(load_base);
     if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa),
                {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) {
@@ -2207,28 +2208,27 @@ std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
 }
 
 // Size of T is assumed to be `sizeof(void*)`
-Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)
+Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V)
 {
-    auto T_size = getSizeTy(T->getContext());
     assert(T == T_size || isa<PointerType>(T));
     auto TV = cast<PointerType>(V->getType());
     auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
     return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1));
 }
 
-Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V)
+Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V)
 {
-    auto T_size = getSizeTy(builder.getContext());
-    auto addr = EmitTagPtr(builder, T_size, V);
-    LoadInst *load = builder.CreateAlignedLoad(T_size, addr, Align(sizeof(size_t)));
+    auto addr = EmitTagPtr(builder, T_size, T_size, V);
+    auto &M = *builder.GetInsertBlock()->getModule();
+    LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0));
     load->setOrdering(AtomicOrdering::Unordered);
     load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
     MDBuilder MDB(load->getContext());
     auto *NullInt = ConstantInt::get(T_size, 0);
-    // We can be sure that the tag is larger than page size.
+    // We can be sure that the tag is at least 16 (1<<4)
     // Hopefully this is enough to convince LLVM that the value is still not NULL
     // after masking off the tag bits
-    auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 4096));
+    auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 16));
     load->setMetadata(LLVMContext::MD_range, MDB.createRange(NonNullInt, NullInt));
     return load;
 }
@@ -2278,7 +2278,7 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
 
 bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
     auto T_int32 = Type::getInt32Ty(F.getContext());
-    auto T_size = getSizeTy(F.getContext());
+    auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
     bool ChangesMade = false;
     // We create one alloca for all the jlcall frames that haven't been processed
     // yet. LLVM would merge them anyway later, so might as well save it a bit
@@ -2346,7 +2346,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
                 // `julia.gc_alloc_obj` except it doesn't set the tag.
                 auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
-                auto ptlsLoad = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
+                auto ptlsLoad = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe);
                 auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
                 auto newI = builder.CreateCall(
                     allocBytesIntrinsic,
@@ -2398,8 +2398,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                     }
                 }
                 // Set the tag.
+                auto &M = *builder.GetInsertBlock()->getModule();
                 StoreInst *store = builder.CreateAlignedStore(
-                    tag, EmitTagPtr(builder, tag_type, newI), Align(sizeof(size_t)));
+                    tag, EmitTagPtr(builder, tag_type, T_size, newI), M.getDataLayout().getPointerABIAlignment(0));
                 store->setOrdering(AtomicOrdering::Unordered);
                 store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
 
@@ -2413,7 +2414,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 assert(CI->arg_size() == 1);
                 IRBuilder<> builder(CI);
                 builder.SetCurrentDebugLocation(CI->getDebugLoc());
-                auto tag = EmitLoadTag(builder, CI->getArgOperand(0));
+                auto tag = EmitLoadTag(builder, T_size, CI->getArgOperand(0));
                 auto masked = builder.CreateAnd(tag, ConstantInt::get(T_size, ~(uintptr_t)15));
                 auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, JuliaType::get_pjlvalue_ty(masked->getContext())),
                                                        T_prjlvalue);
@@ -2513,14 +2514,14 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
         IRBuilder<> builder(CI);
         builder.SetCurrentDebugLocation(CI->getDebugLoc());
 #ifndef MMTK_GC
-        auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
+        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), 3);
         auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
         auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
         builder.SetInsertPoint(mayTrigTerm);
         Value *anyChldNotMarked = NULL;
         for (unsigned i = 1; i < CI->arg_size(); i++) {
             Value *child = CI->getArgOperand(i);
-            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1);
+            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), 1);
             Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
             anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
         }
@@ -2542,7 +2543,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
         // But for other MMTk plans, we need to be careful.
         const bool INLINE_WRITE_BARRIER = true;
         if (CI->getCalledOperand() == write_barrier_func) {
-            if (MMTK_NEEDS_WRITE_BARRIER == OBJECT_BARRIER) {
+            if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
                 if (INLINE_WRITE_BARRIER) {
                     auto i8_ty = Type::getInt8Ty(F.getContext());
                     auto intptr_ty = T_size;
@@ -2842,7 +2843,8 @@ Pass *createLateLowerGCFramePass() {
     return new LateLowerGCFrameLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLateLowerGCFramePass());
 }
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
index a5b05cb4f9066..57fb6ab1c7ed6 100644
--- a/src/llvm-lower-handlers.cpp
+++ b/src/llvm-lower-handlers.cpp
@@ -8,6 +8,7 @@
 
 #include <llvm/ADT/DepthFirstIterator.h>
 #include <llvm/ADT/Statistic.h>
+#include <llvm/ADT/Triple.h>
 #include <llvm/Analysis/CFG.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/Constants.h>
@@ -81,7 +82,7 @@ namespace {
  * If the module doesn't have declarations for the jl_enter_handler and setjmp
  * functions, insert them.
  */
-static void ensure_enter_function(Module &M)
+static void ensure_enter_function(Module &M, const Triple &TT)
 {
     auto T_int8  = Type::getInt8Ty(M.getContext());
     auto T_pint8 = PointerType::get(T_int8, 0);
@@ -96,9 +97,9 @@ static void ensure_enter_function(Module &M)
     if (!M.getNamedValue(jl_setjmp_name)) {
         std::vector<Type*> args2(0);
         args2.push_back(T_pint8);
-#ifndef _OS_WINDOWS_
-        args2.push_back(T_int32);
-#endif
+        if (!TT.isOSWindows()) {
+            args2.push_back(T_int32);
+        }
         Function::Create(FunctionType::get(T_int32, args2, false),
                          Function::ExternalLinkage, jl_setjmp_name, &M)
             ->addFnAttr(Attribute::ReturnsTwice);
@@ -107,10 +108,11 @@ static void ensure_enter_function(Module &M)
 
 static bool lowerExcHandlers(Function &F) {
     Module &M = *F.getParent();
+    Triple TT(M.getTargetTriple());
     Function *except_enter_func = M.getFunction("julia.except_enter");
     if (!except_enter_func)
         return false; // No EH frames in this module
-    ensure_enter_function(M);
+    ensure_enter_function(M, TT);
     Function *leave_func = M.getFunction(XSTR(jl_pop_handler));
     Function *jlenter_func = M.getFunction(XSTR(jl_enter_handler));
     Function *setjmp_func = M.getFunction(jl_setjmp_name);
@@ -197,14 +199,15 @@ static bool lowerExcHandlers(Function &F) {
             buff
         };
         CallInst::Create(lifetime_start, lifetime_args, "", new_enter);
-#ifndef _OS_WINDOWS_
-        // For LLVM 3.3 compatibility
-        Value *args[] = {buff,
-                         ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
-        auto sj = CallInst::Create(setjmp_func, args, "", enter);
-#else
-        auto sj = CallInst::Create(setjmp_func, buff, "", enter);
-#endif
+        CallInst *sj;
+        if (!TT.isOSWindows()) {
+            // For LLVM 3.3 compatibility
+            Value *args[] = {buff,
+                            ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
+            sj = CallInst::Create(setjmp_func, args, "", enter);
+        } else {
+            sj = CallInst::Create(setjmp_func, buff, "", enter);
+        }
         // We need to mark this on the call site as well. See issue #6757
         sj->setCanReturnTwice();
         if (auto dbg = enter->getMetadata(LLVMContext::MD_dbg)) {
@@ -269,7 +272,8 @@ Pass *createLowerExcHandlersPass()
     return new LowerExcHandlersLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLowerExcHandlersPass());
 }
diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp
index b66ea33e57384..efe0acb36f1fc 100644
--- a/src/llvm-muladd.cpp
+++ b/src/llvm-muladd.cpp
@@ -7,6 +7,7 @@
 #include <llvm-c/Types.h>
 
 #include <llvm/ADT/Statistic.h>
+#include <llvm/Analysis/OptimizationRemarkEmitter.h>
 #include <llvm/IR/Value.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/PassManager.h>
@@ -23,12 +24,18 @@
 #include "julia.h"
 #include "julia_assert.h"
 
-#define DEBUG_TYPE "combine_muladd"
+#define DEBUG_TYPE "combine-muladd"
 #undef DEBUG
 
 using namespace llvm;
 STATISTIC(TotalContracted, "Total number of multiplies marked for FMA");
 
+#ifndef __clang_gcanalyzer__
+#define REMARK(remark) ORE.emit(remark)
+#else
+#define REMARK(remark) (void) 0;
+#endif
+
 /**
  * Combine
  * ```
@@ -41,16 +48,27 @@ STATISTIC(TotalContracted, "Total number of multiplies marked for FMA");
  */
 
 // Return true if we changed the mulOp
-static bool checkCombine(Value *maybeMul) JL_NOTSAFEPOINT
+static bool checkCombine(Value *maybeMul, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT
 {
     auto mulOp = dyn_cast<Instruction>(maybeMul);
     if (!mulOp || mulOp->getOpcode() != Instruction::FMul)
         return false;
-    if (!mulOp->hasOneUse())
+    if (!mulOp->hasOneUse()) {
+        LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n");
+        REMARK([&](){
+            return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp)
+                << "fmul had multiple uses " << ore::NV("fmul", mulOp);
+        });
         return false;
+    }
     // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us.
     auto fmf = mulOp->getFastMathFlags();
     if (!fmf.allowContract()) {
+        LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n");
+        REMARK([&](){
+            return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp)
+                << "marked for fma " << ore::NV("fmul", mulOp);
+        });
         ++TotalContracted;
         fmf.setAllowContract(true);
         mulOp->copyFastMathFlags(fmf);
@@ -61,6 +79,7 @@ static bool checkCombine(Value *maybeMul) JL_NOTSAFEPOINT
 
 static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT
 {
+    OptimizationRemarkEmitter ORE(&F);
     bool modified = false;
     for (auto &BB: F) {
         for (auto it = BB.begin(); it != BB.end();) {
@@ -70,13 +89,13 @@ static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT
             case Instruction::FAdd: {
                 if (!I.isFast())
                     continue;
-                modified |= checkCombine(I.getOperand(0)) || checkCombine(I.getOperand(1));
+                modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE);
                 break;
             }
             case Instruction::FSub: {
                 if (!I.isFast())
                     continue;
-                modified |= checkCombine(I.getOperand(0)) || checkCombine(I.getOperand(1));
+                modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE);
                 break;
             }
             default:
@@ -120,7 +139,8 @@ Pass *createCombineMulAddPass()
     return new CombineMulAddLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createCombineMulAddPass());
 }
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index bb1f6590a3207..814b13554358c 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -3,6 +3,8 @@
 // Function multi-versioning
 // LLVM pass to clone function for different archs
 
+//see src/processor.h for documentation of the relevant globals inserted here
+
 #include "llvm-version.h"
 #include "passes.h"
 
@@ -10,12 +12,15 @@
 #include <llvm-c/Types.h>
 
 #include <llvm/Pass.h>
+#include <llvm/ADT/BitVector.h>
 #include <llvm/ADT/Statistic.h>
+#include <llvm/ADT/Triple.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/Constants.h>
+#include <llvm/IR/Dominators.h>
 #include <llvm/IR/LLVMContext.h>
 #include <llvm/Analysis/LoopInfo.h>
 #include <llvm/Analysis/CallGraph.h>
@@ -43,11 +48,7 @@
 
 using namespace llvm;
 
-extern Optional<bool> always_have_fma(Function&);
-
-extern Optional<bool> always_have_fp16();
-
-void replaceUsesWithLoad(Function &F, function_ref<GlobalVariable *(Instruction &I)> should_replace, MDNode *tbaa_const);
+extern Optional<bool> always_have_fma(Function&, const Triple &TT);
 
 namespace {
 constexpr uint32_t clone_mask =
@@ -65,181 +66,284 @@ Value *map_get(T &&vmap, Value *key, Value *def=nullptr)
     return val;
 }
 
-// Iterate through uses of a particular type.
-// Recursively scan through `ConstantExpr` and `ConstantAggregate` use.
-template<typename U>
-struct ConstantUses {
-    template<typename T>
-    struct Info {
-        Use *use;
-        T *val;
-        // If `samebits == true`, the offset the original value appears in the constant.
-        size_t offset;
-        // This specify whether the original value appears in the current value in exactly
-        // the same bit pattern (with possibly an offset determined by `offset`).
-        bool samebits;
-        Info(Use *use, T *val, size_t offset, bool samebits) :
-            use(use),
-            val(val),
-            offset(offset),
-            samebits(samebits)
-        {
-        }
-        Info(Use *use, size_t offset, bool samebits) :
-            use(use),
-            val(cast<T>(use->getUser())),
-            offset(offset),
-            samebits(samebits)
-        {
-        }
-    };
-    using UseInfo = Info<U>;
-    struct Frame : Info<Constant> {
-        template<typename... Args>
-        Frame(Args &&... args) :
-            Info<Constant>(std::forward<Args>(args)...),
-            cur(this->val->use_empty() ? nullptr : &*this->val->use_begin()),
-            _next(cur ? cur->getNext() : nullptr)
-        {
-        }
-    private:
-        void next()
-        {
-            cur = _next;
-            if (!cur)
-                return;
-            _next = cur->getNext();
+static bool is_vector(FunctionType *ty)
+{
+    if (ty->getReturnType()->isVectorTy())
+        return true;
+    for (auto arg: ty->params()) {
+        if (arg->isVectorTy()) {
+            return true;
         }
-        Use *cur;
-        Use *_next;
-        friend struct ConstantUses;
-    };
-    ConstantUses(Constant *c, Module &M)
-        : stack{Frame(nullptr, c, 0u, true)},
-          M(M)
-    {
-        forward();
     }
-    UseInfo get_info() const
-    {
-        auto &top = stack.back();
-        return UseInfo(top.cur, top.offset, top.samebits);
+    return false;
+}
+
+static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_veccall)
+{
+    DominatorTree DT(F);
+    LoopInfo LI(DT);
+    uint32_t flag = 0;
+    if (!LI.empty())
+        flag |= JL_TARGET_CLONE_LOOP;
+    if (is_vector(F.getFunctionType())) {
+        flag |= JL_TARGET_CLONE_SIMD;
+        has_veccall = true;
     }
-    const SmallVector<Frame, 4> &get_stack() const
-    {
-        return stack;
+    for (auto &bb: F) {
+        for (auto &I: bb) {
+            if (auto call = dyn_cast<CallInst>(&I)) {
+                if (is_vector(call->getFunctionType())) {
+                    has_veccall = true;
+                    flag |= JL_TARGET_CLONE_SIMD;
+                }
+                if (auto callee = call->getCalledFunction()) {
+                    auto name = callee->getName();
+                    if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
+                        flag |= JL_TARGET_CLONE_MATH;
+                    }
+                    else if (name.startswith("julia.cpu.")) {
+                        if (name.startswith("julia.cpu.have_fma.")) {
+                            // for some platforms we know they always do (or don't) support
+                            // FMA. in those cases we don't need to clone the function.
+                            if (!always_have_fma(*callee, TT).hasValue())
+                                flag |= JL_TARGET_CLONE_CPU;
+                        } else {
+                            flag |= JL_TARGET_CLONE_CPU;
+                        }
+                    }
+                }
+            }
+            else if (auto store = dyn_cast<StoreInst>(&I)) {
+                if (store->getValueOperand()->getType()->isVectorTy()) {
+                    flag |= JL_TARGET_CLONE_SIMD;
+                }
+            }
+            else if (I.getType()->isVectorTy()) {
+                flag |= JL_TARGET_CLONE_SIMD;
+            }
+            if (auto mathOp = dyn_cast<FPMathOperator>(&I)) {
+                if (mathOp->getFastMathFlags().any()) {
+                    flag |= JL_TARGET_CLONE_MATH;
+                }
+            }
+
+            for (size_t i = 0; i < I.getNumOperands(); i++) {
+                if(I.getOperand(i)->getType()->isHalfTy()){
+                    flag |= JL_TARGET_CLONE_FLOAT16;
+                }
+                // Check for BFloat16 when they are added to julia can be done here
+            }
+            uint32_t veccall_flags = JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16;
+            if (has_veccall && (flag & veccall_flags) == veccall_flags) {
+                return flag;
+            }
+        }
     }
-    void next()
-    {
-        stack.back().next();
-        forward();
+    return flag;
+}
+
+struct TargetSpec {
+    std::string cpu_name;
+    std::string cpu_features;
+    uint32_t base;
+    uint32_t flags;
+
+    TargetSpec() = default;
+
+    static TargetSpec fromSpec(jl_target_spec_t &spec) {
+        TargetSpec out;
+        out.cpu_name = spec.cpu_name;
+        out.cpu_features = spec.cpu_features;
+        out.base = spec.base;
+        out.flags = spec.flags;
+        return out;
     }
-    bool done()
-    {
-        return stack.empty();
+
+    static TargetSpec fromMD(MDTuple *tup) {
+        TargetSpec out;
+        assert(tup->getNumOperands() == 4);
+        out.cpu_name = cast<MDString>(tup->getOperand(0))->getString().str();
+        out.cpu_features = cast<MDString>(tup->getOperand(1))->getString().str();
+        out.base = cast<ConstantInt>(cast<ConstantAsMetadata>(tup->getOperand(2))->getValue())->getZExtValue();
+        out.flags = cast<ConstantInt>(cast<ConstantAsMetadata>(tup->getOperand(3))->getValue())->getZExtValue();
+        return out;
+    }
+
+    MDNode *toMD(LLVMContext &ctx) const {
+        return MDTuple::get(ctx, {
+            MDString::get(ctx, cpu_name),
+            MDString::get(ctx, cpu_features),
+            ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(ctx), base)),
+            ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(ctx), flags))
+        });
     }
-private:
-    void forward();
-    SmallVector<Frame, 4> stack;
-    Module &M;
 };
 
-template<typename U>
-void ConstantUses<U>::forward()
-{
-    assert(!stack.empty());
-    auto frame = &stack.back();
-    const DataLayout &DL = M.getDataLayout();
-    auto pop = [&] {
-        stack.pop_back();
-        if (stack.empty()) {
-            return false;
-        }
-        frame = &stack.back();
-        return true;
-    };
-    auto push = [&] (Use *use, Constant *c, size_t offset, bool samebits) {
-        stack.emplace_back(use, c, offset, samebits);
-        frame = &stack.back();
-    };
-    auto handle_constaggr = [&] (Use *use, ConstantAggregate *aggr) {
-        if (!frame->samebits) {
-            push(use, aggr, 0, false);
-            return;
-        }
-        if (auto strct = dyn_cast<ConstantStruct>(aggr)) {
-            auto layout = DL.getStructLayout(strct->getType());
-            push(use, strct, frame->offset + layout->getElementOffset(use->getOperandNo()), true);
-        }
-        else if (auto ary = dyn_cast<ConstantArray>(aggr)) {
-            auto elty = ary->getType()->getElementType();
-            push(use, ary, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true);
-        }
-        else if (auto vec = dyn_cast<ConstantVector>(aggr)) {
-            auto elty = vec->getType()->getElementType();
-            push(use, vec, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true);
-        }
-        else {
-            jl_safe_printf("Unknown ConstantAggregate:\n");
-            llvm_dump(aggr);
-            abort();
-        }
-    };
-    auto handle_constexpr = [&] (Use *use, ConstantExpr *expr) {
-        if (!frame->samebits) {
-            push(use, expr, 0, false);
-            return;
-        }
-        auto opcode = expr->getOpcode();
-        if (opcode == Instruction::PtrToInt || opcode == Instruction::IntToPtr ||
-            opcode == Instruction::AddrSpaceCast || opcode == Instruction::BitCast) {
-            push(use, expr, frame->offset, true);
-        }
-        else {
-            push(use, expr, 0, false);
-        }
-    };
-    while (true) {
-        auto use = frame->cur;
-        if (!use) {
-            if (!pop())
-                return;
+static Optional<std::vector<TargetSpec>> get_target_specs(Module &M) {
+    auto md = M.getModuleFlag("julia.mv.specs");
+    if (!md)
+        return None;
+    auto tup = cast<MDTuple>(md);
+    std::vector<TargetSpec> out(tup->getNumOperands());
+    for (unsigned i = 0; i < tup->getNumOperands(); i++) {
+        out[i] = TargetSpec::fromMD(cast<MDTuple>(tup->getOperand(i).get()));
+    }
+    return out;
+}
+
+static void set_target_specs(Module &M, ArrayRef<TargetSpec> specs) {
+    std::vector<Metadata *> md;
+    md.reserve(specs.size());
+    for (auto &spec: specs) {
+        md.push_back(spec.toMD(M.getContext()));
+    }
+    M.addModuleFlag(Module::Error, "julia.mv.specs", MDTuple::get(M.getContext(), md));
+}
+
+static void annotate_module_clones(Module &M) {
+    auto TT = Triple(M.getTargetTriple());
+    CallGraph CG(M);
+    std::vector<Function *> orig_funcs;
+    for (auto &F: M) {
+        if (F.isDeclaration())
             continue;
+        orig_funcs.push_back(&F);
+    }
+    bool has_veccall = false;
+    std::vector<TargetSpec> specs;
+    if (auto maybe_specs = get_target_specs(M)) {
+        specs = std::move(*maybe_specs);
+    } else {
+        auto full_specs = jl_get_llvm_clone_targets();
+        specs.reserve(full_specs.size());
+        for (auto &spec: full_specs) {
+            specs.push_back(TargetSpec::fromSpec(spec));
         }
-        auto user = use->getUser();
-        if (isa<U>(user))
-            return;
-        frame->next();
-        if (auto aggr = dyn_cast<ConstantAggregate>(user)) {
-            handle_constaggr(use, aggr);
+        set_target_specs(M, specs);
+    }
+    std::vector<APInt> clones(orig_funcs.size(), APInt(specs.size(), 0));
+    BitVector subtarget_cloned(orig_funcs.size());
+
+    std::vector<unsigned> func_infos(orig_funcs.size());
+    for (unsigned i = 0; i < orig_funcs.size(); i++) {
+        func_infos[i] = collect_func_info(*orig_funcs[i], TT, has_veccall);
+    }
+    for (unsigned i = 1; i < specs.size(); i++) {
+        if (specs[i].flags & JL_TARGET_CLONE_ALL) {
+            for (unsigned j = 0; j < orig_funcs.size(); j++) {
+                clones[j].setBit(i);
+            }
+        } else {
+            unsigned flag = specs[i].flags & clone_mask;
+            std::set<Function*> sets[2];
+            for (unsigned j = 0; j < orig_funcs.size(); j++) {
+                if (!(func_infos[j] & flag)) {
+                    continue;
+                }
+                sets[0].insert(orig_funcs[j]);
+            }
+            std::set<Function*> all_origs(sets[0]);
+            auto *cur_set = &sets[0];
+            auto *next_set = &sets[1];
+            // Reduce dispatch by expand the cloning set to functions that are directly called by
+            // and calling cloned functions.
+            while (!cur_set->empty()) {
+                for (auto orig_f: *cur_set) {
+                    // Use the uncloned function since it's already in the call graph
+                    auto node = CG[orig_f];
+                    for (const auto &I: *node) {
+                        auto child_node = I.second;
+                        auto orig_child_f = child_node->getFunction();
+                        if (!orig_child_f)
+                            continue;
+                        // Already cloned
+                        if (all_origs.count(orig_child_f))
+                            continue;
+                        bool calling_clone = false;
+                        for (const auto &I2: *child_node) {
+                            auto orig_child_f2 = I2.second->getFunction();
+                            if (!orig_child_f2)
+                                continue;
+                            if (all_origs.count(orig_child_f2)) {
+                                calling_clone = true;
+                                break;
+                            }
+                        }
+                        if (!calling_clone)
+                            continue;
+                        next_set->insert(orig_child_f);
+                        all_origs.insert(orig_child_f);
+                    }
+                }
+                std::swap(cur_set, next_set);
+                next_set->clear();
+            }
+            for (unsigned j = 0; j < orig_funcs.size(); j++) {
+                if (all_origs.count(orig_funcs[j])) {
+                    clones[j].setBit(i);
+                    subtarget_cloned.set(j);
+                }
+            }
         }
-        else if (auto expr = dyn_cast<ConstantExpr>(user)) {
-            handle_constexpr(use, expr);
+    }
+    // if there's only one target, we won't need any relocation slots
+    // but even if there is one clone_all and one non-clone_all, we still need
+    // to check for relocation slots because we must fixup instruction uses to
+    // point at the right function.
+    if (specs.size() > 1) {
+        for (unsigned i = 0; i < orig_funcs.size(); i++) {
+            auto &F = *orig_funcs[i];
+            if (subtarget_cloned[i] && !ConstantUses<Instruction>(orig_funcs[i], M).done()) {
+                F.addFnAttr("julia.mv.reloc", "");
+            } else {
+                auto uses = ConstantUses<GlobalValue>(orig_funcs[i], M);
+                if (!uses.done()) {
+                    bool slot = false;
+                    for (; !uses.done(); uses.next()) {
+                        if (isa<GlobalAlias>(uses.get_info().val)) {
+                            slot = true;
+                            break;
+                        }
+                    }
+                    if (slot) {
+                        F.addFnAttr("julia.mv.reloc", "");
+                    } else {
+                        F.addFnAttr("julia.mv.fvar", "");
+                    }
+                }
+            }
         }
     }
+    SmallString<128> cloneset;
+    for (unsigned i = 0; i < orig_funcs.size(); i++) {
+        if (!clones[i].isZero()) {
+            auto &F = *orig_funcs[i];
+            cloneset.clear();
+            clones[i].toStringUnsigned(cloneset, 16);
+            F.addFnAttr("julia.mv.clones", cloneset);
+        }
+    }
+    if (has_veccall) {
+        M.addModuleFlag(Module::Max, "julia.mv.veccall", 1);
+    }
+    M.addModuleFlag(Module::Error, "julia.mv.annotated", 1);
 }
 
 struct CloneCtx {
     struct Target {
         int idx;
-        uint32_t flags;
         std::unique_ptr<ValueToValueMapTy> vmap; // ValueToValueMapTy is not movable....
-        // function ids that needs relocation to be initialized
-        std::set<uint32_t> relocs{};
-        Target(int idx, const jl_target_spec_t &spec) :
+        explicit Target(int idx) :
             idx(idx),
-            flags(spec.flags),
             vmap(new ValueToValueMapTy)
         {
         }
     };
     struct Group : Target {
         std::vector<Target> clones;
-        std::set<uint32_t> clone_fs;
-        Group(int base, const jl_target_spec_t &spec) :
-            Target(base, spec),
-            clones{},
-            clone_fs{}
+        explicit Group(int base) :
+            Target(base),
+            clones{}
         {}
         Function *base_func(Function *orig_f) const
         {
@@ -247,55 +351,51 @@ struct CloneCtx {
                 return orig_f;
             return cast<Function>(vmap->lookup(orig_f));
         }
+
+        bool has_subtarget_clone(Function *orig_f) const
+        {
+            auto base = base_func(orig_f);
+            for (auto &clone: clones) {
+                if (map_get(*clone.vmap, base))
+                    return true;
+            }
+            return false;
+        }
     };
-    CloneCtx(Module &M, function_ref<LoopInfo&(Function&)> GetLI, function_ref<CallGraph&()> GetCG, bool allow_bad_fvars);
-    void clone_bases();
-    void collect_func_infos();
-    void clone_all_partials();
+    CloneCtx(Module &M, bool allow_bad_fvars);
+    void prepare_slots();
+    void clone_decls();
+    void clone_bodies();
     void fix_gv_uses();
+    void finalize_orig_clone_attr();
     void fix_inst_uses();
+    void finalize_orig_features();
     void emit_metadata();
 private:
     void prepare_vmap(ValueToValueMapTy &vmap);
-    bool is_vector(FunctionType *ty) const;
-    void clone_function(Function *F, Function *new_f, ValueToValueMapTy &vmap);
-    uint32_t collect_func_info(Function &F);
-    void check_partial(Group &grp, Target &tgt);
     void clone_partial(Group &grp, Target &tgt);
-    void add_features(Function *F, StringRef name, StringRef features, uint32_t flags) const;
-    template<typename T>
-    T *add_comdat(T *G) const;
-    uint32_t get_func_id(Function *F);
-    template<typename Stack>
-    Constant *rewrite_gv_init(const Stack& stack);
-    std::pair<uint32_t,GlobalVariable*> get_reloc_slot(Function *F);
-    Constant *get_ptrdiff32(Constant *ptr, Constant *base) const;
-    template<typename T>
-    Constant *emit_offset_table(const std::vector<T*> &vars, StringRef name) const;
+    uint32_t get_func_id(Function *F) const;
+    std::pair<uint32_t,GlobalVariable*> get_reloc_slot(Function *F) const;
     void rewrite_alias(GlobalAlias *alias, Function* F);
 
     MDNode *tbaa_const;
-    std::vector<jl_target_spec_t> specs;
+    std::vector<TargetSpec> specs;
     std::vector<Group> groups{};
+    std::vector<Target *> linearized;
     std::vector<Function*> fvars;
     std::vector<Constant*> gvars;
     Module &M;
-    function_ref<LoopInfo&(Function&)> GetLI;
-    function_ref<CallGraph&()> GetCG;
+    Type *T_size;
+    Triple TT;
 
     // Map from original function to one based index in `fvars`
     std::map<const Function*,uint32_t> func_ids{};
     std::vector<Function*> orig_funcs{};
-    std::vector<uint32_t> func_infos{};
-    std::set<Function*> cloned{};
     // GV addresses and their corresponding function id (i.e. 0-based index in `fvars`)
     std::vector<std::pair<Constant*,uint32_t>> gv_relocs{};
     // Mapping from function id (i.e. 0-based index in `fvars`) to GVs to be initialized.
     std::map<uint32_t,GlobalVariable*> const_relocs;
-    // Functions that were referred to by a global alias, and might not have other uses.
-    std::set<uint32_t> alias_relocs;
-    bool has_veccall{false};
-    bool has_cloneall{false};
+    std::map<Function *, GlobalVariable*> extern_relocs;
     bool allow_bad_fvars{false};
 };
 
@@ -336,42 +436,43 @@ static inline std::vector<T*> consume_gv(Module &M, const char *name, bool allow
 }
 
 // Collect basic information about targets and functions.
-CloneCtx::CloneCtx(Module &M, function_ref<LoopInfo&(Function&)> GetLI, function_ref<CallGraph&()> GetCG, bool allow_bad_fvars)
+CloneCtx::CloneCtx(Module &M, bool allow_bad_fvars)
     : tbaa_const(tbaa_make_child_with_context(M.getContext(), "jtbaa_const", nullptr, true).first),
-      specs(jl_get_llvm_clone_targets()),
-      fvars(consume_gv<Function>(M, "jl_sysimg_fvars", allow_bad_fvars)),
-      gvars(consume_gv<Constant>(M, "jl_sysimg_gvars", false)),
+      specs(*get_target_specs(M)),
+      fvars(consume_gv<Function>(M, "jl_fvars", allow_bad_fvars)),
+      gvars(consume_gv<Constant>(M, "jl_gvars", false)),
       M(M),
-      GetLI(GetLI),
-      GetCG(GetCG),
+      T_size(M.getDataLayout().getIntPtrType(M.getContext())),
+      TT(M.getTargetTriple()),
       allow_bad_fvars(allow_bad_fvars)
 {
-    groups.emplace_back(0, specs[0]);
+    groups.emplace_back(0);
+    linearized.resize(specs.size());
+    linearized[0] = &groups[0];
+    std::vector<unsigned> group_ids(specs.size(), 0);
     uint32_t ntargets = specs.size();
     for (uint32_t i = 1; i < ntargets; i++) {
         auto &spec = specs[i];
         if (spec.flags & JL_TARGET_CLONE_ALL) {
-            has_cloneall = true;
-            groups.emplace_back(i, spec);
+            group_ids[i] = groups.size();
+            groups.emplace_back(i);
         }
         else {
-            auto base = spec.base;
-            bool found = false;
-            for (auto &grp: groups) {
-                if (grp.idx == base) {
-                    found = true;
-                    grp.clones.emplace_back(i, spec);
-                    break;
-                }
-            }
-            (void)found;
+            assert(0 <= spec.base && (unsigned) spec.base < i);
+            group_ids[i] = group_ids[spec.base];
+            groups[group_ids[i]].clones.emplace_back(i);
         }
     }
+    for (auto &grp: groups) {
+        for (auto &tgt: grp.clones)
+            linearized[tgt.idx] = &tgt;
+        linearized[grp.idx] = &grp;
+    }
     uint32_t nfvars = fvars.size();
     for (uint32_t i = 0; i < nfvars; i++)
         func_ids[fvars[i]] = i + 1;
     for (auto &F: M) {
-        if (F.empty())
+        if (F.empty() && !F.hasFnAttribute("julia.mv.clones"))
             continue;
         orig_funcs.push_back(&F);
     }
@@ -391,297 +492,132 @@ void CloneCtx::prepare_vmap(ValueToValueMapTy &vmap)
     }
 }
 
-void CloneCtx::clone_function(Function *F, Function *new_f, ValueToValueMapTy &vmap)
+void CloneCtx::prepare_slots()
 {
-    Function::arg_iterator DestI = new_f->arg_begin();
-    for (Function::const_arg_iterator J = F->arg_begin(); J != F->arg_end(); ++J) {
-        DestI->setName(J->getName());
-        vmap[&*J] = &*DestI++;
-    }
-    SmallVector<ReturnInst*,8> Returns;
-#if JL_LLVM_VERSION >= 130000
-    // We are cloning into the same module
-    CloneFunctionInto(new_f, F, vmap, CloneFunctionChangeType::GlobalChanges, Returns);
-#else
-    CloneFunctionInto(new_f, F, vmap, true, Returns);
-#endif
-}
-
-// Clone all clone_all targets. Makes sure that the base targets are all available.
-void CloneCtx::clone_bases()
-{
-    if (!has_cloneall)
-        return;
-    uint32_t ngrps = groups.size();
-    for (uint32_t gid = 1; gid < ngrps; gid++) {
-        auto &grp = groups[gid];
-        auto suffix = ".clone_" + std::to_string(grp.idx);
-        auto &vmap = *grp.vmap;
-        // Fill in old->new mapping. We need to do this before cloning the function so that
-        // the intra target calls are automatically fixed up on cloning.
-        for (auto F: orig_funcs) {
-            Function *new_f = Function::Create(F->getFunctionType(), F->getLinkage(),
-                                               F->getName() + suffix, &M);
-            new_f->copyAttributesFrom(F);
-            vmap[F] = new_f;
-        }
-        prepare_vmap(vmap);
-        for (auto F: orig_funcs) {
-            clone_function(F, cast<Function>(vmap.lookup(F)), vmap);
-        }
-    }
-}
-
-bool CloneCtx::is_vector(FunctionType *ty) const
-{
-    if (ty->getReturnType()->isVectorTy())
-        return true;
-    for (auto arg: ty->params()) {
-        if (arg->isVectorTy()) {
-            return true;
-        }
-    }
-    return false;
-}
-
-uint32_t CloneCtx::collect_func_info(Function &F)
-{
-    uint32_t flag = 0;
-    if (!GetLI(F).empty())
-        flag |= JL_TARGET_CLONE_LOOP;
-    if (is_vector(F.getFunctionType())) {
-        flag |= JL_TARGET_CLONE_SIMD;
-        has_veccall = true;
-    }
-    for (auto &bb: F) {
-        for (auto &I: bb) {
-            if (auto call = dyn_cast<CallInst>(&I)) {
-                if (is_vector(call->getFunctionType())) {
-                    has_veccall = true;
-                    flag |= JL_TARGET_CLONE_SIMD;
-                }
-                if (auto callee = call->getCalledFunction()) {
-                    auto name = callee->getName();
-                    if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
-                        flag |= JL_TARGET_CLONE_MATH;
-                    }
-                    else if (name.startswith("julia.cpu.")) {
-                        if (name.startswith("julia.cpu.have_fma.")) {
-                            // for some platforms we know they always do (or don't) support
-                            // FMA. in those cases we don't need to clone the function.
-                            if (!always_have_fma(*callee).hasValue())
-                                flag |= JL_TARGET_CLONE_CPU;
-                        } else {
-                            flag |= JL_TARGET_CLONE_CPU;
-                        }
-                    }
-                }
+    for (auto &F : orig_funcs) {
+        if (F->hasFnAttribute("julia.mv.reloc")) {
+            assert(F->hasFnAttribute("julia.mv.clones"));
+            GlobalVariable *GV = new GlobalVariable(M, F->getType(), false, GlobalValue::ExternalLinkage, nullptr, F->getName() + ".reloc_slot");
+            GV->setVisibility(GlobalValue::HiddenVisibility);
+            GV->setDSOLocal(true);
+            if (F->isDeclaration()) {
+                extern_relocs[F] = GV;
             }
-            else if (auto store = dyn_cast<StoreInst>(&I)) {
-                if (store->getValueOperand()->getType()->isVectorTy()) {
-                    flag |= JL_TARGET_CLONE_SIMD;
-                }
-            }
-            else if (I.getType()->isVectorTy()) {
-                flag |= JL_TARGET_CLONE_SIMD;
-            }
-            if (auto mathOp = dyn_cast<FPMathOperator>(&I)) {
-                if (mathOp->getFastMathFlags().any()) {
-                    flag |= JL_TARGET_CLONE_MATH;
-                }
-            }
-            if(!always_have_fp16().hasValue()){
-                for (size_t i = 0; i < I.getNumOperands(); i++) {
-                    if(I.getOperand(i)->getType()->isHalfTy()){
-                        flag |= JL_TARGET_CLONE_FLOAT16;
-                    }
-                    // Check for BFloat16 when they are added to julia can be done here
-                }
-            }
-            if (has_veccall && (flag & JL_TARGET_CLONE_SIMD) && (flag & JL_TARGET_CLONE_MATH) &&
-               (flag & JL_TARGET_CLONE_CPU) && (flag & JL_TARGET_CLONE_FLOAT16)) {
-                return flag;
+            else {
+                auto id = get_func_id(F);
+                const_relocs[id] = GV;
+                GV->setInitializer(Constant::getNullValue(F->getType()));
             }
         }
     }
-    return flag;
-}
-
-void CloneCtx::collect_func_infos()
-{
-    uint32_t nfuncs = orig_funcs.size();
-    func_infos.resize(nfuncs);
-    for (uint32_t i = 0; i < nfuncs; i++) {
-        func_infos[i] = collect_func_info(*orig_funcs[i]);
-    }
 }
 
-void CloneCtx::clone_all_partials()
+void CloneCtx::clone_decls()
 {
-    // First decide what to clone
-    // Do this before actually cloning the functions
-    // so that the call graph is easier to understand
-    for (auto &grp: groups) {
-        for (auto &tgt: grp.clones) {
-            check_partial(grp, tgt);
-        }
-    }
-    for (auto &grp: groups) {
-        for (auto &tgt: grp.clones)
-            clone_partial(grp, tgt);
-        // Also set feature strings for base target functions
-        // now that all the actual cloning is done.
-        auto &base_spec = specs[grp.idx];
-        for (auto orig_f: orig_funcs) {
-            add_features(grp.base_func(orig_f), base_spec.cpu_name,
-                         base_spec.cpu_features, base_spec.flags);
-        }
+    std::vector<std::string> suffixes(specs.size());
+    for (unsigned i = 1; i < specs.size(); i++) {
+        suffixes[i] = "." + std::to_string(i);
     }
-    func_infos.clear(); // We don't need this anymore
-}
-
-void CloneCtx::check_partial(Group &grp, Target &tgt)
-{
-    auto flag = specs[tgt.idx].flags & clone_mask;
-    auto suffix = ".clone_" + std::to_string(tgt.idx);
-    auto &vmap = *tgt.vmap;
-    uint32_t nfuncs = func_infos.size();
-
-    std::set<Function*> all_origs;
-    // Use a simple heuristic to decide which function we need to clone.
-    for (uint32_t i = 0; i < nfuncs; i++) {
-        if (!(func_infos[i] & flag))
+    for (auto &F : orig_funcs) {
+        if (!F->hasFnAttribute("julia.mv.clones"))
             continue;
-        auto orig_f = orig_funcs[i];
-        // Fill in old->new mapping. We need to do this before cloning the function so that
-        // the intra target calls are automatically fixed up on cloning.
-        auto F = grp.base_func(orig_f);
-        Function *new_f = Function::Create(F->getFunctionType(), F->getLinkage(),
-                                           F->getName() + suffix, &M);
-        new_f->copyAttributesFrom(F);
-        vmap[F] = new_f;
-        if (!has_cloneall)
-            cloned.insert(orig_f);
-        grp.clone_fs.insert(i);
-        all_origs.insert(orig_f);
-    }
-    std::set<Function*> sets[2]{all_origs, std::set<Function*>{}};
-    auto *cur_set = &sets[0];
-    auto *next_set = &sets[1];
-    // Reduce dispatch by expand the cloning set to functions that are directly called by
-    // and calling cloned functions.
-    auto &graph = GetCG();
-    while (!cur_set->empty()) {
-        for (auto orig_f: *cur_set) {
-            // Use the uncloned function since it's already in the call graph
-            auto node = graph[orig_f];
-            for (const auto &I: *node) {
-                auto child_node = I.second;
-                auto orig_child_f = child_node->getFunction();
-                if (!orig_child_f)
-                    continue;
-                // Already cloned
-                if (all_origs.count(orig_child_f))
-                    continue;
-                bool calling_clone = false;
-                for (const auto &I2: *child_node) {
-                    auto orig_child_f2 = I2.second->getFunction();
-                    if (!orig_child_f2)
-                        continue;
-                    if (all_origs.count(orig_child_f2)) {
-                        calling_clone = true;
-                        break;
-                    }
-                }
-                if (!calling_clone)
-                    continue;
-                next_set->insert(orig_child_f);
-                all_origs.insert(orig_child_f);
-                auto child_f = grp.base_func(orig_child_f);
-                Function *new_f = Function::Create(child_f->getFunctionType(),
-                                                   child_f->getLinkage(),
-                                                   child_f->getName() + suffix, &M);
-                new_f->copyAttributesFrom(child_f);
-                vmap[child_f] = new_f;
+        APInt clones(specs.size(), F->getFnAttribute("julia.mv.clones").getValueAsString(), 16);
+        for (unsigned i = 1; i < specs.size(); i++) {
+            if (!clones[i]) {
+                continue;
             }
-        }
-        std::swap(cur_set, next_set);
-        next_set->clear();
-    }
-    for (uint32_t i = 0; i < nfuncs; i++) {
-        // Only need to handle expanded functions
-        if (func_infos[i] & flag)
-            continue;
-        auto orig_f = orig_funcs[i];
-        if (all_origs.count(orig_f)) {
-            if (!has_cloneall)
-                cloned.insert(orig_f);
-            grp.clone_fs.insert(i);
+            auto new_F = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName() + suffixes[i], &M);
+            new_F->copyAttributesFrom(F);
+            new_F->setVisibility(F->getVisibility());
+            new_F->setDSOLocal(true);
+            auto base_func = F;
+            if (specs[i].flags & JL_TARGET_CLONE_ALL)
+                base_func = static_cast<Group*>(linearized[specs[i].base])->base_func(F);
+            (*linearized[i]->vmap)[base_func] = new_F;
         }
     }
 }
 
-void CloneCtx::clone_partial(Group &grp, Target &tgt)
+static void clone_function(Function *F, Function *new_f, ValueToValueMapTy &vmap)
 {
-    auto &spec = specs[tgt.idx];
-    auto &vmap = *tgt.vmap;
-    uint32_t nfuncs = orig_funcs.size();
-    prepare_vmap(vmap);
-    for (uint32_t i = 0; i < nfuncs; i++) {
-        auto orig_f = orig_funcs[i];
-        auto F = grp.base_func(orig_f);
-        if (auto new_v = map_get(vmap, F)) {
-            auto new_f = cast<Function>(new_v);
-            assert(new_f != F);
-            clone_function(F, new_f, vmap);
-            // We can set the feature strings now since no one is going to
-            // clone these functions again.
-            add_features(new_f, spec.cpu_name, spec.cpu_features, spec.flags);
-        }
+    Function::arg_iterator DestI = new_f->arg_begin();
+    for (Function::const_arg_iterator J = F->arg_begin(); J != F->arg_end(); ++J) {
+        DestI->setName(J->getName());
+        vmap[&*J] = &*DestI++;
     }
+    SmallVector<ReturnInst*,8> Returns;
+    // We are cloning into the same module
+    CloneFunctionInto(new_f, F, vmap, CloneFunctionChangeType::GlobalChanges, Returns);
 }
 
-void CloneCtx::add_features(Function *F, StringRef name, StringRef features, uint32_t flags) const
+static void add_features(Function *F, TargetSpec &spec)
 {
     auto attr = F->getFnAttribute("target-features");
     if (attr.isStringAttribute()) {
         std::string new_features(attr.getValueAsString());
         new_features += ",";
-        new_features += features;
+        new_features += spec.cpu_features;
         F->addFnAttr("target-features", new_features);
     }
     else {
-        F->addFnAttr("target-features", features);
+        F->addFnAttr("target-features", spec.cpu_features);
     }
-    F->addFnAttr("target-cpu", name);
+    F->addFnAttr("target-cpu", spec.cpu_name);
     if (!F->hasFnAttribute(Attribute::OptimizeNone)) {
-        if (flags & JL_TARGET_OPTSIZE) {
+        if (spec.flags & JL_TARGET_OPTSIZE) {
             F->addFnAttr(Attribute::OptimizeForSize);
         }
-        else if (flags & JL_TARGET_MINSIZE) {
+        else if (spec.flags & JL_TARGET_MINSIZE) {
             F->addFnAttr(Attribute::MinSize);
         }
     }
 }
 
-uint32_t CloneCtx::get_func_id(Function *F)
+void CloneCtx::clone_bodies()
 {
-    auto &ref = func_ids[F];
-    if (!ref) {
-        if (allow_bad_fvars && F->isDeclaration()) {
-            // This should never happen in regular use, but can happen if
-            // bugpoint deletes the function. Just do something here to
-            // allow bugpoint to proceed.
-            return (uint32_t)-1;
+    for (auto F : orig_funcs) {
+        for (unsigned i = 0; i < groups.size(); i++) {
+            Function *group_F = F;
+            if (i != 0) {
+                group_F = groups[i].base_func(F);
+                if (!F->isDeclaration()) {
+                    clone_function(F, group_F, *groups[i].vmap);
+                }
+            }
+            for (auto &target : groups[i].clones) {
+                prepare_vmap(*target.vmap);
+                auto target_F = cast_or_null<Function>(map_get(*target.vmap, F));
+                if (target_F) {
+                    if (!F->isDeclaration()) {
+                        clone_function(group_F, target_F, *target.vmap);
+                    }
+                    add_features(target_F, specs[target.idx]);
+                    target_F->addFnAttr("julia.mv.clone", std::to_string(target.idx));
+                }
+            }
+            // don't set the original function's features yet,
+            // since we may clone it for later groups
+            if (i != 0) {
+                add_features(group_F, specs[groups[i].idx]);
+                group_F->addFnAttr("julia.mv.clone", std::to_string(groups[i].idx));
+            }
         }
-        fvars.push_back(F);
-        ref = fvars.size();
+        // still don't set the original function's features yet,
+        // since we'll copy function attributes if we need to rewrite
+        // the alias, and target specific attributes are illegal on
+        // alias trampolines unless the user explicitly specifies them
     }
-    return ref - 1;
+}
+
+uint32_t CloneCtx::get_func_id(Function *F) const
+{
+    auto ref = func_ids.find(F);
+    assert(ref != func_ids.end() && "Requesting id of non-fvar!");
+    return ref->second - 1;
 }
 
 template<typename Stack>
-Constant *CloneCtx::rewrite_gv_init(const Stack& stack)
+static Constant *rewrite_gv_init(const Stack& stack)
 {
     // Null initialize so that LLVM put it in the correct section.
     SmallVector<Constant*, 8> args;
@@ -732,18 +668,17 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
         Function::Create(F->getFunctionType(), alias->getLinkage(), "", &M);
     trampoline->copyAttributesFrom(F);
     trampoline->takeName(alias);
+    trampoline->setVisibility(alias->getVisibility());
+    trampoline->setDSOLocal(alias->isDSOLocal());
+    // drop multiversioning attributes, add alias attribute for testing purposes
+    trampoline->removeFnAttr("julia.mv.reloc");
+    trampoline->removeFnAttr("julia.mv.clones");
+    trampoline->addFnAttr("julia.mv.alias");
     alias->eraseFromParent();
 
     uint32_t id;
     GlobalVariable *slot;
     std::tie(id, slot) = get_reloc_slot(F);
-    for (auto &grp: groups) {
-        grp.relocs.insert(id);
-        for (auto &tgt: grp.clones) {
-            tgt.relocs.insert(id);
-        }
-    }
-    alias_relocs.insert(id);
 
     auto BB = BasicBlock::Create(F->getContext(), "top", trampoline);
     IRBuilder<> irbuilder(BB);
@@ -756,14 +691,12 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
     for (auto &arg : trampoline->args())
         Args.push_back(&arg);
     auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, makeArrayRef(Args));
-    if (F->isVarArg())
-#if (defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_PPC64_))
-        abort();    // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9)
-#else
+    if (F->isVarArg()) {
+        assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!");
         call->setTailCallKind(CallInst::TCK_MustTail);
-#endif
-    else
+    } else {
         call->setTailCallKind(CallInst::TCK_Tail);
+    }
 
     if (F->getReturnType() == Type::getVoidTy(F->getContext()))
         irbuilder.CreateRetVoid();
@@ -792,36 +725,48 @@ void CloneCtx::fix_gv_uses()
             assert(info.use->getOperandNo() == 0);
             assert(!val->isConstant());
             auto fid = get_func_id(orig_f);
-            auto addr = ConstantExpr::getPtrToInt(val, getSizeTy(val->getContext()));
+            auto addr = ConstantExpr::getPtrToInt(val, T_size);
             if (info.offset)
-                addr = ConstantExpr::getAdd(addr, ConstantInt::get(getSizeTy(val->getContext()), info.offset));
+                addr = ConstantExpr::getAdd(addr, ConstantInt::get(T_size, info.offset));
             gv_relocs.emplace_back(addr, fid);
             val->setInitializer(rewrite_gv_init(stack));
         }
         return changed;
     };
     for (auto orig_f: orig_funcs) {
-        if (!has_cloneall && !cloned.count(orig_f))
+        if (!orig_f->hasFnAttribute("julia.mv.clones"))
             continue;
         while (single_pass(orig_f)) {
         }
     }
 }
 
-std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F)
+void CloneCtx::finalize_orig_clone_attr()
 {
-    // Null initialize so that LLVM put it in the correct section.
-    auto id = get_func_id(F);
-    auto &slot = const_relocs[id];
-    if (!slot)
-        slot = new GlobalVariable(M, F->getType(), false, GlobalVariable::InternalLinkage,
-                                  ConstantPointerNull::get(F->getType()),
-                                  F->getName() + ".reloc_slot");
-    return std::make_pair(id, slot);
+    for (auto orig_f: orig_funcs) {
+        if (!orig_f->hasFnAttribute("julia.mv.clones"))
+            continue;
+        orig_f->addFnAttr("julia.mv.clone", "0");
+    }
+}
+
+std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F) const
+{
+    if (F->isDeclaration()) {
+        auto extern_decl = extern_relocs.find(F);
+        assert(extern_decl != extern_relocs.end() && "Missing extern relocation slot!");
+        return {(uint32_t)-1, extern_decl->second};
+    }
+    else {
+        auto id = get_func_id(F);
+        auto slot = const_relocs.find(id);
+        assert(slot != const_relocs.end() && "Missing relocation slot!");
+        return {id, slot->second};
+    }
 }
 
 template<typename Stack>
-static Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *insert_before)
+static Value *rewrite_inst_use(const Stack& stack, Type *T_size, Value *replace, Instruction *insert_before)
 {
     SmallVector<Constant*, 8> args;
     uint32_t nlevel = stack.size();
@@ -837,6 +782,7 @@ static Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *
             replace = inst;
             continue;
         }
+        assert(val);
         unsigned nargs = val->getNumOperands();
         args.resize(nargs);
         for (unsigned j = 0; j < nargs; j++) {
@@ -858,7 +804,7 @@ static Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *
         }
         else if (isa<ConstantVector>(val)) {
             replace = InsertElementInst::Create(ConstantVector::get(args), replace,
-                                                ConstantInt::get(getSizeTy(insert_before->getContext()), idx), "",
+                                                ConstantInt::get(T_size, idx), "",
                                                 insert_before);
         }
         else {
@@ -870,73 +816,86 @@ static Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *
     return replace;
 }
 
+template<typename I2GV>
+static void replaceUsesWithLoad(Function &F, Type *T_size, I2GV should_replace, MDNode *tbaa_const) {
+    bool changed;
+    do {
+        changed = false;
+        for (auto uses = ConstantUses<Instruction>(&F, *F.getParent()); !uses.done(); uses.next()) {
+            auto info = uses.get_info();
+            auto use_i = info.val;
+            GlobalVariable *slot = should_replace(*use_i);
+            if (!slot)
+                continue;
+            Instruction *insert_before = use_i;
+            if (auto phi = dyn_cast<PHINode>(use_i))
+                insert_before = phi->getIncomingBlock(*info.use)->getTerminator();
+            Instruction *ptr = new LoadInst(F.getType(), slot, "", false, insert_before);
+            ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+            ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None));
+            use_i->setOperand(info.use->getOperandNo(),
+                                rewrite_inst_use(uses.get_stack(), T_size, ptr,
+                                                insert_before));
+            changed = true;
+        }
+    } while (changed);
+}
+
 void CloneCtx::fix_inst_uses()
 {
     uint32_t nfuncs = orig_funcs.size();
     for (auto &grp: groups) {
-        auto suffix = ".clone_" + std::to_string(grp.idx);
         for (uint32_t i = 0; i < nfuncs; i++) {
-            if (!grp.clone_fs.count(i))
-                continue;
             auto orig_f = orig_funcs[i];
+            if (!grp.has_subtarget_clone(orig_f))
+                continue;
             auto F = grp.base_func(orig_f);
-            replaceUsesWithLoad(*F, [&](Instruction &I) -> GlobalVariable * {
+            auto grpidx = std::to_string(grp.idx);
+            replaceUsesWithLoad(*F, T_size, [&](Instruction &I) -> GlobalVariable * {
                 uint32_t id;
                 GlobalVariable *slot;
                 auto use_f = I.getFunction();
-                if (!use_f->getName().endswith(suffix))
+                if (!use_f->hasFnAttribute("julia.mv.clone") || use_f->getFnAttribute("julia.mv.clone").getValueAsString() != grpidx)
                     return nullptr;
                 std::tie(id, slot) = get_reloc_slot(orig_f);
-
-                grp.relocs.insert(id);
-                for (auto &tgt: grp.clones) {
-                    // The enclosing function of the use is cloned,
-                    // no need to deal with this use on this target.
-                    if (map_get(*tgt.vmap, use_f))
-                        continue;
-                    tgt.relocs.insert(id);
-                }
                 return slot;
             }, tbaa_const);
         }
     }
 }
 
-template<typename T>
-inline T *CloneCtx::add_comdat(T *G) const
-{
-#if defined(_OS_WINDOWS_)
-    // add __declspec(dllexport) to everything marked for export
-    if (G->getLinkage() == GlobalValue::ExternalLinkage)
-        G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
-    else
-        G->setDLLStorageClass(GlobalValue::DefaultStorageClass);
-#endif
-    return G;
+void CloneCtx::finalize_orig_features() {
+    for (auto F : orig_funcs) {
+        add_features(F, specs[0]);
+    }
 }
 
-Constant *CloneCtx::get_ptrdiff32(Constant *ptr, Constant *base) const
+static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base)
 {
     if (ptr->getType()->isPointerTy())
-        ptr = ConstantExpr::getPtrToInt(ptr, getSizeTy(ptr->getContext()));
+        ptr = ConstantExpr::getPtrToInt(ptr, T_size);
     auto ptrdiff = ConstantExpr::getSub(ptr, base);
     return sizeof(void*) == 8 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff;
 }
 
 template<typename T>
-Constant *CloneCtx::emit_offset_table(const std::vector<T*> &vars, StringRef name) const
+static Constant *emit_offset_table(Module &M, Type *T_size, const std::vector<T*> &vars, StringRef name, StringRef suffix)
 {
     auto T_int32 = Type::getInt32Ty(M.getContext());
-    auto T_size = getSizeTy(M.getContext());
     uint32_t nvars = vars.size();
     Constant *base = nullptr;
     if (nvars > 0) {
         base = ConstantExpr::getBitCast(vars[0], T_size->getPointerTo());
-        add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage,
-                                       name + "_base",
-                                       base, &M));
+        auto ga = GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage,
+                                       name + "_base" + suffix,
+                                       base, &M);
+        ga->setVisibility(GlobalValue::HiddenVisibility);
+        ga->setDSOLocal(true);
     } else {
-        base = ConstantExpr::getNullValue(T_size->getPointerTo());
+        auto gv = new GlobalVariable(M, T_size, true, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), name + "_base" + suffix);
+        gv->setVisibility(GlobalValue::HiddenVisibility);
+        gv->setDSOLocal(true);
+        base = gv;
     }
     auto vbase = ConstantExpr::getPtrToInt(base, T_size);
     std::vector<Constant*> offsets(nvars + 1);
@@ -944,13 +903,15 @@ Constant *CloneCtx::emit_offset_table(const std::vector<T*> &vars, StringRef nam
     if (nvars > 0) {
         offsets[1] = ConstantInt::get(T_int32, 0);
         for (uint32_t i = 1; i < nvars; i++)
-            offsets[i + 1] = get_ptrdiff32(vars[i], vbase);
+            offsets[i + 1] = get_ptrdiff32(T_size, vars[i], vbase);
     }
     ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1);
-    add_comdat(new GlobalVariable(M, vars_type, true,
+    auto gv = new GlobalVariable(M, vars_type, true,
                                   GlobalVariable::ExternalLinkage,
                                   ConstantArray::get(vars_type, offsets),
-                                  name + "_offsets"));
+                                  name + "_offsets" + suffix);
+    gv->setVisibility(GlobalValue::HiddenVisibility);
+    gv->setDSOLocal(true);
     return vbase;
 }
 
@@ -962,45 +923,25 @@ void CloneCtx::emit_metadata()
         return;
     }
 
+    StringRef suffix;
+    if (auto suffix_md = M.getModuleFlag("julia.mv.suffix")) {
+        suffix = cast<MDString>(suffix_md)->getString();
+    }
+
     // Store back the information about exported functions.
-    auto fbase = emit_offset_table(fvars, "jl_sysimg_fvars");
-    auto gbase = emit_offset_table(gvars, "jl_sysimg_gvars");
+    auto fbase = emit_offset_table(M, T_size, fvars, "jl_fvar", suffix);
+    auto gbase = emit_offset_table(M, T_size, gvars, "jl_gvar", suffix);
 
-    uint32_t ntargets = specs.size();
-    SmallVector<Target*, 8> targets(ntargets);
-    for (auto &grp: groups) {
-        targets[grp.idx] = &grp;
-        for (auto &tgt: grp.clones) {
-            targets[tgt.idx] = &tgt;
-        }
-    }
+    M.getGlobalVariable("jl_fvar_idxs")->setName("jl_fvar_idxs" + suffix);
+    M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs" + suffix);
 
-    // Generate `jl_dispatch_target_ids`
-    {
-        const uint32_t base_flags = has_veccall ? JL_TARGET_VEC_CALL : 0;
-        std::vector<uint8_t> data;
-        auto push_i32 = [&] (uint32_t v) {
-            uint8_t buff[4];
-            memcpy(buff, &v, 4);
-            data.insert(data.end(), buff, buff + 4);
-        };
-        push_i32(ntargets);
-        for (uint32_t i = 0; i < ntargets; i++) {
-            push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME));
-            auto &specdata = specs[i].data;
-            data.insert(data.end(), specdata.begin(), specdata.end());
-        }
-        auto value = ConstantDataArray::get(M.getContext(), data);
-        add_comdat(new GlobalVariable(M, value->getType(), true,
-                                      GlobalVariable::ExternalLinkage,
-                                      value, "jl_dispatch_target_ids"));
-    }
+    uint32_t ntargets = specs.size();
 
     // Generate `jl_dispatch_reloc_slots`
     std::set<uint32_t> shared_relocs;
     {
         auto T_int32 = Type::getInt32Ty(M.getContext());
-        std::stable_sort(gv_relocs.begin(), gv_relocs.end(),
+        std::sort(gv_relocs.begin(), gv_relocs.end(),
                          [] (const std::pair<Constant*,uint32_t> &lhs,
                              const std::pair<Constant*,uint32_t> &rhs) {
                              return lhs.second < rhs.second;
@@ -1017,22 +958,22 @@ void CloneCtx::emit_metadata()
                  gv_reloc_idx++) {
                 shared_relocs.insert(id);
                 values.push_back(id_v);
-                values.push_back(get_ptrdiff32(gv_relocs[gv_reloc_idx].first, gbase));
+                values.push_back(get_ptrdiff32(T_size, gv_relocs[gv_reloc_idx].first, gbase));
             }
             auto it = const_relocs.find(id);
             if (it != const_relocs.end()) {
-                values.push_back(id_v);
-                values.push_back(get_ptrdiff32(it->second, gbase));
-            }
-            if (alias_relocs.find(id) != alias_relocs.end()) {
                 shared_relocs.insert(id);
+                values.push_back(id_v);
+                values.push_back(get_ptrdiff32(T_size, it->second, gbase));
             }
         }
         values[0] = ConstantInt::get(T_int32, values.size() / 2);
         ArrayType *vars_type = ArrayType::get(T_int32, values.size());
-        add_comdat(new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage,
+        auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage,
                                       ConstantArray::get(vars_type, values),
-                                      "jl_dispatch_reloc_slots"));
+                                      "jl_clone_slots" + suffix);
+        gv->setVisibility(GlobalValue::HiddenVisibility);
+        gv->setDSOLocal(true);
     }
 
     // Generate `jl_dispatch_fvars_idxs` and `jl_dispatch_fvars_offsets`
@@ -1040,7 +981,7 @@ void CloneCtx::emit_metadata()
         std::vector<uint32_t> idxs;
         std::vector<Constant*> offsets;
         for (uint32_t i = 0; i < ntargets; i++) {
-            auto tgt = targets[i];
+            auto tgt = linearized[i];
             auto &spec = specs[i];
             uint32_t len_idx = idxs.size();
             idxs.push_back(0); // We will fill in the real value later.
@@ -1049,49 +990,53 @@ void CloneCtx::emit_metadata()
                 auto grp = static_cast<Group*>(tgt);
                 count = jl_sysimg_tag_mask;
                 for (uint32_t j = 0; j < nfvars; j++) {
-                    if (shared_relocs.count(j) || tgt->relocs.count(j)) {
+                    if (shared_relocs.count(j)) {
                         count++;
                         idxs.push_back(j);
                     }
                     if (i != 0) {
-                        offsets.push_back(get_ptrdiff32(grp->base_func(fvars[j]), fbase));
+                        offsets.push_back(get_ptrdiff32(T_size, grp->base_func(fvars[j]), fbase));
                     }
                 }
             }
             else {
                 auto baseidx = spec.base;
-                auto grp = static_cast<Group*>(targets[baseidx]);
+                auto grp = static_cast<Group*>(linearized[baseidx]);
                 idxs.push_back(baseidx);
                 for (uint32_t j = 0; j < nfvars; j++) {
                     auto base_f = grp->base_func(fvars[j]);
-                    if (shared_relocs.count(j) || tgt->relocs.count(j)) {
+                    if (shared_relocs.count(j)) {
                         count++;
                         idxs.push_back(jl_sysimg_tag_mask | j);
                         auto f = map_get(*tgt->vmap, base_f, base_f);
-                        offsets.push_back(get_ptrdiff32(cast<Function>(f), fbase));
+                        offsets.push_back(get_ptrdiff32(T_size, cast<Function>(f), fbase));
                     }
                     else if (auto f = map_get(*tgt->vmap, base_f)) {
                         count++;
                         idxs.push_back(j);
-                        offsets.push_back(get_ptrdiff32(cast<Function>(f), fbase));
+                        offsets.push_back(get_ptrdiff32(T_size, cast<Function>(f), fbase));
                     }
                 }
             }
             idxs[len_idx] = count;
         }
         auto idxval = ConstantDataArray::get(M.getContext(), idxs);
-        add_comdat(new GlobalVariable(M, idxval->getType(), true,
+        auto gv1 = new GlobalVariable(M, idxval->getType(), true,
                                       GlobalVariable::ExternalLinkage,
-                                      idxval, "jl_dispatch_fvars_idxs"));
+                                      idxval, "jl_clone_idxs" + suffix);
+        gv1->setVisibility(GlobalValue::HiddenVisibility);
+        gv1->setDSOLocal(true);
         ArrayType *offsets_type = ArrayType::get(Type::getInt32Ty(M.getContext()), offsets.size());
-        add_comdat(new GlobalVariable(M, offsets_type, true,
+        auto gv2 = new GlobalVariable(M, offsets_type, true,
                                       GlobalVariable::ExternalLinkage,
                                       ConstantArray::get(offsets_type, offsets),
-                                      "jl_dispatch_fvars_offsets"));
+                                      "jl_clone_offsets" + suffix);
+        gv2->setVisibility(GlobalValue::HiddenVisibility);
+        gv2->setDSOLocal(true);
     }
 }
 
-static bool runMultiVersioning(Module &M, function_ref<LoopInfo&(Function&)> GetLI, function_ref<CallGraph&()> GetCG, bool allow_bad_fvars)
+static bool runMultiVersioning(Module &M, bool allow_bad_fvars)
 {
     // Group targets and identify cloning bases.
     // Also initialize function info maps (we'll update these maps as we go)
@@ -1101,28 +1046,35 @@ static bool runMultiVersioning(Module &M, function_ref<LoopInfo&(Function&)> Get
     //     * Cloned function -> Original function (add as we clone functions)
     //     * Original function -> Base function (target specific and updated by LLVM)
     //     * ID -> relocation slots (const).
-    if (M.getName() == "sysimage")
+    if (!M.getModuleFlag("julia.mv.enable")) {
         return false;
+    }
 
-    GlobalVariable *fvars = M.getGlobalVariable("jl_sysimg_fvars");
-    GlobalVariable *gvars = M.getGlobalVariable("jl_sysimg_gvars");
+    // for opt testing purposes
+    bool annotated = !!M.getModuleFlag("julia.mv.annotated");
+    if (!annotated) {
+        annotate_module_clones(M);
+    }
+
+    // also for opt testing purposes
+    if (M.getModuleFlag("julia.mv.skipcloning")) {
+        assert(!annotated && "Multiversioning was enabled and annotations were added, but cloning was skipped!");
+        return true;
+    }
+
+    GlobalVariable *fvars = M.getGlobalVariable("jl_fvars");
+    GlobalVariable *gvars = M.getGlobalVariable("jl_gvars");
     if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa<ConstantArray>(fvars->getInitializer()) ||
                             !gvars || !gvars->hasInitializer() || !isa<ConstantArray>(gvars->getInitializer())))
         return false;
 
-    CloneCtx clone(M, GetLI, GetCG, allow_bad_fvars);
+    CloneCtx clone(M, allow_bad_fvars);
 
-    // Collect a list of original functions and clone base functions
-    clone.clone_bases();
+    clone.prepare_slots();
 
-    // Collect function info (type of instruction used)
-    clone.collect_func_infos();
+    clone.clone_decls();
 
-    // If any partially cloned target exist decide which functions to clone for these targets.
-    // Clone functions for each group and collect a list of them.
-    // We can also add feature strings for cloned functions
-    // now that no additional cloning needs to be done.
-    clone.clone_all_partials();
+    clone.clone_bodies();
 
     // Scan **ALL** cloned functions (including full cloning for base target)
     // for global variables initialization use.
@@ -1130,6 +1082,10 @@ static bool runMultiVersioning(Module &M, function_ref<LoopInfo&(Function&)> Get
     // These relocations must be initialized for **ALL** targets.
     clone.fix_gv_uses();
 
+    // Now we have all the cloned functions, we can set the original functions'
+    // clone attribute to be 0
+    clone.finalize_orig_clone_attr();
+
     // For each group, scan all functions cloned by **PARTIALLY** cloned targets for
     // instruction use.
     // A function needs a const relocation slot if it is cloned and is called by a
@@ -1140,6 +1096,9 @@ static bool runMultiVersioning(Module &M, function_ref<LoopInfo&(Function&)> Get
     // A target needs a slot to be initialized iff at least one caller is not initialized.
     clone.fix_inst_uses();
 
+    //Now set the original functions' target-specific attributes, since nobody will look at those again
+    clone.finalize_orig_features();
+
     // Store back sysimg information with the correct format.
     // At this point, we should have fixed up all the uses of the cloned functions
     // and collected all the shared/target-specific relocations.
@@ -1159,24 +1118,12 @@ struct MultiVersioningLegacy: public ModulePass {
 
 private:
     bool runOnModule(Module &M) override;
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        AU.addRequired<LoopInfoWrapperPass>();
-        AU.addRequired<CallGraphWrapperPass>();
-        AU.addPreserved<LoopInfoWrapperPass>();
-    }
     bool allow_bad_fvars;
 };
 
 bool MultiVersioningLegacy::runOnModule(Module &M)
 {
-    auto GetLI = [this](Function &F) -> LoopInfo & {
-        return getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
-    };
-    auto GetCG = [this]() -> CallGraph & {
-        return getAnalysis<CallGraphWrapperPass>().getCallGraph();
-    };
-    return runMultiVersioning(M, GetLI, GetCG, allow_bad_fvars);
+    return runMultiVersioning(M, allow_bad_fvars);
 }
 
 
@@ -1187,40 +1134,15 @@ static RegisterPass<MultiVersioningLegacy> X("JuliaMultiVersioning", "JuliaMulti
 
 } // anonymous namespace
 
-void replaceUsesWithLoad(Function &F, function_ref<GlobalVariable *(Instruction &I)> should_replace, MDNode *tbaa_const) {
-    bool changed;
-    do {
-        changed = false;
-        for (auto uses = ConstantUses<Instruction>(&F, *F.getParent()); !uses.done(); uses.next()) {
-            auto info = uses.get_info();
-            auto use_i = info.val;
-            GlobalVariable *slot = should_replace(*use_i);
-            if (!slot)
-                continue;
-            Instruction *insert_before = use_i;
-            if (auto phi = dyn_cast<PHINode>(use_i))
-                insert_before = phi->getIncomingBlock(*info.use)->getTerminator();
-            Instruction *ptr = new LoadInst(F.getType(), slot, "", false, insert_before);
-            ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-            ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None));
-            use_i->setOperand(info.use->getOperandNo(),
-                                rewrite_inst_use(uses.get_stack(), ptr,
-                                                insert_before));
-            changed = true;
-        }
-    } while (changed);
+void multiversioning_preannotate(Module &M)
+{
+    annotate_module_clones(M);
+    M.addModuleFlag(Module::ModFlagBehavior::Error, "julia.mv.enable", 1);
 }
 
 PreservedAnalyses MultiVersioning::run(Module &M, ModuleAnalysisManager &AM)
 {
-    auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-    auto GetLI = [&](Function &F) -> LoopInfo & {
-        return FAM.getResult<LoopAnalysis>(F);
-    };
-    auto GetCG = [&]() -> CallGraph & {
-        return AM.getResult<CallGraphAnalysis>(M);
-    };
-    if (runMultiVersioning(M, GetLI, GetCG, external_use)) {
+    if (runMultiVersioning(M, external_use)) {
         auto preserved = PreservedAnalyses::allInSet<CFGAnalyses>();
         preserved.preserve<LoopAnalysis>();
         return preserved;
@@ -1233,7 +1155,8 @@ Pass *createMultiVersioningPass(bool allow_bad_fvars)
     return new MultiVersioningLegacy(allow_bad_fvars);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createMultiVersioningPass(false));
 }
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index 1e1ae4bc7eada..df3ffa5e27486 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -101,7 +101,8 @@ llvm::Function *JuliaPassContext::getOrDeclare(
     else {
         // Otherwise, we'll declare it and add it to the module.
         // Declare the function.
-        auto func = desc.declare(*this);
+        auto T_size = module->getDataLayout().getIntPtrType(module->getContext());
+        auto func = desc.declare(T_size);
         // Add it to the function list.
         module->getFunctionList().push_back(func);
         // Return the newly created function.
@@ -127,7 +128,7 @@ namespace jl_intrinsics {
     // Annotates a function with attributes suitable for GC allocation
     // functions. Specifically, the return value is marked noalias and nonnull.
     // The allocation size is set to the first argument.
-    static Function *addGCAllocAttributes(Function *target, LLVMContext &context)
+    static Function *addGCAllocAttributes(Function *target)
     {
         addRetAttr(target, Attribute::NoAlias);
         addRetAttr(target, Attribute::NonNull);
@@ -136,11 +137,13 @@ namespace jl_intrinsics {
 
     const IntrinsicDescription getGCFrameSlot(
         GET_GC_FRAME_SLOT_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx);
             return Function::Create(
                 FunctionType::get(
-                    PointerType::get(context.T_prjlvalue, 0),
-                    {PointerType::get(context.T_prjlvalue, 0), Type::getInt32Ty(context.getLLVMContext())},
+                    T_pprjlvalue,
+                    {T_pprjlvalue, Type::getInt32Ty(ctx)},
                     false),
                 Function::ExternalLinkage,
                 GET_GC_FRAME_SLOT_NAME);
@@ -148,26 +151,27 @@ namespace jl_intrinsics {
 
     const IntrinsicDescription GCAllocBytes(
         GC_ALLOC_BYTES_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    context.T_prjlvalue,
-                    { Type::getInt8PtrTy(context.getLLVMContext()),
-                        sizeof(size_t) == sizeof(uint32_t) ?
-                        Type::getInt32Ty(context.getLLVMContext()) :
-                        Type::getInt64Ty(context.getLLVMContext()) },
+                    T_prjlvalue,
+                    { Type::getInt8PtrTy(ctx), T_size },
                     false),
                 Function::ExternalLinkage,
                 GC_ALLOC_BYTES_NAME);
-            intrinsic->addFnAttr(Attribute::getWithAllocSizeArgs(context.getLLVMContext(), 1, None));
-            return addGCAllocAttributes(intrinsic, context.getLLVMContext());
+            intrinsic->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None));
+            return addGCAllocAttributes(intrinsic);
         });
 
     const IntrinsicDescription newGCFrame(
         NEW_GC_FRAME_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
-                FunctionType::get(PointerType::get(context.T_prjlvalue, 0), {Type::getInt32Ty(context.getLLVMContext())}, false),
+                FunctionType::get(T_pprjlvalue, {Type::getInt32Ty(ctx)}, false),
                 Function::ExternalLinkage,
                 NEW_GC_FRAME_NAME);
             addRetAttr(intrinsic, Attribute::NoAlias);
@@ -178,11 +182,13 @@ namespace jl_intrinsics {
 
     const IntrinsicDescription pushGCFrame(
         PUSH_GC_FRAME_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx);
             return Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    {PointerType::get(context.T_prjlvalue, 0), Type::getInt32Ty(context.getLLVMContext())},
+                    Type::getVoidTy(ctx),
+                    {T_pprjlvalue, Type::getInt32Ty(ctx)},
                     false),
                 Function::ExternalLinkage,
                 PUSH_GC_FRAME_NAME);
@@ -190,11 +196,13 @@ namespace jl_intrinsics {
 
     const IntrinsicDescription popGCFrame(
         POP_GC_FRAME_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx);
             return Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    {PointerType::get(context.T_prjlvalue, 0)},
+                    Type::getVoidTy(ctx),
+                    {T_pprjlvalue},
                     false),
                 Function::ExternalLinkage,
                 POP_GC_FRAME_NAME);
@@ -202,11 +210,13 @@ namespace jl_intrinsics {
 
     const IntrinsicDescription queueGCRoot(
         QUEUE_GC_ROOT_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 QUEUE_GC_ROOT_NAME);
@@ -216,12 +226,12 @@ namespace jl_intrinsics {
 
     const IntrinsicDescription safepoint(
         SAFEPOINT_NAME,
-        [](const JuliaPassContext &context) {
-            auto T_size = getSizeTy(context.getLLVMContext());
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
             auto T_psize = T_size->getPointerTo();
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
+                    Type::getVoidTy(ctx),
                     {T_psize},
                     false),
                 Function::ExternalLinkage,
@@ -233,11 +243,13 @@ namespace jl_intrinsics {
 #ifdef MMTK_GC
     const IntrinsicDescription writeBarrier1(
         WRITE_BARRIER_1_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 WRITE_BARRIER_1_NAME);
@@ -246,11 +258,13 @@ namespace jl_intrinsics {
         });
     const IntrinsicDescription writeBarrier2(
         WRITE_BARRIER_2_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue, context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue, T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 WRITE_BARRIER_2_NAME);
@@ -259,11 +273,13 @@ namespace jl_intrinsics {
         });
     const IntrinsicDescription writeBarrier1Slow(
         WRITE_BARRIER_1_SLOW_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 WRITE_BARRIER_1_SLOW_NAME);
@@ -272,11 +288,13 @@ namespace jl_intrinsics {
         });
     const IntrinsicDescription writeBarrier2Slow(
         WRITE_BARRIER_2_SLOW_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto intrinsic = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue, context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue, T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 WRITE_BARRIER_2_SLOW_NAME);
@@ -290,6 +308,7 @@ namespace jl_well_known {
     static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
     static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
     static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
+    static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);
 #ifdef MMTK_GC
     static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline);
     static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline);
@@ -301,42 +320,45 @@ namespace jl_well_known {
 
     const WellKnownFunctionDescription GCBigAlloc(
         GC_BIG_ALLOC_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto bigAllocFunc = Function::Create(
                 FunctionType::get(
-                    context.T_prjlvalue,
-                    { Type::getInt8PtrTy(context.getLLVMContext()),
-                        sizeof(size_t) == sizeof(uint32_t) ?
-                        Type::getInt32Ty(context.getLLVMContext()) :
-                        Type::getInt64Ty(context.getLLVMContext()) },
+                    T_prjlvalue,
+                    { Type::getInt8PtrTy(ctx), T_size },
                     false),
                 Function::ExternalLinkage,
                 GC_BIG_ALLOC_NAME);
-            bigAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(context.getLLVMContext(), 1, None));
-            return addGCAllocAttributes(bigAllocFunc, context.getLLVMContext());
+            bigAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None));
+            return addGCAllocAttributes(bigAllocFunc);
         });
 
     const WellKnownFunctionDescription GCPoolAlloc(
         GC_POOL_ALLOC_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto poolAllocFunc = Function::Create(
                 FunctionType::get(
-                    context.T_prjlvalue,
-                    { Type::getInt8PtrTy(context.getLLVMContext()), Type::getInt32Ty(context.getLLVMContext()), Type::getInt32Ty(context.getLLVMContext()) },
+                    T_prjlvalue,
+                    { Type::getInt8PtrTy(ctx), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx) },
                     false),
                 Function::ExternalLinkage,
                 GC_POOL_ALLOC_NAME);
-            poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(context.getLLVMContext(), 2, None));
-            return addGCAllocAttributes(poolAllocFunc, context.getLLVMContext());
+            poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
+            return addGCAllocAttributes(poolAllocFunc);
         });
 
     const WellKnownFunctionDescription GCQueueRoot(
         GC_QUEUE_ROOT_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto func = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 GC_QUEUE_ROOT_NAME);
@@ -344,14 +366,34 @@ namespace jl_well_known {
             return func;
         });
 
+    const WellKnownFunctionDescription GCAllocTyped(
+        GC_ALLOC_TYPED_NAME,
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
+            auto allocTypedFunc = Function::Create(
+                FunctionType::get(
+                    T_prjlvalue,
+                    { Type::getInt8PtrTy(ctx),
+                        T_size,
+                        Type::getInt8PtrTy(ctx) },
+                    false),
+                Function::ExternalLinkage,
+                GC_ALLOC_TYPED_NAME);
+            allocTypedFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None));
+            return addGCAllocAttributes(allocTypedFunc);
+        });
+
 #ifdef MMTK_GC
     const WellKnownFunctionDescription GCWriteBarrier1(
         GC_WB_1_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto func = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 GC_WB_1_NAME);
@@ -361,11 +403,13 @@ namespace jl_well_known {
 
     const WellKnownFunctionDescription GCWriteBarrier2(
         GC_WB_2_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto func = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue, context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue, T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 GC_WB_2_NAME);
@@ -375,11 +419,13 @@ namespace jl_well_known {
 
     const WellKnownFunctionDescription GCWriteBarrier1Slow(
         GC_WB_1_SLOW_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto func = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 GC_WB_1_SLOW_NAME);
@@ -389,11 +435,13 @@ namespace jl_well_known {
 
     const WellKnownFunctionDescription GCWriteBarrier2Slow(
         GC_WB_2_SLOW_NAME,
-        [](const JuliaPassContext &context) {
+        [](Type *T_size) {
+            auto &ctx = T_size->getContext();
+            auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
             auto func = Function::Create(
                 FunctionType::get(
-                    Type::getVoidTy(context.getLLVMContext()),
-                    { context.T_prjlvalue, context.T_prjlvalue },
+                    Type::getVoidTy(ctx),
+                    { T_prjlvalue, T_prjlvalue },
                     false),
                 Function::ExternalLinkage,
                 GC_WB_2_SLOW_NAME);
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index d6e4be7e05338..c7995fcc9becc 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -20,7 +20,7 @@ namespace jl_intrinsics {
     // intrinsics and declare new intrinsics if necessary.
     struct IntrinsicDescription final {
         // The type of function that declares an intrinsic.
-        typedef llvm::Function *(*DeclarationFunction)(const JuliaPassContext&) JL_NOTSAFEPOINT;
+        typedef llvm::Function *(*DeclarationFunction)(llvm::Type *T_size) JL_NOTSAFEPOINT;
 
         // Creates an intrinsic description with a particular
         // name and declaration function.
@@ -157,6 +157,9 @@ namespace jl_well_known {
     // `jl_gc_queue_root`: queues a GC root.
     extern const WellKnownFunctionDescription GCQueueRoot;
 
+    // `jl_gc_alloc_typed`: allocates bytes.
+    extern const WellKnownFunctionDescription GCAllocTyped;
+
 #ifdef MMTK_GC
     extern const WellKnownFunctionDescription GCWriteBarrier1;
     extern const WellKnownFunctionDescription GCWriteBarrier2;
diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp
index 91bec48bca861..2158109cea120 100644
--- a/src/llvm-propagate-addrspaces.cpp
+++ b/src/llvm-propagate-addrspaces.cpp
@@ -330,7 +330,8 @@ PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysi
     }
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createPropagateJuliaAddrspaces());
 }
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index ea92e1709c597..84f8d7121ff03 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -9,6 +9,7 @@
 #include <llvm-c/Types.h>
 
 #include <llvm/Pass.h>
+#include <llvm/ADT/Triple.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
@@ -37,25 +38,27 @@ namespace {
 
 struct LowerPTLS {
     LowerPTLS(Module &M, bool imaging_mode=false)
-        : imaging_mode(imaging_mode), M(&M)
+        : imaging_mode(imaging_mode), M(&M), TargetTriple(M.getTargetTriple())
     {}
 
     bool run(bool *CFGModified);
 private:
     const bool imaging_mode;
     Module *M;
+    Triple TargetTriple;
     MDNode *tbaa_const{nullptr};
     MDNode *tbaa_gcframe{nullptr};
     FunctionType *FT_pgcstack_getter{nullptr};
     PointerType *T_pgcstack_getter{nullptr};
     PointerType *T_pppjlvalue{nullptr};
+    Type *T_size{nullptr};
     GlobalVariable *pgcstack_func_slot{nullptr};
     GlobalVariable *pgcstack_key_slot{nullptr};
     GlobalVariable *pgcstack_offset{nullptr};
     void set_pgcstack_attrs(CallInst *pgcstack) const;
     Instruction *emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const;
     template<typename T> T *add_comdat(T *G) const;
-    GlobalVariable *create_aliased_global(Type *T, StringRef name) const;
+    GlobalVariable *create_hidden_global(Type *T, StringRef name) const;
     void fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, bool or_new, bool *CFGModified);
 };
 
@@ -68,25 +71,17 @@ void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const
 Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const
 {
     Value *tls;
-#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
-    if (insertBefore->getFunction()->callsFunctionThatReturnsTwice()) {
+    if (TargetTriple.isX86() && insertBefore->getFunction()->callsFunctionThatReturnsTwice()) {
         // Workaround LLVM bug by hiding the offset computation
         // (and therefore the optimization opportunity) from LLVM.
         // Ref https://github.com/JuliaLang/julia/issues/17288
-        static const std::string const_asm_str = [&] () {
-            std::string stm;
-#  if defined(_CPU_X86_64_)
-            raw_string_ostream(stm) << "movq %fs:0, $0;\naddq $$" << jl_tls_offset << ", $0";
-#  else
-            raw_string_ostream(stm) << "movl %gs:0, $0;\naddl $$" << jl_tls_offset << ", $0";
-#  endif
-            return stm;
-        }();
-#  if defined(_CPU_X86_64_)
-        const char *dyn_asm_str = "movq %fs:0, $0;\naddq $1, $0";
-#  else
-        const char *dyn_asm_str = "movl %gs:0, $0;\naddl $1, $0";
-#  endif
+        std::string const_asm_str;
+        raw_string_ostream(const_asm_str) << (TargetTriple.getArch() == Triple::x86_64 ?
+            "movq %fs:0, $0;\naddq $$" : "movl %gs:0, $0;\naddl $$")
+            << jl_tls_offset << ", $0";
+        const char *dyn_asm_str = TargetTriple.getArch() == Triple::x86_64 ?
+            "movq %fs:0, $0;\naddq $1, $0" :
+            "movl %gs:0, $0;\naddl $1, $0";
 
         // The add instruction clobbers flags
         if (offset) {
@@ -102,30 +97,27 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
                                      false);
             tls = CallInst::Create(tp, "pgcstack_i8", insertBefore);
         }
-    }
-    else
-#endif
-    {
+    } else {
         // AArch64/ARM doesn't seem to have this issue.
         // (Possibly because there are many more registers and the offset is
         // positive and small)
         // It's also harder to emit the offset in a generic way on ARM/AArch64
         // (need to generate one or two `add` with shift) so let llvm emit
         // the add for now.
-#if defined(_CPU_AARCH64_)
-        const char *asm_str = "mrs $0, tpidr_el0";
-#elif defined(__ARM_ARCH) && __ARM_ARCH >= 7
-        const char *asm_str = "mrc p15, 0, $0, c13, c0, 3";
-#elif defined(_CPU_X86_64_)
-        const char *asm_str = "movq %fs:0, $0";
-#elif defined(_CPU_X86_)
-        const char *asm_str = "movl %gs:0, $0";
-#else
-        const char *asm_str = nullptr;
-        assert(0 && "Cannot emit thread pointer for this architecture.");
-#endif
+        const char *asm_str;
+        if (TargetTriple.isAArch64()) {
+            asm_str = "mrs $0, tpidr_el0";
+        } else if (TargetTriple.isARM()) {
+            asm_str = "mrc p15, 0, $0, c13, c0, 3";
+        } else if (TargetTriple.getArch() == Triple::x86_64) {
+            asm_str = "movq %fs:0, $0";
+        } else if (TargetTriple.getArch() == Triple::x86) {
+            asm_str = "movl %gs:0, $0";
+        } else {
+            llvm_unreachable("Cannot emit thread pointer for this architecture.");
+        }
         if (!offset)
-            offset = ConstantInt::getSigned(getSizeTy(insertBefore->getContext()), jl_tls_offset);
+            offset = ConstantInt::getSigned(T_size, jl_tls_offset);
         auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), asm_str, "=r", false);
         tls = CallInst::Create(tp, "thread_ptr", insertBefore);
         tls = GetElementPtrInst::Create(Type::getInt8Ty(insertBefore->getContext()), tls, {offset}, "ppgcstack_i8", insertBefore);
@@ -134,32 +126,15 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
     return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore);
 }
 
-GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const
+GlobalVariable *LowerPTLS::create_hidden_global(Type *T, StringRef name) const
 {
-    // Create a static global variable and points a global alias to it so that
-    // the address is visible externally but LLVM can still assume that the
-    // address of this variable doesn't need dynamic relocation
-    // (can be accessed with a single PC-rel load).
-    auto GV = new GlobalVariable(*M, T, false, GlobalVariable::InternalLinkage,
-                                 Constant::getNullValue(T), name + ".real");
-    add_comdat(GlobalAlias::create(T, 0, GlobalVariable::ExternalLinkage,
-                                   name, GV, M));
+    auto GV = new GlobalVariable(*M, T, false, GlobalVariable::ExternalLinkage,
+                                 nullptr, name);
+    GV->setVisibility(GlobalValue::HiddenVisibility);
+    GV->setDSOLocal(true);
     return GV;
 }
 
-template<typename T>
-inline T *LowerPTLS::add_comdat(T *G) const
-{
-#if defined(_OS_WINDOWS_)
-    // add __declspec(dllexport) to everything marked for export
-    if (G->getLinkage() == GlobalValue::ExternalLinkage)
-        G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
-    else
-        G->setDLLStorageClass(GlobalValue::DefaultStorageClass);
-#endif
-    return G;
-}
-
 void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, bool or_new, bool *CFGModified)
 {
     if (pgcstack->use_empty()) {
@@ -185,6 +160,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         SmallVector<uint32_t, 2> Weights{9, 1};
         TerminatorInst *fastTerm;
         TerminatorInst *slowTerm;
+        assert(pgcstack->getType()); // Static analyzer
         auto cmp = new ICmpInst(phi, CmpInst::ICMP_NE, pgcstack, Constant::getNullValue(pgcstack->getType()));
         SplitBlockAndInsertIfThenElse(cmp, phi, &fastTerm, &slowTerm,
                                       MDB.createBranchWeights(Weights));
@@ -207,7 +183,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         IRBuilder<> builder(fastTerm->getParent());
         fastTerm->removeFromParent();
         MDNode *tbaa = tbaa_gcframe;
-        Value *prior = emit_gc_unsafe_enter(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
+        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true);
         builder.Insert(fastTerm);
         phi->addIncoming(pgcstack, fastTerm->getParent());
         // emit pre-return cleanup
@@ -219,7 +195,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
             for (auto &BB : *pgcstack->getParent()->getParent()) {
                 if (isa<ReturnInst>(BB.getTerminator())) {
                     IRBuilder<> builder(BB.getTerminator());
-                    emit_gc_unsafe_leave(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true);
+                    emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true);
                 }
             }
         }
@@ -231,7 +207,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
             //     pgcstack = tp + offset; // fast
             // else
             //     pgcstack = getter();    // slow
-            auto offset = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_offset, "", false, pgcstack);
+            auto offset = new LoadInst(T_size, pgcstack_offset, "", false, pgcstack);
             offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
             offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
             auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset,
@@ -267,18 +243,18 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
         getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
         getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
-#if defined(_OS_DARWIN_)
-        auto key = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_key_slot, "", false, pgcstack);
-        key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-        key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
-        auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack);
-        new_pgcstack->takeName(pgcstack);
-        pgcstack->replaceAllUsesWith(new_pgcstack);
-        pgcstack->eraseFromParent();
-        pgcstack = new_pgcstack;
-#else
-        pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
-#endif
+        if (TargetTriple.isOSDarwin()) {
+            auto key = new LoadInst(T_size, pgcstack_key_slot, "", false, pgcstack);
+            key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+            key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
+            auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack);
+            new_pgcstack->takeName(pgcstack);
+            pgcstack->replaceAllUsesWith(new_pgcstack);
+            pgcstack->eraseFromParent();
+            pgcstack = new_pgcstack;
+        } else {
+            pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
+        }
         set_pgcstack_attrs(pgcstack);
     }
     else if (jl_tls_offset != -1) {
@@ -290,19 +266,19 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         jl_get_pgcstack_func *f;
         jl_pgcstack_key_t k;
         jl_pgcstack_getkey(&f, &k);
-        Constant *val = ConstantInt::get(getSizeTy(pgcstack->getContext()), (uintptr_t)f);
+        Constant *val = ConstantInt::get(T_size, (uintptr_t)f);
         val = ConstantExpr::getIntToPtr(val, T_pgcstack_getter);
-#if defined(_OS_DARWIN_)
-        assert(sizeof(k) == sizeof(uintptr_t));
-        Constant *key = ConstantInt::get(getSizeTy(pgcstack->getContext()), (uintptr_t)k);
-        auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack);
-        new_pgcstack->takeName(pgcstack);
-        pgcstack->replaceAllUsesWith(new_pgcstack);
-        pgcstack->eraseFromParent();
-        pgcstack = new_pgcstack;
-#else
-        pgcstack->setCalledFunction(pgcstack->getFunctionType(), val);
-#endif
+        if (TargetTriple.isOSDarwin()) {
+            assert(sizeof(k) == sizeof(uintptr_t));
+            Constant *key = ConstantInt::get(T_size, (uintptr_t)k);
+            auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack);
+            new_pgcstack->takeName(pgcstack);
+            pgcstack->replaceAllUsesWith(new_pgcstack);
+            pgcstack->eraseFromParent();
+            pgcstack = new_pgcstack;
+        } else {
+            pgcstack->setCalledFunction(pgcstack->getFunctionType(), val);
+        }
         set_pgcstack_attrs(pgcstack);
     }
 }
@@ -318,18 +294,19 @@ bool LowerPTLS::run(bool *CFGModified)
         if (need_init) {
             tbaa_const = tbaa_make_child_with_context(M->getContext(), "jtbaa_const", nullptr, true).first;
             tbaa_gcframe = tbaa_make_child_with_context(M->getContext(), "jtbaa_gcframe").first;
+            T_size = M->getDataLayout().getIntPtrType(M->getContext());
 
             FT_pgcstack_getter = pgcstack_getter->getFunctionType();
-#if defined(_OS_DARWIN_)
-            assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t));
-            FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {getSizeTy(M->getContext())}, false);
-#endif
+            if (TargetTriple.isOSDarwin()) {
+                assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t));
+                FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {T_size}, false);
+            }
             T_pgcstack_getter = FT_pgcstack_getter->getPointerTo();
             T_pppjlvalue = cast<PointerType>(FT_pgcstack_getter->getReturnType());
             if (imaging_mode) {
-                pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot");
-                pgcstack_key_slot = create_aliased_global(getSizeTy(M->getContext()), "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t)
-                pgcstack_offset = create_aliased_global(getSizeTy(M->getContext()), "jl_tls_offset");
+                pgcstack_func_slot = create_hidden_global(T_pgcstack_getter, "jl_pgcstack_func_slot");
+                pgcstack_key_slot = create_hidden_global(T_size, "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t)
+                pgcstack_offset = create_hidden_global(T_size, "jl_tls_offset");
             }
             need_init = false;
         }
@@ -395,7 +372,8 @@ Pass *createLowerPTLSPass(bool imaging_mode)
     return new LowerPTLSLegacy(imaging_mode);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode)
 {
     unwrap(PM)->add(createLowerPTLSPass(imaging_mode));
 }
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index e263467ba600c..b964c20e3353e 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -51,7 +51,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
             else {
                 //Remove once opaque pointer transition is complete
                 DstTy = PointerType::get(
-                        remapType(Ty->getPointerElementType()),
+                        remapType(Ty->getNonOpaquePointerElementType()),
                         ASRemapper(Ty->getAddressSpace()));
             }
         }
@@ -161,7 +161,7 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
                     auto ptrty = cast<PointerType>(Src->getType()->getScalarType());
                     //Remove once opaque pointer transition is complete
                     if (!ptrty->isOpaque()) {
-                        Type *SrcTy = remapType(ptrty->getPointerElementType());
+                        Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType());
                         DstV = CE->getWithOperands(Ops, Ty, false, SrcTy);
                     }
                 }
@@ -343,11 +343,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
         for (auto MD : MDs)
             NGV->addMetadata(
                     MD.first,
-#if JL_LLVM_VERSION >= 130000
                     *MapMetadata(MD.second, VMap));
-#else
-                    *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
-#endif
 
         copyComdat(NGV, GV);
 
@@ -356,11 +352,9 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
 
     // Similarly, copy over and rewrite function bodies
     for (Function *F : Functions) {
-        if (F->isDeclaration())
-            continue;
-
         Function *NF = cast<Function>(VMap[F]);
         LLVM_DEBUG(dbgs() << "Processing function " << NF->getName() << "\n");
+        // we also need this to run for declarations, or attributes won't be copied
 
         Function::arg_iterator DestI = NF->arg_begin();
         for (Function::const_arg_iterator I = F->arg_begin(); I != F->arg_end();
@@ -374,46 +368,29 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
                 NF,
                 F,
                 VMap,
-#if JL_LLVM_VERSION >= 130000
                 CloneFunctionChangeType::GlobalChanges,
-#else
-                /*ModuleLevelChanges=*/true,
-#endif
                 Returns,
                 "",
                 nullptr,
                 &TypeRemapper,
                 &Materializer);
 
-        // CloneFunctionInto unconditionally copies the attributes from F to NF,
-        // without considering e.g. the byval attribute type.
+        // Update function attributes that contain types
         AttributeList Attrs = F->getAttributes();
         LLVMContext &C = F->getContext();
         for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
             for (Attribute::AttrKind TypedAttr :
                  {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
-#if JL_LLVM_VERSION >= 140000
                 auto Attr = Attrs.getAttributeAtIndex(i, TypedAttr);
-#else
-                auto Attr = Attrs.getAttribute(i, TypedAttr);
-#endif
                 if (Type *Ty = Attr.getValueAsType()) {
-#if JL_LLVM_VERSION >= 140000
                     Attrs = Attrs.replaceAttributeTypeAtIndex(
                         C, i, TypedAttr, TypeRemapper.remapType(Ty));
-#else
-                    Attrs = Attrs.replaceAttributeType(
-                        C, i, TypedAttr, TypeRemapper.remapType(Ty));
-#endif
                     break;
                 }
             }
         }
         NF->setAttributes(Attrs);
 
-        if (F->hasPersonalityFn())
-            NF->setPersonalityFn(MapValue(F->getPersonalityFn(), VMap));
-
         copyComdat(NF, F);
 
         RemoveNoopAddrSpaceCasts(NF);
@@ -537,7 +514,8 @@ PreservedAnalyses RemoveJuliaAddrspacesPass::run(Module &M, ModuleAnalysisManage
     return RemoveAddrspacesPass(removeJuliaAddrspaces).run(M, AM);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createRemoveJuliaAddrspacesPass());
 }
diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp
index d9e3357524a9a..b767074202eb2 100644
--- a/src/llvm-remove-ni.cpp
+++ b/src/llvm-remove-ni.cpp
@@ -68,7 +68,8 @@ Pass *createRemoveNIPass()
     return new RemoveNILegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createRemoveNIPass());
 }
diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp
index 2f0375e39e1a3..fcb05ba7c6805 100644
--- a/src/llvm-simdloop.cpp
+++ b/src/llvm-simdloop.cpp
@@ -20,6 +20,7 @@
 
 #include <llvm/ADT/Statistic.h>
 #include <llvm/Analysis/LoopPass.h>
+#include <llvm/Analysis/OptimizationRemarkEmitter.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/Metadata.h>
@@ -42,6 +43,11 @@ STATISTIC(MaxChainLength, "Max length of reduction chain");
 STATISTIC(AddChains, "Addition reduction chains");
 STATISTIC(MulChains, "Multiply reduction chains");
 
+#ifndef __clang_gcanalyzer__
+#define REMARK(remark) ORE.emit(remark)
+#else
+#define REMARK(remark) (void) 0;
+#endif
 namespace {
 
 static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFEPOINT
@@ -67,7 +73,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE
 /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv,
 /// mark the ops as permitting reassociation/commuting.
 /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer
-static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) JL_NOTSAFEPOINT
+static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT
 {
     typedef SmallVector<Instruction*, 8> chainVector;
     chainVector chain;
@@ -81,6 +87,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) JL_NOTSAFEPOIN
             if (L->contains(U)) {
                 if (J) {
                     LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n");
+                    REMARK([&]() {
+                        return OptimizationRemarkMissed(DEBUG_TYPE, "NotReductionVar", U)
+                               << "not a reduction variable because operation has two internal uses";
+                    });
                     return;
                 }
                 J = U;
@@ -88,6 +98,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) JL_NOTSAFEPOIN
         }
         if (!J) {
             LLVM_DEBUG(dbgs() << "LSL: chain prematurely terminated at " << *I << "\n");
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "ChainPrematurelyTerminated", I)
+                       << "chain prematurely terminated at " << ore::NV("Instruction", I);
+            });
             return;
         }
         if (J == Phi) {
@@ -98,6 +112,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) JL_NOTSAFEPOIN
             // Check that arithmetic op matches prior arithmetic ops in the chain.
             if (getReduceOpcode(J, I) != opcode) {
                 LLVM_DEBUG(dbgs() << "LSL: chain broke at " << *J << " because of wrong opcode\n");
+                REMARK([&](){
+                    return OptimizationRemarkMissed(DEBUG_TYPE, "ChainBroke", J)
+                           << "chain broke at " << ore::NV("Instruction", J) << " because of wrong opcode";
+                });
                 return;
             }
         }
@@ -106,6 +124,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) JL_NOTSAFEPOIN
             opcode = getReduceOpcode(J, I);
             if (!opcode) {
                 LLVM_DEBUG(dbgs() << "LSL: first arithmetic op in chain is uninteresting" << *J << "\n");
+                REMARK([&]() {
+                    return OptimizationRemarkMissed(DEBUG_TYPE, "FirstArithmeticOpInChainIsUninteresting", J)
+                           << "first arithmetic op in chain is uninteresting";
+                });
                 return;
             }
         }
@@ -123,6 +145,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) JL_NOTSAFEPOIN
     int length = 0;
     for (chainVector::const_iterator K=chain.begin(); K!=chain.end(); ++K) {
         LLVM_DEBUG(dbgs() << "LSL: marking " << **K << "\n");
+        REMARK([&]() {
+            return OptimizationRemark(DEBUG_TYPE, "MarkedUnsafeAlgebra", *K)
+                   << "marked unsafe algebra on " << ore::NV("Instruction", *K);
+        });
         (*K)->setFast(true);
         ++length;
     }
@@ -139,11 +165,14 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Fu
         Instruction *I = cast<Instruction>(U);
         ToDelete.push_back(I);
 
-        LoopInfo &LI = GetLI(*I->getParent()->getParent());
-        Loop *L = LI.getLoopFor(I->getParent());
-        I->removeFromParent();
-        if (!L)
+        BasicBlock *B = I->getParent();
+        OptimizationRemarkEmitter ORE(B->getParent());
+        LoopInfo &LI = GetLI(*B->getParent());
+        Loop *L = LI.getLoopFor(B);
+        if (!L) {
+            I->removeFromParent();
             continue;
+        }
 
         LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n");
         bool simd = false;
@@ -182,6 +211,11 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Fu
 
         LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n");
 
+        REMARK([=]() {
+            return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", I->getDebugLoc(), B)
+                << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }";
+        });
+
         MDNode *n = L->getLoopID();
         if (n) {
             // Loop already has a LoopID so copy over Metadata
@@ -220,12 +254,14 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Fu
             // Mark floating-point reductions as okay to reassociate/commute.
             for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) {
                 if (PHINode *Phi = dyn_cast<PHINode>(I))
-                    enableUnsafeAlgebraIfReduction(Phi, L);
+                    enableUnsafeAlgebraIfReduction(Phi, L, ORE);
                 else
                     break;
             }
         }
 
+        I->removeFromParent();
+
         Changed = true;
     }
 
@@ -311,12 +347,13 @@ static RegisterPass<LowerSIMDLoopLegacy> X("LowerSIMDLoop", "LowerSIMDLoop Pass"
                                      false /* Only looks at CFG */,
                                      false /* Analysis Pass */);
 
-JL_DLLEXPORT Pass *createLowerSimdLoopPass()
+Pass *createLowerSimdLoopPass()
 {
     return new LowerSIMDLoopLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT_CODEGEN
+void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLowerSimdLoopPass());
 }
diff --git a/src/llvm-version.h b/src/llvm-version.h
index 4e15e787b7de8..01638b8d44a6e 100644
--- a/src/llvm-version.h
+++ b/src/llvm-version.h
@@ -2,6 +2,7 @@
 
 #include <llvm/Config/llvm-config.h>
 #include "julia_assert.h"
+#include "platform.h"
 
 // The LLVM version used, JL_LLVM_VERSION, is represented as a 5-digit integer
 // of the form ABBCC, where A is the major version, B is minor, and C is patch.
@@ -9,14 +10,23 @@
 #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \
                         + LLVM_VERSION_PATCH)
 
-#if JL_LLVM_VERSION < 120000
-    #error Only LLVM versions >= 12.0.0 are supported by Julia
+#if JL_LLVM_VERSION < 140000
+    #error Only LLVM versions >= 14.0.0 are supported by Julia
 #endif
 
-#if JL_LLVM_VERSION >= 150000
+#if JL_LLVM_VERSION >= 160000
 #define JL_LLVM_OPAQUE_POINTERS 1
 #endif
 
+// Pre GCC 12 libgcc defined the ABI for Float16->Float32
+// to take an i16. GCC 12 silently changed the ABI to now pass
+// Float16 in Float32 registers.
+#if JL_LLVM_VERSION < 150000 || defined(_CPU_PPC64_) || defined(_CPU_PPC_)
+#define JULIA_FLOAT16_ABI 1
+#else
+#define JULIA_FLOAT16_ABI 2
+#endif
+
 #ifdef __cplusplus
 #if defined(__GNUC__) && (__GNUC__ >= 9)
 // Added in GCC 9, this warning is annoying
diff --git a/src/llvmcalltest.cpp b/src/llvmcalltest.cpp
index 352c4695f2f20..93c442445d79a 100644
--- a/src/llvmcalltest.cpp
+++ b/src/llvmcalltest.cpp
@@ -17,11 +17,7 @@ using namespace llvm;
 #ifdef _OS_WINDOWS_
 #  define DLLEXPORT __declspec(dllexport)
 #else
-# if defined(_OS_LINUX_)
-#  define DLLEXPORT __attribute__ ((visibility("protected")))
-# else
 #  define DLLEXPORT __attribute__ ((visibility("default")))
-# endif
 #endif
 
 extern "C" {
diff --git a/src/mach_dyld_atfork.tbd b/src/mach_dyld_atfork.tbd
index 9a5d18099dbcf..c2cda4417ec38 100644
--- a/src/mach_dyld_atfork.tbd
+++ b/src/mach_dyld_atfork.tbd
@@ -21,5 +21,6 @@ install-name:    '/usr/lib/libSystem.B.dylib'
 exports:
   - targets:         [ arm64-macos, arm64e-macos, x86_64-macos, x86_64-maccatalyst,
                        arm64-maccatalyst, arm64e-maccatalyst ]
-    symbols:         [ __dyld_atfork_parent, __dyld_atfork_prepare ]
+    symbols:         [ __dyld_atfork_parent, __dyld_atfork_prepare,
+                      __dyld_dlopen_atfork_parent, __dyld_dlopen_atfork_prepare ]
 ...
diff --git a/src/method.c b/src/method.c
index b1f4051e28a82..c207149032fb9 100644
--- a/src/method.c
+++ b/src/method.c
@@ -18,6 +18,7 @@ extern "C" {
 extern jl_value_t *jl_builtin_getfield;
 extern jl_value_t *jl_builtin_tuple;
 jl_methtable_t *jl_kwcall_mt;
+jl_method_t *jl_opaque_closure_method;
 
 jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
     int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva);
@@ -99,14 +100,16 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                     jl_error("opaque_closure_method: invalid syntax");
                 }
                 jl_value_t *name = jl_exprarg(e, 0);
-                jl_value_t *nargs = jl_exprarg(e, 1);
+                jl_value_t *oc_nargs = jl_exprarg(e, 1);
                 int isva = jl_exprarg(e, 2) == jl_true;
                 jl_value_t *functionloc = jl_exprarg(e, 3);
                 jl_value_t *ci = jl_exprarg(e, 4);
                 if (!jl_is_code_info(ci)) {
                     jl_error("opaque_closure_method: lambda should be a CodeInfo");
+                } else if (!jl_is_long(oc_nargs)) {
+                    jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs);
                 }
-                jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(nargs), functionloc, (jl_code_info_t*)ci, isva);
+                jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva);
                 return (jl_value_t*)m;
             }
             if (e->head == jl_cfunction_sym) {
@@ -127,7 +130,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typeis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
                             jl_error("could not evaluate cfunction return type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -139,7 +142,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typeis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
                             jl_error("could not evaluate cfunction argument type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -160,7 +163,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typeis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
                             jl_error("could not evaluate ccall return type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -172,7 +175,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                         at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals);
                     }
                     JL_CATCH {
-                        if (jl_typeis(jl_current_exception(), jl_errorexception_type))
+                        if (jl_typetagis(jl_current_exception(), jl_errorexception_type))
                             jl_error("could not evaluate ccall argument type (it might depend on a local variable)");
                         else
                             jl_rethrow();
@@ -312,9 +315,7 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
             jl_array_t *meta = ((jl_expr_t*)st)->args;
             for (k = 0; k < na; k++) {
                 jl_value_t *ma = jl_array_ptr_ref(meta, k);
-                if (ma == (jl_value_t*)jl_pure_sym)
-                    li->pure = 1;
-                else if (ma == (jl_value_t*)jl_inline_sym)
+                if (ma == (jl_value_t*)jl_inline_sym)
                     li->inlining = 1;
                 else if (ma == (jl_value_t*)jl_noinline_sym)
                     li->inlining = 2;
@@ -449,6 +450,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
     mi->callbacks = NULL;
     jl_atomic_store_relaxed(&mi->cache, NULL);
     mi->inInference = 0;
+    mi->cache_with_orig = 0;
     jl_atomic_store_relaxed(&mi->precompiled, 0);
     return mi;
 }
@@ -474,7 +476,6 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
     src->max_world = ~(size_t)0;
     src->inferred = 0;
     src->propagate_inbounds = 0;
-    src->pure = 0;
     src->has_fcall = 0;
     src->edges = jl_nothing;
     src->constprop = 0;
@@ -494,45 +495,46 @@ jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
     return src;
 }
 
-void jl_add_function_name_to_lineinfo(jl_code_info_t *ci, jl_value_t *name)
+void jl_add_function_to_lineinfo(jl_code_info_t *ci, jl_value_t *func)
 {
+    // func may contain jl_symbol (function name), jl_method_t, or jl_method_instance_t
     jl_array_t *li = (jl_array_t*)ci->linetable;
     size_t i, n = jl_array_len(li);
     jl_value_t *rt = NULL, *lno = NULL, *inl = NULL;
     JL_GC_PUSH3(&rt, &lno, &inl);
     for (i = 0; i < n; i++) {
         jl_value_t *ln = jl_array_ptr_ref(li, i);
-        assert(jl_typeis(ln, jl_lineinfonode_type));
+        assert(jl_typetagis(ln, jl_lineinfonode_type));
         jl_value_t *mod = jl_fieldref_noalloc(ln, 0);
         jl_value_t *file = jl_fieldref_noalloc(ln, 2);
         lno = jl_fieldref(ln, 3);
         inl = jl_fieldref(ln, 4);
         // respect a given linetable if available
-        jl_value_t *ln_name = jl_fieldref_noalloc(ln, 1);
-        if (jl_is_symbol(ln_name) && (jl_sym_t*)ln_name == jl_symbol("none") && jl_is_int32(inl) && jl_unbox_int32(inl) == 0)
-            ln_name = name;
-        rt = jl_new_struct(jl_lineinfonode_type, mod, ln_name, file, lno, inl);
+        jl_value_t *ln_func = jl_fieldref_noalloc(ln, 1);
+        if (jl_is_symbol(ln_func) && (jl_sym_t*)ln_func == jl_symbol("none") && jl_is_int32(inl) && jl_unbox_int32(inl) == 0)
+            ln_func = func;
+        rt = jl_new_struct(jl_lineinfonode_type, mod, ln_func, file, lno, inl);
         jl_array_ptr_set(li, i, rt);
     }
     JL_GC_POP();
 }
 
 // invoke (compiling if necessary) the jlcall function pointer for a method template
-STATIC_INLINE jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator, jl_svec_t *sparam_vals,
-                                         jl_value_t **args, uint32_t nargs)
+static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator,
+        size_t world, jl_svec_t *sparam_vals, jl_value_t **args, uint32_t nargs)
 {
     size_t n_sparams = jl_svec_len(sparam_vals);
     jl_value_t **gargs;
-    size_t totargs = 1 + n_sparams + nargs + def->isva;
+    size_t totargs = 2 + n_sparams + def->nargs;
     JL_GC_PUSHARGS(gargs, totargs);
-    gargs[0] = generator;
-    memcpy(&gargs[1], jl_svec_data(sparam_vals), n_sparams * sizeof(void*));
-    memcpy(&gargs[1 + n_sparams], args, nargs * sizeof(void*));
-    if (def->isva) {
-        gargs[totargs-1] = jl_f_tuple(NULL, &gargs[1 + n_sparams + def->nargs - 1], nargs - (def->nargs - 1));
-        gargs[1 + n_sparams + def->nargs - 1] = gargs[totargs - 1];
-    }
-    jl_value_t *code = jl_apply(gargs, 1 + n_sparams + def->nargs);
+    gargs[0] = jl_box_ulong(world);
+    gargs[1] = jl_box_long(def->line);
+    gargs[1] = jl_new_struct(jl_linenumbernode_type, gargs[1], def->file);
+    memcpy(&gargs[2], jl_svec_data(sparam_vals), n_sparams * sizeof(void*));
+    memcpy(&gargs[2 + n_sparams], args, (def->nargs - def->isva) * sizeof(void*));
+    if (def->isva)
+        gargs[totargs - 1] = jl_f_tuple(NULL, &args[def->nargs - 1], nargs - def->nargs + 1);
+    jl_value_t *code = jl_apply_generic(generator, gargs, totargs);
     JL_GC_POP();
     return code;
 }
@@ -556,7 +558,7 @@ JL_DLLEXPORT jl_code_info_t *jl_expand_and_resolve(jl_value_t *ex, jl_module_t *
 
 // Return a newly allocated CodeInfo for the function signature
 // effectively described by the tuple (specTypes, env, Method) inside linfo
-JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
+JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world)
 {
     jl_value_t *uninferred = jl_atomic_load_relaxed(&linfo->uninferred);
     if (uninferred) {
@@ -564,9 +566,10 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
         return (jl_code_info_t*)jl_copy_ast((jl_value_t*)uninferred);
     }
 
-    JL_TIMING(STAGED_FUNCTION);
+    JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION);
     jl_value_t *tt = linfo->specTypes;
     jl_method_t *def = linfo->def.method;
+    jl_timing_show_method_instance(linfo, JL_TIMING_CURRENT_BLOCK);
     jl_value_t *generator = def->generator;
     assert(generator != NULL);
     assert(jl_is_method(def));
@@ -580,13 +583,13 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
 
     JL_TRY {
         ct->ptls->in_pure_callback = 1;
-        // and the right world
         ct->world_age = def->primary_world;
 
         // invoke code generator
         jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt);
-        ex = jl_call_staged(def, generator, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt));
+        ex = jl_call_staged(def, generator, world, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt));
 
+        // do some post-processing
         if (jl_is_code_info(ex)) {
             func = (jl_code_info_t*)ex;
             jl_array_t *stmts = (jl_array_t*)func->code;
@@ -603,14 +606,15 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
                 jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator.");
             }
         }
-
-        jl_add_function_name_to_lineinfo(func, (jl_value_t*)def->name);
+        jl_add_function_to_lineinfo(func, (jl_value_t*)def->name);
 
         // If this generated function has an opaque closure, cache it for
         // correctness of method identity
         for (int i = 0; i < jl_array_len(func->code); ++i) {
             jl_value_t *stmt = jl_array_ptr_ref(func->code, i);
             if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) {
+                if (jl_options.incremental && jl_generating_output())
+                    jl_error("Impossible to correctly handle OpaqueClosure inside @generated returned during precompile process.");
                 jl_value_t *uninferred = jl_copy_ast((jl_value_t*)func);
                 jl_value_t *old = NULL;
                 if (jl_atomic_cmpswap(&linfo->uninferred, &old, uninferred)) {
@@ -678,15 +682,14 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
         }
     }
     m->called = called;
-    m->pure = src->pure;
     m->constprop = src->constprop;
     m->purity.bits = src->purity.bits;
-    jl_add_function_name_to_lineinfo(src, (jl_value_t*)m->name);
+    jl_add_function_to_lineinfo(src, (jl_value_t*)m->name);
 
     jl_array_t *copy = NULL;
     jl_svec_t *sparam_vars = jl_outer_unionall_vars(m->sig);
     JL_GC_PUSH3(&copy, &sparam_vars, &src);
-    assert(jl_typeis(src->code, jl_array_any_type));
+    assert(jl_typetagis(src->code, jl_array_any_type));
     jl_array_t *stmts = (jl_array_t*)src->code;
     size_t i, n = jl_array_len(stmts);
     copy = jl_alloc_vec_any(n);
@@ -700,7 +703,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                 size_t j;
                 for (j = 1; j < nargs; j++) {
                     jl_value_t *aj = jl_exprarg(st, j);
-                    if (!jl_is_slot(aj) && !jl_is_argument(aj))
+                    if (!jl_is_slotnumber(aj) && !jl_is_argument(aj))
                         continue;
                     int sn = (int)jl_slot_number(aj) - 2;
                     if (sn < 0) // @nospecialize on self is valid but currently ignored
@@ -723,7 +726,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                     m->nospecialize = 0;
                 for (j = 1; j < nargs; j++) {
                     jl_value_t *aj = jl_exprarg(st, j);
-                    if (!jl_is_slot(aj) && !jl_is_argument(aj))
+                    if (!jl_is_slotnumber(aj) && !jl_is_argument(aj))
                         continue;
                     int sn = (int)jl_slot_number(aj) - 2;
                     if (sn < 0) // @specialize on self is valid but currently ignored
@@ -742,20 +745,12 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                 st = jl_nothing;
             }
             else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_sym) {
-                m->generator = NULL;
+                if (m->generator != NULL)
+                    jl_error("duplicate @generated function body");
                 jl_value_t *gexpr = jl_exprarg(st, 1);
-                if (jl_expr_nargs(gexpr) == 7) {
-                    // expects (new (core GeneratedFunctionStub) funcname argnames sp line file expandearly)
-                    jl_value_t *funcname = jl_exprarg(gexpr, 1);
-                    assert(jl_is_symbol(funcname));
-                    if (jl_get_global(m->module, (jl_sym_t*)funcname) != NULL) {
-                        m->generator = jl_toplevel_eval(m->module, gexpr);
-                        jl_gc_wb(m, m->generator);
-                    }
-                }
-                if (m->generator == NULL) {
-                    jl_error("invalid @generated function; try placing it in global scope");
-                }
+                // the frontend would put (new (core GeneratedFunctionStub) funcname argnames sp) here, for example
+                m->generator = jl_toplevel_eval(m->module, gexpr);
+                jl_gc_wb(m, m->generator);
                 st = jl_nothing;
             }
             else if (nargs == 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_only_sym) {
@@ -790,7 +785,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     jl_task_t *ct = jl_current_task;
     jl_method_t *m =
         (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type);
-    jl_atomic_store_relaxed(&m->specializations, jl_emptysvec);
+    jl_atomic_store_relaxed(&m->specializations, (jl_value_t*)jl_emptysvec);
     jl_atomic_store_relaxed(&m->speckeyset, (jl_array_t*)jl_an_empty_vec_any);
     m->sig = NULL;
     m->slot_syms = NULL;
@@ -817,7 +812,9 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     m->deleted_world = ~(size_t)0;
     m->is_for_opaque_closure = 0;
     m->constprop = 0;
-    JL_MUTEX_INIT(&m->writelock);
+    m->purity.bits = 0;
+    m->max_varargs = UINT8_MAX;
+    JL_MUTEX_INIT(&m->writelock, "method->writelock");
     return m;
 }
 
@@ -967,12 +964,6 @@ JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGAT
     return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig);
 }
 
-// get the MethodTable implied by a single given type, or `nothing`
-JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
-{
-    return nth_methtable(argt, 0);
-}
-
 jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 
 JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
@@ -997,7 +988,9 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
     JL_GC_PUSH3(&f, &m, &argtype);
     size_t i, na = jl_svec_len(atypes);
 
-    argtype = (jl_value_t*)jl_apply_tuple_type(atypes);
+    argtype = jl_apply_tuple_type(atypes);
+    if (!jl_is_datatype(argtype))
+        jl_error("invalid type in method definition (Union{})");
 
     jl_methtable_t *external_mt = mt;
     if (!mt)
@@ -1032,49 +1025,19 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
         }
     }
 
-    for (i = jl_svec_len(tvars); i > 0; i--) {
-        jl_value_t *tv = jl_svecref(tvars, i - 1);
-        if (!jl_is_typevar(tv))
-            jl_type_error("method signature", (jl_value_t*)jl_tvar_type, tv);
-        if (!jl_has_typevar(argtype, (jl_tvar_t*)tv)) // deprecate this to an error in v2
-            jl_printf(JL_STDERR,
-                      "WARNING: method definition for %s at %s:%d declares type variable %s but does not use it.\n",
-                      jl_symbol_name(name),
-                      jl_symbol_name(file),
-                      line,
-                      jl_symbol_name(((jl_tvar_t*)tv)->name));
-        argtype = jl_new_struct(jl_unionall_type, tv, argtype);
-    }
-    if (jl_has_free_typevars(argtype)) {
-        jl_exceptionf(jl_argumenterror_type,
-                      "method definition for %s at %s:%d has free type variables",
-                      jl_symbol_name(name),
-                      jl_symbol_name(file),
-                      line);
-    }
-
-
     if (!jl_is_code_info(f)) {
         // this occurs when there is a closure being added to an out-of-scope function
         // the user should only do this at the toplevel
         // the result is that the closure variables get interpolated directly into the IR
         f = jl_new_code_info_from_ir((jl_expr_t*)f);
     }
-    m = jl_new_method_uninit(module);
-    m->external_mt = (jl_value_t*)external_mt;
-    if (external_mt)
-        jl_gc_wb(m, external_mt);
-    m->sig = argtype;
-    m->name = name;
-    m->isva = isva;
-    m->nargs = nargs;
-    m->file = file;
-    m->line = line;
-    jl_method_set_source(m, f);
 
     for (i = 0; i < na; i++) {
         jl_value_t *elt = jl_svecref(atypes, i);
-        if (!jl_is_type(elt) && !jl_is_typevar(elt) && !jl_is_vararg(elt)) {
+        int isvalid = jl_is_type(elt) || jl_is_typevar(elt) || jl_is_vararg(elt);
+        if (elt == jl_bottom_type || (jl_is_vararg(elt) && jl_unwrap_vararg(elt) == jl_bottom_type))
+            isvalid = 0;
+        if (!isvalid) {
             jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(f->slotnames, i);
             if (argname == jl_unused_sym)
                 jl_exceptionf(jl_argumenterror_type,
@@ -1098,6 +1061,38 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
                           jl_symbol_name(file),
                           line);
     }
+    for (i = jl_svec_len(tvars); i > 0; i--) {
+        jl_value_t *tv = jl_svecref(tvars, i - 1);
+        if (!jl_is_typevar(tv))
+            jl_type_error("method signature", (jl_value_t*)jl_tvar_type, tv);
+        if (!jl_has_typevar(argtype, (jl_tvar_t*)tv)) // deprecate this to an error in v2
+            jl_printf(JL_STDERR,
+                      "WARNING: method definition for %s at %s:%d declares type variable %s but does not use it.\n",
+                      jl_symbol_name(name),
+                      jl_symbol_name(file),
+                      line,
+                      jl_symbol_name(((jl_tvar_t*)tv)->name));
+        argtype = jl_new_struct(jl_unionall_type, tv, argtype);
+    }
+    if (jl_has_free_typevars(argtype)) {
+        jl_exceptionf(jl_argumenterror_type,
+                      "method definition for %s at %s:%d has free type variables",
+                      jl_symbol_name(name),
+                      jl_symbol_name(file),
+                      line);
+    }
+
+    m = jl_new_method_uninit(module);
+    m->external_mt = (jl_value_t*)external_mt;
+    if (external_mt)
+        jl_gc_wb(m, external_mt);
+    m->sig = argtype;
+    m->name = name;
+    m->isva = isva;
+    m->nargs = nargs;
+    m->file = file;
+    m->line = line;
+    jl_method_set_source(m, f);
 
 #ifdef RECORD_METHOD_ORDER
     if (jl_all_methods == NULL)
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index a9feeb6ef4921..5e868ef11c1d2 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -72,7 +72,7 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
 
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
 {
-    register_finalizer(v, f, 1);
+    mmtk_register_finalizer(v, f, 1);
 }
 
 // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
@@ -87,13 +87,13 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct
         jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
     }
     else {
-        register_finalizer(v, f, 0);
+        mmtk_register_finalizer(v, f, 0);
     }
 }
 
 JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
 {
-    run_finalizers_for_obj(o);
+    mmtk_run_finalizers_for_obj(o);
 }
 
 void jl_gc_run_all_finalizers(jl_task_t *ct)
@@ -103,7 +103,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
 
 void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
-    register_finalizer(v, f, 0);
+    mmtk_register_finalizer(v, f, 0);
 }
 
 
@@ -192,13 +192,13 @@ void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
 {
-    unreachable();
+    mmtk_unreachable();
 }
 
 // TODO: exported, but not MMTk-specific?
 JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
 {
-    unreachable();
+    mmtk_unreachable();
 }
 
 
@@ -207,13 +207,13 @@ JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value
 
 JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
 {
-    unreachable();
+    mmtk_unreachable();
     return 0;
 }
 JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
                                             jl_value_t **objs, size_t nobjs)
 {
-    unreachable();
+    mmtk_unreachable();
 }
 
 
@@ -231,7 +231,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
         jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
     }
-    handle_user_collection_request(ptls, collection);
+    mmtk_handle_user_collection_request(ptls, collection);
 }
 
 // Per-thread initialization
@@ -266,7 +266,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
     jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
 
-    MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid);
+    MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid);
     ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator);
 }
 
@@ -277,7 +277,7 @@ void jl_gc_init(void)
     if (jl_options.heap_size_hint)
         jl_gc_set_max_memory(jl_options.heap_size_hint);
 
-    JL_MUTEX_INIT(&heapsnapshot_lock);
+    JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
     uv_mutex_init(&gc_perm_lock);
 
     gc_num.interval = default_collect_interval;
@@ -335,11 +335,14 @@ void jl_gc_init(void)
         max_heap_size = uv_get_free_memory() * 70 / 100;
     }
 
+    // If the two values are the same, we can use either. Otherwise, we need to be careful.
+    assert(jl_n_gcthreads == jl_options.ngcthreads);
+
     // if only max size is specified initialize MMTk with a fixed size heap
     if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) {
-        gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
+        mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
     } else {
-        gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
+        mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)));
     }
 }
 
@@ -470,6 +473,7 @@ void jl_print_gc_stats(JL_STREAM *s)
 {
 }
 
+#ifdef OBJPROFILE
 void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT
 {
 }
@@ -481,6 +485,23 @@ void objprofile_printall(void)
 void objprofile_reset(void)
 {
 }
+#endif
+
+// gc thread function
+void jl_gc_threadfun(void *arg)
+{
+    mmtk_unreachable();
+}
+
+// added for MMTk integration
+void enable_collection(void)
+{
+    mmtk_enable_collection();
+}
+void disable_collection(void)
+{
+    mmtk_disable_collection();
+}
 
 JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT
 {
@@ -516,7 +537,7 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE
 void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
 {
     jl_ptls_t ptls = jl_current_task->ptls;
-    void* addr = alloc(ptls->mmtk_mutator_ptr, sz, align, offset, 1);
+    void* addr = mmtk_alloc(ptls->mmtk_mutator_ptr, sz, align, offset, 1);
     return addr;
 }
 
diff --git a/src/module.c b/src/module.c
index 9a8285ad003f6..04d3970f9b460 100644
--- a/src/module.c
+++ b/src/module.c
@@ -17,6 +17,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui
     const jl_uuid_t uuid_zero = {0, 0};
     jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t),
                                                jl_module_type);
+    jl_set_typetagof(m, jl_module_tag, 0);
     assert(jl_is_symbol(name));
     m->name = name;
     m->parent = parent;
@@ -36,7 +37,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui
     m->max_methods = -1;
     m->hash = parent == NULL ? bitmix(name->hash, jl_module_type->hash) :
         bitmix(name->hash, parent->hash);
-    JL_MUTEX_INIT(&m->lock);
+    JL_MUTEX_INIT(&m->lock, "module->lock");
     jl_atomic_store_relaxed(&m->bindings, jl_emptysvec);
     jl_atomic_store_relaxed(&m->bindingkeyset, (jl_array_t*)jl_an_empty_vec_any);
     arraylist_new(&m->usings, 0);
@@ -182,6 +183,7 @@ static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name)
     b->imported = 0;
     b->deprecated = 0;
     b->usingfailed = 0;
+    b->padding = 0;
     JL_GC_PUSH1(&b);
     b->globalref = jl_new_globalref(mod, name, b);
     JL_GC_POP();
@@ -927,6 +929,15 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported)
 
 JL_DLLEXPORT jl_sym_t *jl_module_name(jl_module_t *m) { return m->name; }
 JL_DLLEXPORT jl_module_t *jl_module_parent(jl_module_t *m) { return m->parent; }
+jl_module_t *jl_module_root(jl_module_t *m)
+{
+    while (1) {
+        if (m->parent == NULL || m->parent == m)
+            return m;
+        m = m->parent;
+    }
+}
+
 JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; }
 JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; }
 
@@ -962,19 +973,15 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m)
     JL_UNLOCK(&m->lock);
 }
 
-JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)
+JL_DLLEXPORT void jl_init_restored_module(jl_value_t *mod)
 {
-    int i, l = jl_array_len(init_order);
-    for (i = 0; i < l; i++) {
-        jl_value_t *mod = jl_array_ptr_ref(init_order, i);
-        if (!jl_generating_output() || jl_options.incremental) {
-            jl_module_run_initializer((jl_module_t*)mod);
-        }
-        else {
-            if (jl_module_init_order == NULL)
-                jl_module_init_order = jl_alloc_vec_any(0);
-            jl_array_ptr_1d_push(jl_module_init_order, mod);
-        }
+    if (!jl_generating_output() || jl_options.incremental) {
+        jl_module_run_initializer((jl_module_t*)mod);
+    }
+    else {
+        if (jl_module_init_order == NULL)
+            jl_module_init_order = jl_alloc_vec_any(0);
+        jl_array_ptr_1d_push(jl_module_init_order, mod);
     }
 }
 
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index db596c2bb893f..d73beff0f8587 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -8,6 +8,11 @@ jl_value_t *jl_fptr_const_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **a
     return oc->captures;
 }
 
+jl_value_t *jl_fptr_const_opaque_closure_typeerror(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
+{
+    jl_type_error("OpaqueClosure", jl_tparam1(jl_typeof(oc)), oc->captures);
+}
+
 // determine whether `argt` is a valid argument type tuple for the given opaque closure method
 JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source)
 {
@@ -22,21 +27,6 @@ JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *sourc
     return 1;
 }
 
-static jl_value_t *prepend_type(jl_value_t *t0, jl_tupletype_t *t)
-{
-    jl_svec_t *sig_args = NULL;
-    JL_GC_PUSH1(&sig_args);
-    size_t nsig = 1 + jl_svec_len(t->parameters);
-    sig_args = jl_alloc_svec_uninit(nsig);
-    jl_svecset(sig_args, 0, t0);
-    for (size_t i = 0; i < nsig-1; ++i) {
-        jl_svecset(sig_args, 1+i, jl_tparam(t, i));
-    }
-    jl_value_t *sigtype = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
-    JL_GC_POP();
-    return sigtype;
-}
-
 static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
     jl_value_t *source_, jl_value_t *captures, int do_compile)
 {
@@ -56,39 +46,78 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     if (jl_nparams(argt) + 1 - jl_is_va_tuple(argt) < source->nargs - source->isva)
         jl_error("Argument type tuple has too few required arguments for method");
     jl_value_t *sigtype = NULL;
-    JL_GC_PUSH1(&sigtype);
-    sigtype = prepend_type(jl_typeof(captures), argt);
-
-    jl_value_t *oc_type JL_ALWAYS_LEAFTYPE;
-    oc_type = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, rt_ub);
-    JL_GC_PROMISE_ROOTED(oc_type);
+    jl_value_t *selected_rt = rt_ub;
+    JL_GC_PUSH2(&sigtype, &selected_rt);
+    sigtype = jl_argtype_with_function(captures, (jl_value_t*)argt);
 
     jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec);
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    jl_task_t *ct = jl_current_task;
+    size_t world = ct->world_age;
     jl_code_instance_t *ci = NULL;
-    if (do_compile)
+    if (do_compile) {
         ci = jl_compile_method_internal(mi, world);
+    }
 
-    jl_task_t *ct = jl_current_task;
+    jl_fptr_args_t invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+    void *specptr = NULL;
+
+    if (ci) {
+        invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke);
+        specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
+
+        selected_rt = ci->rettype;
+        // If we're not allowed to generate a specsig with this, rt, fall
+        // back to the invoke wrapper. We could instead generate a specsig->specsig
+        // wrapper, but lets leave that for later.
+        if (!jl_subtype(rt_lb, selected_rt)) {
+            // TODO: It would be better to try to get a specialization with the
+            // correct rt check here (or we could codegen a wrapper).
+            specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+            jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype};
+            selected_rt = jl_type_union(ts, 2);
+        }
+        if (!jl_subtype(ci->rettype, rt_ub)) {
+            // TODO: It would be better to try to get a specialization with the
+            // correct rt check here (or we could codegen a wrapper).
+            specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+            selected_rt = jl_type_intersection(rt_ub, selected_rt);
+        }
+
+        if (invoke == (jl_fptr_args_t) jl_fptr_interpret_call) {
+            invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+        }
+        else if (invoke == (jl_fptr_args_t)jl_fptr_args && specptr) {
+            invoke = (jl_fptr_args_t)specptr;
+        }
+        else if (invoke == (jl_fptr_args_t)jl_fptr_const_return) {
+            invoke = jl_isa(ci->rettype_const, selected_rt) ?
+                (jl_fptr_args_t)jl_fptr_const_opaque_closure :
+                (jl_fptr_args_t)jl_fptr_const_opaque_closure_typeerror;
+            captures = ci->rettype_const;
+        }
+    }
+
+    jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt);
+    JL_GC_PROMISE_ROOTED(oc_type);
+
+    if (!specptr) {
+        sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt);
+        jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec);
+
+        // OC wrapper methods are not world dependent
+        ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0);
+        if (!jl_atomic_load_acquire(&ci->invoke))
+            jl_generate_fptr_for_oc_wrapper(ci);
+        specptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
+    }
     jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type);
-    JL_GC_POP();
     oc->source = source;
     oc->captures = captures;
-    oc->specptr = NULL;
-    if (!ci || jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_interpret_call) {
-        oc->invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
-    }
-    else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_args) {
-        oc->invoke = jl_atomic_load_relaxed(&ci->specptr.fptr1);
-    }
-    else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) {
-        oc->invoke = (jl_fptr_args_t)jl_fptr_const_opaque_closure;
-        oc->captures = ci->rettype_const;
-    }
-    else {
-        oc->invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke);
-    }
     oc->world = world;
+    oc->invoke = invoke;
+    oc->specptr = specptr;
+
+    JL_GC_POP();
     return oc;
 }
 
@@ -122,12 +151,14 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet
     JL_GC_PUSH3(&root, &sigtype, &inst);
     root = jl_box_long(lineno);
     root = jl_new_struct(jl_linenumbernode_type, root, file);
-    root = (jl_value_t*)jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva);
+    jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva);
+    root = (jl_value_t*)meth;
+    meth->primary_world = jl_current_task->world_age;
 
-    sigtype = prepend_type(jl_typeof(env), argt);
+    sigtype = jl_argtype_with_function(env, (jl_value_t*)argt);
     jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec);
     inst = jl_new_codeinst(mi, rt_ub, NULL, (jl_value_t*)ci,
-        0, ((jl_method_t*)root)->primary_world, -1, 0, 0, jl_nothing, 0);
+        0, meth->primary_world, -1, 0, 0, jl_nothing, 0);
     jl_mi_cache_insert(mi, inst);
 
     jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile);
@@ -143,10 +174,9 @@ JL_CALLABLE(jl_new_opaque_closure_jlcall)
         args[1], args[2], args[3], &args[4], nargs-4, 1);
 }
 
-
 // check whether the specified number of arguments is compatible with the
 // specified number of parameters of the tuple type
-STATIC_INLINE int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT
+int jl_tupletype_length_compat(jl_value_t *v, size_t nargs)
 {
     v = jl_unwrap_unionall(v);
     assert(jl_is_tuple_type(v));
diff --git a/src/options.h b/src/options.h
index 82b71431ecea0..b535d5ad4566f 100644
--- a/src/options.h
+++ b/src/options.h
@@ -78,9 +78,6 @@
 // OBJPROFILE counts objects by type
 // #define OBJPROFILE
 
-// Automatic Instrumenting Profiler
-//#define ENABLE_TIMINGS
-
 
 // method dispatch profiling --------------------------------------------------
 
@@ -134,6 +131,9 @@
 // threadpools specification
 #define THREADPOOLS_NAME                "JULIA_THREADPOOLS"
 
+// GC threads
+#define NUM_GC_THREADS_NAME             "JULIA_NUM_GC_THREADS"
+
 // affinitization behavior
 #define MACHINE_EXCLUSIVE_NAME          "JULIA_EXCLUSIVE"
 #define DEFAULT_MACHINE_EXCLUSIVE       0
diff --git a/src/partr.c b/src/partr.c
index b51f5eee8089f..2c729add629e2 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -108,7 +108,7 @@ void jl_init_threadinginfra(void)
 
 void JL_NORETURN jl_finish_task(jl_task_t *t);
 
-// thread function: used by all except the main thread
+// thread function: used by all mutator threads except the main thread
 void jl_threadfun(void *arg)
 {
     jl_threadarg_t *targ = (jl_threadarg_t*)arg;
@@ -268,7 +268,7 @@ static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
 {
     jl_gc_safepoint();
     jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1);
-    if (jl_typeis(task, jl_task_type)) {
+    if (jl_is_task(task)) {
         int self = jl_atomic_load_relaxed(&jl_current_task->tid);
         jl_set_task_tid(task, self);
         return task;
@@ -448,7 +448,6 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
                     break;
                 }
                 uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
-                // TODO: help with gc work here, if applicable
             }
             assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
             uv_mutex_unlock(&ptls->sleep_lock);
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index ae2b1c3202f04..1007dfd35c1d6 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -146,7 +146,7 @@ namespace {
             // Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
             // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
             // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
-            MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
+            // MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
             // MPM.addPass(ModuleAddressSanitizerPass(
             //     Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
             //Let's assume the defaults are actually fine for our purposes
@@ -173,11 +173,13 @@ namespace {
         // }
     }
 
-    void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
+#ifdef JL_DEBUG_BUILD
+    static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
         if (!llvm_only)
             MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass()));
         MPM.addPass(VerifierPass());
     }
+#endif
 
     auto basicSimplifyCFGOptions() JL_NOTSAFEPOINT {
         return SimplifyCFGOptions()
@@ -244,9 +246,9 @@ namespace {
 
 //Use for O1 and below
 static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT {
-// #ifdef JL_DEBUG_BUILD
+#ifdef JL_DEBUG_BUILD
     addVerificationPasses(MPM, options.llvm_only);
-// #endif
+#endif
     invokePipelineStartCallbacks(MPM, PB, O);
     MPM.addPass(ConstantMergePass());
     if (!options.dump_native) {
@@ -320,9 +322,9 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza
 
 //Use for O2 and above
 static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT {
-// #ifdef JL_DEBUG_BUILD
+#ifdef JL_DEBUG_BUILD
     addVerificationPasses(MPM, options.llvm_only);
-// #endif
+#endif
     invokePipelineStartCallbacks(MPM, PB, O);
     MPM.addPass(ConstantMergePass());
     {
@@ -359,7 +361,8 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
     {
         FunctionPassManager FPM;
         FPM.addPass(SROAPass());
-        FPM.addPass(InstSimplifyPass());
+        // SROA can duplicate PHI nodes which can block LowerSIMD
+        FPM.addPass(InstCombinePass());
         FPM.addPass(JumpThreadingPass());
         FPM.addPass(CorrelatedValuePropagationPass());
         FPM.addPass(ReassociatePass());
@@ -382,7 +385,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
 #endif
             LPM2.addPass(LICMPass(LICMOptions()));
             JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
-            LPM2.addPass(SimpleLoopUnswitchPass());
+            LPM2.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true));
             LPM2.addPass(LICMPass(LICMOptions()));
             JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
             //LICM needs MemorySSA now, so we must use it
@@ -395,11 +398,11 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
             LPM.addPass(LoopIdiomRecognizePass());
             LPM.addPass(IndVarSimplifyPass());
             LPM.addPass(LoopDeletionPass());
+            LPM.addPass(LoopFullUnrollPass());
             invokeLoopOptimizerEndCallbacks(LPM, PB, O);
             //We don't know if the loop end callbacks support MSSA
             FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
         }
-        FPM.addPass(LoopUnrollPass());
         JULIA_PASS(FPM.addPass(AllocOptPass()));
         FPM.addPass(SROAPass());
         FPM.addPass(InstSimplifyPass());
@@ -541,11 +544,8 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
         // Register the AA manager first so that our version is the one used.
         FAM.registerPass([&] JL_NOTSAFEPOINT {
             AAManager AA;
-            // TODO: Why are we only doing this for -O3?
-            if (O.getSpeedupLevel() >= 3) {
-                AA.registerFunctionAnalysis<BasicAA>();
-            }
             if (O.getSpeedupLevel() >= 2) {
+                AA.registerFunctionAnalysis<BasicAA>();
                 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
                 AA.registerFunctionAnalysis<TypeBasedAA>();
             }
@@ -603,6 +603,10 @@ void NewPM::run(Module &M) {
 #endif
 }
 
+void NewPM::printTimers() {
+    SI.getTimePasses().print();
+}
+
 OptimizationLevel getOptLevel(int optlevel) {
     switch (std::min(std::max(optlevel, 0), 3)) {
         case 0:
@@ -717,7 +721,7 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT {
         });
 }
 
-extern "C" JL_DLLEXPORT ::llvm::PassPluginLibraryInfo
-llvmGetPassPluginInfo() JL_NOTSAFEPOINT {
+extern "C" JL_DLLEXPORT_CODEGEN
+::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT {
       return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks};
 }
diff --git a/src/precompile.c b/src/precompile.c
index 75970a20237c2..4aac28ff9a790 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -99,7 +99,7 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
                 // since it's a slightly duplication of effort
                 jl_value_t *tt = jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f);
                 JL_GC_PUSH1(&tt);
-                tt = (jl_value_t*)jl_apply_tuple_type_v(&tt, 1);
+                tt = jl_apply_tuple_type_v(&tt, 1);
                 jl_compile_hint((jl_tupletype_t*)tt);
                 JL_GC_POP();
             }
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index 9f52ce911a92f..055ec4b3330f1 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -120,7 +120,7 @@ static void _compile_all_union(jl_value_t *sig)
                 jl_svecset(p, i, ty);
             }
         }
-        methsig = (jl_value_t*)jl_apply_tuple_type(p);
+        methsig = jl_apply_tuple_type(p);
         methsig = jl_rewrap_unionall(methsig, sig);
         _compile_all_tvar_union(methsig);
     }
@@ -186,8 +186,8 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
             jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
             if (inferred &&
                 inferred != jl_nothing &&
-                jl_ir_flag_inferred((jl_array_t*)inferred) &&
-                (jl_ir_inlining_cost((jl_array_t*)inferred) == UINT16_MAX)) {
+                jl_ir_flag_inferred(inferred) &&
+                (jl_ir_inlining_cost(inferred) == UINT16_MAX)) {
                 do_compile = 1;
             }
             else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
@@ -214,12 +214,17 @@ static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *c
         jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
     }
     else {
-        jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
-        size_t i, l = jl_svec_len(specializations);
-        for (i = 0; i < l; i++) {
-            jl_value_t *mi = jl_svecref(specializations, i);
-            if (mi != jl_nothing)
-                precompile_enq_specialization_((jl_method_instance_t*)mi, closure);
+        jl_value_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
+        if (!jl_is_svec(specializations)) {
+            precompile_enq_specialization_((jl_method_instance_t*)specializations, closure);
+        }
+        else {
+            size_t i, l = jl_svec_len(specializations);
+            for (i = 0; i < l; i++) {
+                jl_value_t *mi = jl_svecref(specializations, i);
+                if (mi != jl_nothing)
+                    precompile_enq_specialization_((jl_method_instance_t*)mi, closure);
+            }
         }
     }
     if (m->ccallable)
@@ -255,7 +260,8 @@ static void *jl_precompile_(jl_array_t *m, int external_linkage)
             jl_array_ptr_1d_push(m2, item);
         }
     }
-    void *native_code = jl_create_native(m2, NULL, NULL, 0, 1, external_linkage);
+    void *native_code = jl_create_native(m2, NULL, NULL, 0, 1, external_linkage,
+                                         jl_atomic_load_acquire(&jl_world_counter));
     JL_GC_POP();
     return native_code;
 }
@@ -291,12 +297,17 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met
     for (i = 0; i < n; i++) {
         jl_method_t *method = (jl_method_t*)jl_array_ptr_ref(extext_methods, i);
         assert(jl_is_method(method));
-        jl_svec_t *specializations = jl_atomic_load_relaxed(&method->specializations);
-        size_t j, l = jl_svec_len(specializations);
-        for (j = 0; j < l; j++) {
-            jl_value_t *mi = jl_svecref(specializations, j);
-            if (mi != jl_nothing)
-                precompile_enq_specialization_((jl_method_instance_t*)mi, m);
+        jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations);
+        if (!jl_is_svec(specializations)) {
+            precompile_enq_specialization_((jl_method_instance_t*)specializations, m);
+        }
+        else {
+            size_t j, l = jl_svec_len(specializations);
+            for (j = 0; j < l; j++) {
+                jl_value_t *mi = jl_svecref(specializations, j);
+                if (mi != jl_nothing)
+                    precompile_enq_specialization_((jl_method_instance_t*)mi, m);
+            }
         }
     }
     n = jl_array_len(new_specializations);
diff --git a/src/processor.cpp b/src/processor.cpp
index 13b40ec4f7363..24a434af91ad3 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -17,6 +17,12 @@
 
 #include "julia_assert.h"
 
+#ifndef _OS_WINDOWS_
+#include <dlfcn.h>
+#endif
+
+#include <iostream>
+
 // CPU target string is a list of strings separated by `;` each string starts with a CPU
 // or architecture name and followed by an optional list of features separated by `,`.
 // A "generic" or empty CPU name means the basic required feature set of the target ISA
@@ -621,113 +627,193 @@ static inline std::vector<TargetData<n>> &get_cmdline_targets(F &&feature_cb)
 // Load sysimg, use the `callback` for dispatch and perform all relocations
 // for the selected target.
 template<typename F>
-static inline jl_image_fptrs_t parse_sysimg(void *hdl, F &&callback)
+static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
 {
-    jl_image_fptrs_t res = {nullptr, 0, nullptr, 0, nullptr, nullptr};
+    JL_TIMING(LOAD_IMAGE, LOAD_Processor);
+    jl_image_t res{};
 
-    // .data base
-    char *data_base;
-    if (!jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 0)) {
-        data_base = NULL;
-    }
-    // .text base
-    char *text_base;
-    if (!jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 0)) {
-        text_base = NULL;
-    }
-    res.base = text_base;
-
-    int32_t *offsets;
-    jl_dlsym(hdl, "jl_sysimg_fvars_offsets", (void**)&offsets, 1);
-    uint32_t nfunc = offsets[0];
-    res.offsets = offsets + 1;
+    const jl_image_pointers_t *pointers;
+    jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1);
 
-    void *ids;
-    jl_dlsym(hdl, "jl_dispatch_target_ids", &ids, 1);
+    const void *ids = pointers->target_data;
     uint32_t target_idx = callback(ids);
 
-    int32_t *reloc_slots;
-    jl_dlsym(hdl, "jl_dispatch_reloc_slots", (void **)&reloc_slots, 1);
-    const uint32_t nreloc = reloc_slots[0];
-    reloc_slots += 1;
-    uint32_t *clone_idxs;
-    int32_t *clone_offsets;
-    jl_dlsym(hdl, "jl_dispatch_fvars_idxs", (void**)&clone_idxs, 1);
-    jl_dlsym(hdl, "jl_dispatch_fvars_offsets", (void**)&clone_offsets, 1);
-    uint32_t tag_len = clone_idxs[0];
-    clone_idxs += 1;
-
-    assert(tag_len & jl_sysimg_tag_mask);
-    std::vector<const int32_t*> base_offsets = {res.offsets};
-    // Find target
-    for (uint32_t i = 0;i < target_idx;i++) {
-        uint32_t len = jl_sysimg_val_mask & tag_len;
-        if (jl_sysimg_tag_mask & tag_len) {
-            if (i != 0)
-                clone_offsets += nfunc;
-            clone_idxs += len + 1;
-        }
-        else {
-            clone_offsets += len;
-            clone_idxs += len + 2;
-        }
-        tag_len = clone_idxs[-1];
-        base_offsets.push_back(tag_len & jl_sysimg_tag_mask ? clone_offsets : nullptr);
+    if (pointers->header->version != 1) {
+        jl_error("Image file is not compatible with this version of Julia");
     }
 
-    bool clone_all = (tag_len & jl_sysimg_tag_mask) != 0;
-    // Fill in return value
-    if (clone_all) {
-        // clone_all
-        if (target_idx != 0) {
-            res.offsets = clone_offsets;
-        }
-    }
-    else {
-        uint32_t base_idx = clone_idxs[0];
-        assert(base_idx < target_idx);
-        if (target_idx != 0) {
-            res.offsets = base_offsets[base_idx];
-            assert(res.offsets);
+    std::vector<const char *> fvars(pointers->header->nfvars);
+    std::vector<const char *> gvars(pointers->header->ngvars);
+
+    std::vector<std::pair<uint32_t, const char *>> clones;
+
+    for (unsigned i = 0; i < pointers->header->nshards; i++) {
+        auto shard = pointers->shards[i];
+
+        // .data base
+        char *data_base = (char *)shard.gvar_base;
+
+        // .text base
+        const char *text_base = shard.fvar_base;
+
+        const int32_t *offsets = shard.fvar_offsets;
+        uint32_t nfunc = offsets[0];
+        assert(nfunc <= pointers->header->nfvars);
+        offsets++;
+        const int32_t *reloc_slots = shard.clone_slots;
+        const uint32_t nreloc = reloc_slots[0];
+        reloc_slots += 1;
+        const uint32_t *clone_idxs = shard.clone_idxs;
+        const int32_t *clone_offsets = shard.clone_offsets;
+        uint32_t tag_len = clone_idxs[0];
+        clone_idxs += 1;
+
+        assert(tag_len & jl_sysimg_tag_mask);
+        std::vector<const int32_t*> base_offsets = {offsets};
+        // Find target
+        for (uint32_t i = 0;i < target_idx;i++) {
+            uint32_t len = jl_sysimg_val_mask & tag_len;
+            if (jl_sysimg_tag_mask & tag_len) {
+                if (i != 0)
+                    clone_offsets += nfunc;
+                clone_idxs += len + 1;
+            }
+            else {
+                clone_offsets += len;
+                clone_idxs += len + 2;
+            }
+            tag_len = clone_idxs[-1];
+            base_offsets.push_back(tag_len & jl_sysimg_tag_mask ? clone_offsets : nullptr);
         }
-        clone_idxs++;
-        res.nclones = tag_len;
-        res.clone_offsets = clone_offsets;
-        res.clone_idxs = clone_idxs;
-    }
-    // Do relocation
-    uint32_t reloc_i = 0;
-    uint32_t len = jl_sysimg_val_mask & tag_len;
-    for (uint32_t i = 0; i < len; i++) {
-        uint32_t idx = clone_idxs[i];
-        int32_t offset;
+
+        bool clone_all = (tag_len & jl_sysimg_tag_mask) != 0;
+        // Fill in return value
         if (clone_all) {
-            offset = res.offsets[idx];
-        }
-        else if (idx & jl_sysimg_tag_mask) {
-            idx = idx & jl_sysimg_val_mask;
-            offset = clone_offsets[i];
+            // clone_all
+            if (target_idx != 0) {
+                offsets = clone_offsets;
+            }
         }
         else {
-            continue;
+            uint32_t base_idx = clone_idxs[0];
+            assert(base_idx < target_idx);
+            if (target_idx != 0) {
+                offsets = base_offsets[base_idx];
+                assert(offsets);
+            }
+            clone_idxs++;
+            unsigned start = clones.size();
+            clones.resize(start + tag_len);
+            auto idxs = shard.fvar_idxs;
+            for (unsigned i = 0; i < tag_len; i++) {
+                clones[start + i] = {(clone_idxs[i] & ~jl_sysimg_val_mask) | idxs[clone_idxs[i] & jl_sysimg_val_mask], clone_offsets[i] + text_base};
+            }
         }
-        bool found = false;
-        for (; reloc_i < nreloc; reloc_i++) {
-            auto reloc_idx = ((const uint32_t*)reloc_slots)[reloc_i * 2];
-            if (reloc_idx == idx) {
-                found = true;
-                auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]);
-                assert(slot);
-                *slot = offset + res.base;
+        // Do relocation
+        uint32_t reloc_i = 0;
+        uint32_t len = jl_sysimg_val_mask & tag_len;
+        for (uint32_t i = 0; i < len; i++) {
+            uint32_t idx = clone_idxs[i];
+            int32_t offset;
+            if (clone_all) {
+                offset = offsets[idx];
+            }
+            else if (idx & jl_sysimg_tag_mask) {
+                idx = idx & jl_sysimg_val_mask;
+                offset = clone_offsets[i];
+            }
+            else {
+                continue;
             }
-            else if (reloc_idx > idx) {
-                break;
+            bool found = false;
+            for (; reloc_i < nreloc; reloc_i++) {
+                auto reloc_idx = ((const uint32_t*)reloc_slots)[reloc_i * 2];
+                if (reloc_idx == idx) {
+                    found = true;
+                    auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]);
+                    assert(slot);
+                    *slot = offset + text_base;
+                }
+                else if (reloc_idx > idx) {
+                    break;
+                }
             }
+            assert(found && "Cannot find GOT entry for cloned function.");
+            (void)found;
+        }
+
+        auto fidxs = shard.fvar_idxs;
+        for (uint32_t i = 0; i < nfunc; i++) {
+            fvars[fidxs[i]] = text_base + offsets[i];
+        }
+
+        auto gidxs = shard.gvar_idxs;
+        unsigned ngvars = shard.gvar_offsets[0];
+        assert(ngvars <= pointers->header->ngvars);
+        for (uint32_t i = 0; i < ngvars; i++) {
+            gvars[gidxs[i]] = data_base + shard.gvar_offsets[i+1];
         }
-        assert(found && "Cannot find GOT entry for cloned function.");
-        (void)found;
     }
 
+    if (!fvars.empty()) {
+        auto offsets = (int32_t *) malloc(sizeof(int32_t) * fvars.size());
+        res.fptrs.base = fvars[0];
+        for (size_t i = 0; i < fvars.size(); i++) {
+            assert(fvars[i] && "Missing function pointer!");
+            offsets[i] = fvars[i] - res.fptrs.base;
+        }
+        res.fptrs.offsets = offsets;
+        res.fptrs.noffsets = fvars.size();
+    }
+
+    if (!gvars.empty()) {
+        auto offsets = (int32_t *) malloc(sizeof(int32_t) * gvars.size());
+        res.gvars_base = (uintptr_t *)gvars[0];
+        for (size_t i = 0; i < gvars.size(); i++) {
+            assert(gvars[i] && "Missing global variable pointer!");
+            offsets[i] = gvars[i] - (const char *)res.gvars_base;
+        }
+        res.gvars_offsets = offsets;
+        res.ngvars = gvars.size();
+    }
+
+    if (!clones.empty()) {
+        assert(!fvars.empty());
+        std::sort(clones.begin(), clones.end());
+        auto clone_offsets = (int32_t *) malloc(sizeof(int32_t) * clones.size());
+        auto clone_idxs = (uint32_t *) malloc(sizeof(uint32_t) * clones.size());
+        for (size_t i = 0; i < clones.size(); i++) {
+            clone_idxs[i] = clones[i].first;
+            clone_offsets[i] = clones[i].second - res.fptrs.base;
+        }
+        res.fptrs.clone_idxs = clone_idxs;
+        res.fptrs.clone_offsets = clone_offsets;
+        res.fptrs.nclones = clones.size();
+    }
+
+#ifdef _OS_WINDOWS_
+    res.base = (intptr_t)hdl;
+#else
+    Dl_info dlinfo;
+    if (dladdr((void*)pointers, &dlinfo) != 0) {
+        res.base = (intptr_t)dlinfo.dli_fbase;
+    }
+    else {
+        res.base = 0;
+    }
+#endif
+
+    {
+        void *pgcstack_func_slot = pointers->ptls->pgcstack_func_slot;
+        void *pgcstack_key_slot = pointers->ptls->pgcstack_key_slot;
+        jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot);
+
+        size_t *tls_offset_idx = pointers->ptls->tls_offset;
+        *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
+    }
+
+    res.small_typeof = pointers->small_typeof;
+
     return res;
 }
 
@@ -739,20 +825,24 @@ static inline void check_cmdline(T &&cmdline, bool imaging)
     // sysimg means. Make it an error for now.
     if (!imaging) {
         if (cmdline.size() > 1) {
-            jl_error("More than one command line CPU targets specified "
-                     "without a `--output-` flag specified");
+            jl_safe_printf("More than one command line CPU targets specified "
+                      "without a `--output-` flag specified");
+            exit(1);
         }
         if (cmdline[0].en.flags & JL_TARGET_CLONE_ALL) {
-            jl_error("\"clone_all\" feature specified "
-                     "without a `--output-` flag specified");
+            jl_safe_printf("\"clone_all\" feature specified "
+                      "without a `--output-` flag specified");
+            exit(1);
         }
         if (cmdline[0].en.flags & JL_TARGET_OPTSIZE) {
-            jl_error("\"opt_size\" feature specified "
-                     "without a `--output-` flag specified");
+            jl_safe_printf("\"opt_size\" feature specified "
+                      "without a `--output-` flag specified");
+            exit(1);
         }
         if (cmdline[0].en.flags & JL_TARGET_MINSIZE) {
-            jl_error("\"min_size\" feature specified "
-                     "without a `--output-` flag specified");
+            jl_safe_printf("\"min_size\" feature specified "
+                      "without a `--output-` flag specified");
+            exit(1);
         }
     }
 }
diff --git a/src/processor.h b/src/processor.h
index e3f3bd512c910..3e83bbb2247d6 100644
--- a/src/processor.h
+++ b/src/processor.h
@@ -14,82 +14,9 @@
 extern "C" {
 #endif
 
-/**
- * Related sysimg exported symbols
- *
- * In the following text, function refers to an abstract entity.
- * It corresponds to a `Function` that we emit in the codegen, and there might be multiple copies
- * of it in the system image. Only one of those copies will be used in a given session.
- * Function pointers refer to a real piece of code in the system image.
- * Each function might have multiple function pointers in the system image
- * and each function pointer will correspond to only one function.
- *
- * # Global function and base pointers
- * `jl_sysimg_gvars_base`:
- *     The address of this symbol is the base data pointer
- *     (all other data pointers are stored as offsets to this address)
- * `jl_sysimg_fvars_base`:
- *     The address of this symbol is the base function pointer
- *     (all other function pointers are stored as offsets to this address)
- * `jl_sysimg_fvars_offsets`: [static data]
- *     The array of function pointer offsets (`int32_t`) from the base pointer.
- *     This includes all julia functions in sysimg as well as all other functions that are cloned.
- *     The default function pointer is used if the function is cloned.
- *     The first element is the size of the array, which should **NOT** be used as the number
- *     of julia functions in the sysimg.
- *     Each entry in this array uniquely identifies a function we are interested in
- *     (the function may have multiple function pointers corresponding to different versions).
- *     In other sysimg info, all references to functions are stored as their `uint32_t` index
- *     in this array.
- *
- * # Target data and dispatch slots (Only needed by runtime during loading)
- * `jl_dispatch_target_ids`: [static data] serialize target data.
- *     This contains the number of targets which is needed to decode `jl_dispatch_fvars_idxs`
- *     in addition to the name and feature set of each target.
- * `jl_dispatch_reloc_slots`: [static data] location and index of relocation slots.
- *     Stored as pairs of function indices and `int32_t` offsets from `jl_sysimg_gvars_base`.
- *     The first element is an `uint32_t` giving the number of relocations.
- *     This is needed for functions whose address is used in a way that requires dispatch.
- *     We currently only support one type of relocation (i.e. absolute pointer) which is enough
- *     for all use in functions as well as GOT slot (for "PLT" callback).
- *     Note that not all functions being cloned are assigned a slot.
- *     This array is sorted by the function indices.
- *     There can be more than one slot per-function,
- *     i.e. there can be duplicated function indices.
- *
- * # Target functions
- * `jl_dispatch_fvars_idxs`: [static data] Target-specific function indices.
- *     For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index
- *     of the base target followed by an array of tagged function indices.
- *     The base target index is required to be smaller than the index of the current target
- *     and must be the default (`0`) or a `clone_all` target.
- *     If it's not `0`, the function pointer array for the `clone_all` target will be used as
- *     the base function pointer offsets instead.
- *     The tag bits for both the length and the indices are the top bit.
- *     A tagged length indicates that all of the functions are cloned and the indices follows
- *     are the ones that requires relocation. The base target index is omitted in this case.
- *     Otherwise, the length is the total number of functions that we are interested in
- *     for this target, which includes all cloned julia functions and
- *     all other cloned functions that requires relocation.
- *     A tagged index means that the function pointer should be filled into the GOT slots
- *     identified by `jl_dispatch_reloc_slots`. There could be more than one slot per function.
- *     (Note that a tagged index could corresponds to a functions pointer that's the same as
- *     the base one since this is the only way we currently represent relocations.)
- *     A tagged length implicitly tags all the indices and the indices will not have the tag bit
- *     set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`.
- * `jl_dispatch_fvars_offsets`: [static data] Target-specific function pointer offsets.
- *     This contains all the cloned functions that we are interested in and it needs to be decoded
- *     and used along with `jl_dispatch_fvars_idxs`.
- *     For the default target, there's no entries in this variable, if there's any relocations
- *     needed for the default target, the function pointers are taken from the global offset
- *     arrays directly.
- *     For a `clone_all` target (i.e. with the length in `jl_dispatch_fvars_idxs` tagged), this
- *     variable contains an offset array of the same length as the global one. Only the indices
- *     appearing in `jl_dispatch_fvars_idxs` need relocation and the dispatch code should return
- *     this array as the original/base function offsets.
- *     For other targets, this variable contains an offset array with the length defined in
- *     `jl_dispatch_fvars_idxs`. Tagged indices need relocations.
- */
+// Image metadata
+// Every image exports a `jl_image_pointers_t` as a global symbol `jl_image_pointers`.
+// This symbol acts as a root for all other code-related symbols in the image.
 
 enum {
     JL_TARGET_VEC_CALL = 1 << 0,
@@ -155,6 +82,130 @@ typedef struct _jl_image_fptrs_t {
     const uint32_t *clone_idxs;
 } jl_image_fptrs_t;
 
+typedef struct {
+    uint64_t base;
+    uintptr_t *gvars_base;
+    const int32_t *gvars_offsets;
+    uint32_t ngvars;
+    jl_image_fptrs_t fptrs;
+    void **small_typeof;
+} jl_image_t;
+
+// The header for each image
+// Details important counts about the image
+typedef struct {
+    // The version of the image format
+    // Most up-to-date version is 1
+    uint32_t version;
+    // The number of shards in this image
+    uint32_t nshards;
+    // The total number of fvars in this image among all shards
+    uint32_t nfvars;
+    // The total number of gvars in this image among all shards
+    uint32_t ngvars;
+} jl_image_header_t;
+
+// Per-shard data for image shards. Each image contains header->nshards of these.
+typedef struct {
+
+    // This is the base function pointer
+    // (all other function pointers are stored as offsets to this address)
+    const char *fvar_base;
+    // The array of function pointer offsets (`int32_t`) from the base pointer.
+    // This includes all julia functions in sysimg as well as all other functions that are cloned.
+    // The default function pointer is used if the function is cloned.
+    // The first element is the size of the array, which should **NOT** be used as the number
+    // of julia functions in the sysimg.
+    // Each entry in this array uniquely identifies a function we are interested in
+    // (the function may have multiple function pointers corresponding to different versions).
+    // In other sysimg info, all references to functions are stored as their `uint32_t` index
+    // in this array.
+    const int32_t *fvar_offsets;
+    // This is the mapping of shard function index -> global function index
+    // staticdata.c relies on the same order of functions in the global function array being
+    // the same as what it saw when serializing the global function array. However, partitioning
+    // into multiple shards will cause functions to be reordered. This array is used to map
+    // back to the original function array for loading.
+    const uint32_t *fvar_idxs;
+    // This is the base data pointer
+    // (all other data pointers in this shard are stored as offsets to this address)
+    uintptr_t *gvar_base;
+    // This array of global variable offsets (`int32_t`) from the base pointer.
+    // Similar to fvar_offsets, but for gvars
+    const int32_t *gvar_offsets;
+    // This is the mapping of shard global variable index -> global global variable index
+    // Similar to fvar_idxs, but for gvars
+    const uint32_t *gvar_idxs;
+    // location and index of relocation slots.
+    // Stored as pairs of function indices and `int32_t` offsets from `jl_sysimg_gvars_base`.
+    // The first element is an `uint32_t` giving the number of relocations.
+    // This is needed for functions whose address is used in a way that requires dispatch.
+    // We currently only support one type of relocation (i.e. absolute pointer) which is enough
+    // for all use in functions as well as GOT slot (for "PLT" callback).
+    // Note that not all functions being cloned are assigned a slot.
+    // This array is sorted by the function indices.
+    // There can be more than one slot per-function,
+    // i.e. there can be duplicated function indices.
+    const int32_t *clone_slots;
+    //  Target-specific function pointer offsets.
+    //  This contains all the cloned functions that we are interested in and it needs to be decoded
+    //  and used along with `jl_dispatch_fvars_idxs`.
+    //  For the default target, there's no entries in this variable, if there's any relocations
+    //  needed for the default target, the function pointers are taken from the global offset
+    //  arrays directly.
+    //  For a `clone_all` target (i.e. with the length in `jl_dispatch_fvars_idxs` tagged), this
+    //  variable contains an offset array of the same length as the global one. Only the indices
+    //  appearing in `jl_dispatch_fvars_idxs` need relocation and the dispatch code should return
+    //  this array as the original/base function offsets.
+    //  For other targets, this variable contains an offset array with the length defined in
+    //  `jl_dispatch_fvars_idxs`. Tagged indices need relocations.
+    const int32_t *clone_offsets;
+    //  Target-specific function indices.
+    //  For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index
+    //  of the base target followed by an array of tagged function indices.
+    //  The base target index is required to be smaller than the index of the current target
+    //  and must be the default (`0`) or a `clone_all` target.
+    //  If it's not `0`, the function pointer array for the `clone_all` target will be used as
+    //  the base function pointer offsets instead.
+    //  The tag bits for both the length and the indices are the top bit.
+    //  A tagged length indicates that all of the functions are cloned and the indices follows
+    //  are the ones that requires relocation. The base target index is omitted in this case.
+    //  Otherwise, the length is the total number of functions that we are interested in
+    //  for this target, which includes all cloned julia functions and
+    //  all other cloned functions that requires relocation.
+    //  A tagged index means that the function pointer should be filled into the GOT slots
+    //  identified by `jl_dispatch_reloc_slots`. There could be more than one slot per function.
+    //  (Note that a tagged index could corresponds to a functions pointer that's the same as
+    //  the base one since this is the only way we currently represent relocations.)
+    //  A tagged length implicitly tags all the indices and the indices will not have the tag bit
+    //  set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`.
+    const uint32_t *clone_idxs;
+} jl_image_shard_t;
+
+// The TLS data for each image
+typedef struct {
+    void *pgcstack_func_slot;
+    void *pgcstack_key_slot;
+    size_t *tls_offset;
+} jl_image_ptls_t;
+
+//The root struct for images, points to all the other globals
+typedef struct {
+    // The image header, contains numerical global data
+    const jl_image_header_t *header;
+    // The shard table, contains per-shard data
+    const jl_image_shard_t *shards; // points to header->nshards length array
+    // The TLS data pointer
+    const jl_image_ptls_t *ptls;
+    // A copy of small_typeof[]
+    void **small_typeof;
+
+    //  serialized target data
+    //  This contains the number of targets
+    //  in addition to the name and feature set of each target.
+    const void *target_data;
+} jl_image_pointers_t;
+
 /**
  * Initialize the processor dispatch system with sysimg `hdl` (also initialize the sysimg itself).
  * The dispatch system will find the best implementation to be used in this session.
@@ -165,8 +216,8 @@ typedef struct _jl_image_fptrs_t {
  *
  * Return the data about the function pointers selected.
  */
-jl_image_fptrs_t jl_init_processor_sysimg(void *hdl);
-jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl);
+jl_image_t jl_init_processor_sysimg(void *hdl);
+jl_image_t jl_init_processor_pkgimg(void *hdl);
 
 // Return the name of the host CPU as a julia string.
 JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index 3e7b22caf00d4..0797fa4381f9d 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -1802,14 +1802,14 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
     return jl_cstr_to_string(host_cpu_name().c_str());
 }
 
-jl_image_fptrs_t jl_init_processor_sysimg(void *hdl)
+jl_image_t jl_init_processor_sysimg(void *hdl)
 {
     if (!jit_targets.empty())
         jl_error("JIT targets already initialized");
     return parse_sysimg(hdl, sysimg_init_cb);
 }
 
-jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl)
+jl_image_t jl_init_processor_pkgimg(void *hdl)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp
index c1353e1bb43b0..1aebde6dab90a 100644
--- a/src/processor_fallback.cpp
+++ b/src/processor_fallback.cpp
@@ -112,14 +112,14 @@ get_llvm_target_str(const TargetData<1> &data)
 
 using namespace Fallback;
 
-jl_image_fptrs_t jl_init_processor_sysimg(void *hdl)
+jl_image_t jl_init_processor_sysimg(void *hdl)
 {
     if (!jit_targets.empty())
         jl_error("JIT targets already initialized");
     return parse_sysimg(hdl, sysimg_init_cb);
 }
 
-jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl)
+jl_image_t jl_init_processor_pkgimg(void *hdl)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp
index c61712ada787a..30a6ff9b3dede 100644
--- a/src/processor_x86.cpp
+++ b/src/processor_x86.cpp
@@ -154,6 +154,9 @@ static constexpr FeatureDep deps[] = {
     {avx512vnni, avx512f},
     {avx512vp2intersect, avx512f},
     {avx512vpopcntdq, avx512f},
+    {avx512fp16, avx512bw},
+    {avx512fp16, avx512dq},
+    {avx512fp16, avx512vl},
     {amx_int8, amx_tile},
     {amx_bf16, amx_tile},
     {sse4a, sse3},
@@ -208,8 +211,8 @@ constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdi
 constexpr auto alderlake = skylake | get_feature_masks(clwb, sha, waitpkg, shstk, gfni, vaes, vpclmulqdq, pconfig,
                                                        rdpid, movdiri, pku, movdir64b, serialize, ptwrite, avxvnni);
 constexpr auto sapphirerapids = icelake_server |
-    get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, serialize, cldemote, waitpkg,
-                      ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b);
+    get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, avx512fp16, serialize, cldemote, waitpkg,
+                      avxvnni, uintr, ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b);
 
 constexpr auto k8_sse3 = get_feature_masks(sse3, cx16);
 constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf);
@@ -930,10 +933,10 @@ static void ensure_jit_target(bool imaging)
                                                   Feature::avx512pf, Feature::avx512er,
                                                   Feature::avx512cd, Feature::avx512bw,
                                                   Feature::avx512vl, Feature::avx512vbmi,
-                                                  Feature::avx512vpopcntdq,
+                                                  Feature::avx512vpopcntdq, Feature::avxvnni,
                                                   Feature::avx512vbmi2, Feature::avx512vnni,
                                                   Feature::avx512bitalg, Feature::avx512bf16,
-                                                  Feature::avx512vp2intersect};
+                                                  Feature::avx512vp2intersect, Feature::avx512fp16};
         for (auto fe: clone_math) {
             if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
                 t.en.flags |= JL_TARGET_CLONE_MATH;
@@ -946,6 +949,13 @@ static void ensure_jit_target(bool imaging)
                 break;
             }
         }
+        static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16};
+        for (auto fe: clone_fp16) {
+            if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
+                t.en.flags |= JL_TARGET_CLONE_FLOAT16;
+                break;
+            }
+        }
     }
 }
 
@@ -1029,14 +1039,14 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)
     return jl_cstr_to_string(host_cpu_name().c_str());
 }
 
-jl_image_fptrs_t jl_init_processor_sysimg(void *hdl)
+jl_image_t jl_init_processor_sysimg(void *hdl)
 {
     if (!jit_targets.empty())
         jl_error("JIT targets already initialized");
     return parse_sysimg(hdl, sysimg_init_cb);
 }
 
-jl_image_fptrs_t jl_init_processor_pkgimg(void *hdl)
+jl_image_t jl_init_processor_pkgimg(void *hdl)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
diff --git a/src/rtutils.c b/src/rtutils.c
index dd606f38d065c..01ea11014a6db 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -129,6 +129,8 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error(const char *fname,
 
 JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var)
 {
+    if (!jl_undefvarerror_type)
+        jl_errorf("UndefVarError(%s)", jl_symbol_name(var));
     jl_throw(jl_new_struct(jl_undefvarerror_type, var));
 }
 
@@ -538,14 +540,23 @@ JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT
 
 // toys for debugging ---------------------------------------------------------
 
-static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const char *opn, const char *cls) JL_NOTSAFEPOINT
+struct recur_list {
+    struct recur_list *prev;
+    jl_value_t *v;
+};
+
+static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT;
+static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT;
+static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT;
+
+static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const char *opn, const char *cls, jl_static_show_config_t ctx) JL_NOTSAFEPOINT
 {
     size_t i, n=0, len = jl_svec_len(t);
     n += jl_printf(out, "%s", head);
     n += jl_printf(out, "%s", opn);
     for (i = 0; i < len; i++) {
         jl_value_t *v = jl_svecref(t,i);
-        n += jl_static_show(out, v);
+        n += jl_static_show_x(out, v, 0, ctx);
         if (i != len-1)
             n += jl_printf(out, ", ");
     }
@@ -553,14 +564,6 @@ static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const
     return n;
 }
 
-struct recur_list {
-    struct recur_list *prev;
-    jl_value_t *v;
-};
-
-static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth) JL_NOTSAFEPOINT;
-static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth) JL_NOTSAFEPOINT;
-
 JL_DLLEXPORT int jl_id_start_char(uint32_t wc) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_id_char(uint32_t wc) JL_NOTSAFEPOINT;
 
@@ -697,7 +700,7 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT {
 // This is necessary to make sure that this function doesn't allocate any
 // memory through the Julia GC
 static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt,
-                                struct recur_list *depth) JL_NOTSAFEPOINT
+                                struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT
 {
     size_t n = 0;
     if ((uintptr_t)vt < 4096U) {
@@ -705,7 +708,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if ((uintptr_t)v < 4096U) {
         n += jl_printf(out, "<?#%p::", (void*)v);
-        n += jl_static_show_x(out, (jl_value_t*)vt, depth);
+        n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx);
         n += jl_printf(out, ">");
     }
     else if (vt == (jl_datatype_t*)jl_buff_tag) {
@@ -746,17 +749,17 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             n += jl_static_show_func_sig(out, li->def.method->sig);
         }
         else {
-            n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth);
+            n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth, ctx);
             n += jl_printf(out, ".<toplevel thunk> -> ");
-            n += jl_static_show_x(out, jl_atomic_load_relaxed(&li->uninferred), depth);
+            n += jl_static_show_x(out, jl_atomic_load_relaxed(&li->uninferred), depth, ctx);
         }
     }
     else if (vt == jl_typename_type) {
-        n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth);
+        n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth, ctx);
         n += jl_printf(out, ".name");
     }
     else if (vt == jl_simplevector_type) {
-        n += jl_show_svec(out, (jl_svec_t*)v, "svec", "(", ")");
+        n += jl_show_svec(out, (jl_svec_t*)v, "svec", "(", ")", ctx);
     }
     else if (v == (jl_value_t*)jl_unionall_type) {
         // avoid printing `typeof(Type)` for `UnionAll`.
@@ -767,10 +770,10 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         n += jl_printf(out, "Vararg");
         if (vm->T) {
             n += jl_printf(out, "{");
-            n += jl_static_show_x(out, vm->T, depth);
+            n += jl_static_show_x(out, vm->T, depth, ctx);
             if (vm->N) {
                 n += jl_printf(out, ", ");
-                n += jl_static_show_x(out, vm->N, depth);
+                n += jl_static_show_x(out, vm->N, depth, ctx);
             }
             n += jl_printf(out, "}");
         }
@@ -797,7 +800,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             }
             if (taillen == tlen && taillen > 3) {
                 n += jl_printf(out, "NTuple{%d, ", tlen);
-                n += jl_static_show_x(out, jl_tparam0(dv), depth);
+                n += jl_static_show_x(out, jl_tparam0(dv), depth, ctx);
                 n += jl_printf(out, "}");
             }
             else {
@@ -805,22 +808,25 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 for (i = 0; i < (taillen > 3 ? tlen-taillen : tlen); i++) {
                     if (i > 0)
                         n += jl_printf(out, ", ");
-                    n += jl_static_show_x(out, jl_tparam(dv, i), depth);
+                    n += jl_static_show_x(out, jl_tparam(dv, i), depth, ctx);
                 }
                 if (taillen > 3) {
                     n += jl_printf(out, ", Vararg{");
-                    n += jl_static_show_x(out, jl_tparam(dv, tlen-1), depth);
+                    n += jl_static_show_x(out, jl_tparam(dv, tlen-1), depth, ctx);
                     n += jl_printf(out, ", %d}", taillen);
                 }
                 n += jl_printf(out, "}");
             }
             return n;
         }
+        if (ctx.quiet) {
+            return jl_printf(out, "%s", jl_symbol_name(dv->name->name));
+        }
         if (globfunc) {
             n += jl_printf(out, "typeof(");
         }
         if (jl_core_module && (dv->name->module != jl_core_module || !jl_module_exports_p(jl_core_module, sym))) {
-            n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth);
+            n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth, ctx);
             n += jl_printf(out, ".");
             size_t i = 0;
             if (globfunc && !jl_id_start_char(u8_nextchar(sn, &i))) {
@@ -841,7 +847,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 n += jl_printf(out, "{");
                 for (j = 0; j < tlen; j++) {
                     jl_value_t *p = jl_tparam(dv,j);
-                    n += jl_static_show_x(out, p, depth);
+                    n += jl_static_show_x(out, p, depth, ctx);
                     if (j != tlen-1)
                         n += jl_printf(out, ", ");
                 }
@@ -908,22 +914,22 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         n += jl_printf(out, "Union{");
         while (jl_is_uniontype(v)) {
             // tail-recurse on b to flatten the printing of the Union structure in the common case
-            n += jl_static_show_x(out, ((jl_uniontype_t*)v)->a, depth);
+            n += jl_static_show_x(out, ((jl_uniontype_t*)v)->a, depth, ctx);
             n += jl_printf(out, ", ");
             v = ((jl_uniontype_t*)v)->b;
         }
-        n += jl_static_show_x(out, v, depth);
+        n += jl_static_show_x(out, v, depth, ctx);
         n += jl_printf(out, "}");
     }
     else if (vt == jl_unionall_type) {
         jl_unionall_t *ua = (jl_unionall_t*)v;
-        n += jl_static_show_x(out, ua->body, depth);
+        n += jl_static_show_x(out, ua->body, depth, ctx);
         n += jl_printf(out, " where ");
-        n += jl_static_show_x(out, (jl_value_t*)ua->var, depth->prev);
+        n += jl_static_show_x(out, (jl_value_t*)ua->var, depth->prev, ctx);
     }
     else if (vt == jl_typename_type) {
         n += jl_printf(out, "typename(");
-        n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth);
+        n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth, ctx);
         n += jl_printf(out, ")");
     }
     else if (vt == jl_tvar_type) {
@@ -943,7 +949,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             int ua = jl_is_unionall(lb);
             if (ua)
                 n += jl_printf(out, "(");
-            n += jl_static_show_x(out, lb, depth);
+            n += jl_static_show_x(out, lb, depth, ctx);
             if (ua)
                 n += jl_printf(out, ")");
             n += jl_printf(out, "<:");
@@ -955,7 +961,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             n += jl_printf(out, "<:");
             if (ua)
                 n += jl_printf(out, "(");
-            n += jl_static_show_x(out, ub, depth);
+            n += jl_static_show_x(out, ub, depth, ctx);
             if (ua)
                 n += jl_printf(out, ")");
         }
@@ -963,7 +969,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     else if (vt == jl_module_type) {
         jl_module_t *m = (jl_module_t*)v;
         if (m->parent != m && m->parent != jl_main_module) {
-            n += jl_static_show_x(out, (jl_value_t*)m->parent, depth);
+            n += jl_static_show_x(out, (jl_value_t*)m->parent, depth, ctx);
             n += jl_printf(out, ".");
         }
         n += jl_printf(out, "%s", jl_symbol_name(m->name));
@@ -984,7 +990,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                        (uintptr_t)((jl_ssavalue_t*)v)->id);
     }
     else if (vt == jl_globalref_type) {
-        n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth);
+        n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth, ctx);
         char *name = jl_symbol_name(jl_globalref_name(v));
         n += jl_printf(out, jl_is_identifier(name) ? ".%s" : ".:(%s)", name);
     }
@@ -999,7 +1005,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         else {
             n += jl_printf(out, ":(");
         }
-        n += jl_static_show_x(out, qv, depth);
+        n += jl_static_show_x(out, qv, depth, ctx);
         if (!jl_is_symbol(qv)) {
             n += jl_printf(out, " end");
         }
@@ -1009,20 +1015,20 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (vt == jl_newvarnode_type) {
         n += jl_printf(out, "<newvar ");
-        n += jl_static_show_x(out, *(jl_value_t**)v, depth);
+        n += jl_static_show_x(out, *(jl_value_t**)v, depth, ctx);
         n += jl_printf(out, ">");
     }
     else if (vt == jl_linenumbernode_type) {
         n += jl_printf(out, "#= ");
-        n += jl_static_show_x(out, jl_linenode_file(v), depth);
+        n += jl_static_show_x(out, jl_linenode_file(v), depth, ctx);
         n += jl_printf(out, ":%" PRIuPTR " =#", jl_linenode_line(v));
     }
     else if (vt == jl_expr_type) {
         jl_expr_t *e = (jl_expr_t*)v;
         if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) {
-            n += jl_static_show_x(out, jl_exprarg(e,0), depth);
+            n += jl_static_show_x(out, jl_exprarg(e,0), depth, ctx);
             n += jl_printf(out, " = ");
-            n += jl_static_show_x(out, jl_exprarg(e,1), depth);
+            n += jl_static_show_x(out, jl_exprarg(e,1), depth, ctx);
         }
         else {
             char sep = ' ';
@@ -1030,14 +1036,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             size_t i, len = jl_array_len(e->args);
             for (i = 0; i < len; i++) {
                 n += jl_printf(out, ",%c", sep);
-                n += jl_static_show_x(out, jl_exprarg(e,i), depth);
+                n += jl_static_show_x(out, jl_exprarg(e,i), depth, ctx);
             }
             n += jl_printf(out, ")");
         }
     }
     else if (jl_array_type && jl_is_array_type(vt)) {
         n += jl_printf(out, "Array{");
-        n += jl_static_show_x(out, (jl_value_t*)jl_tparam0(vt), depth);
+        n += jl_static_show_x(out, (jl_value_t*)jl_tparam0(vt), depth, ctx);
         n += jl_printf(out, ", (");
         size_t i, ndims = jl_array_ndims(v);
         if (ndims == 1)
@@ -1072,13 +1078,13 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         for (j = 0; j < tlen; j++) {
             if (av->flags.ptrarray) {
                 jl_value_t **ptr = ((jl_value_t**)av->data) + j;
-                n += jl_static_show_x(out, *ptr, depth);
+                n += jl_static_show_x(out, *ptr, depth, ctx);
             }
             else {
                 char *ptr = ((char*)av->data) + j * av->elsize;
                 n += jl_static_show_x_(out, (jl_value_t*)ptr,
                         typetagdata ? (jl_datatype_t*)jl_nth_union_component(el_type, typetagdata[j]) : (jl_datatype_t*)el_type,
-                        depth);
+                        depth, ctx);
             }
             if (j != tlen - 1)
                 n += jl_printf(out, nlsep ? ",\n  " : ", ");
@@ -1087,16 +1093,16 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (vt == jl_loaderror_type) {
         n += jl_printf(out, "LoadError(at ");
-        n += jl_static_show_x(out, *(jl_value_t**)v, depth);
+        n += jl_static_show_x(out, *(jl_value_t**)v, depth, ctx);
         // Access the field directly to avoid allocation
         n += jl_printf(out, " line %" PRIdPTR, ((intptr_t*)v)[1]);
         n += jl_printf(out, ": ");
-        n += jl_static_show_x(out, ((jl_value_t**)v)[2], depth);
+        n += jl_static_show_x(out, ((jl_value_t**)v)[2], depth, ctx);
         n += jl_printf(out, ")");
     }
     else if (vt == jl_errorexception_type) {
         n += jl_printf(out, "ErrorException(");
-        n += jl_static_show_x(out, *(jl_value_t**)v, depth);
+        n += jl_static_show_x(out, *(jl_value_t**)v, depth, ctx);
         n += jl_printf(out, ")");
     }
     else if (jl_static_is_function_(vt) && is_globname_binding(v, (jl_datatype_t*)vt)) {
@@ -1106,7 +1112,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         int globfunc = is_globfunction(v, dv, &sym);
         int quote = 0;
         if (jl_core_module && (dv->name->module != jl_core_module || !jl_module_exports_p(jl_core_module, sym))) {
-            n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth);
+            n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth, ctx);
             n += jl_printf(out, ".");
 
             size_t i = 0;
@@ -1136,7 +1142,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 n += jl_printf(out, "NamedTuple");
         }
         else if (!istuple) {
-            n += jl_static_show_x(out, (jl_value_t*)vt, depth);
+            n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx);
         }
         n += jl_printf(out, "(");
         size_t nb = jl_datatype_size(vt);
@@ -1159,7 +1165,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 size_t offs = jl_field_offset(vt, i);
                 char *fld_ptr = (char*)v + offs;
                 if (jl_field_isptr(vt, i)) {
-                    n += jl_static_show_x(out, *(jl_value_t**)fld_ptr, depth);
+                    n += jl_static_show_x(out, *(jl_value_t**)fld_ptr, depth, ctx);
                 }
                 else {
                     jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(vt, i);
@@ -1167,7 +1173,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                         uint8_t sel = ((uint8_t*)fld_ptr)[jl_field_size(vt, i) - 1];
                         ft = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)ft, sel);
                     }
-                    n += jl_static_show_x_(out, (jl_value_t*)fld_ptr, ft, depth);
+                    n += jl_static_show_x_(out, (jl_value_t*)fld_ptr, ft, depth, ctx);
                 }
                 if ((istuple || isnamedtuple) && tlen == 1)
                     n += jl_printf(out, ",");
@@ -1177,26 +1183,26 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             if (vt == jl_typemap_entry_type) {
                 n += jl_printf(out, ", next=↩︎\n  ");
                 jl_value_t *next = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)v)->next);
-                n += jl_static_show_next_(out, next, v, depth);
+                n += jl_static_show_next_(out, next, v, depth, ctx);
             }
         }
         n += jl_printf(out, ")");
     }
     else {
         n += jl_printf(out, "<?#%p::", (void*)v);
-        n += jl_static_show_x(out, (jl_value_t*)vt, depth);
+        n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx);
         n += jl_printf(out, ">");
     }
     return n;
 }
 
-static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth) JL_NOTSAFEPOINT
+static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT
 {
     // show values without calling a julia method or allocating through the GC
-    return jl_static_show_next_(out, v, NULL, depth);
+    return jl_static_show_next_(out, v, NULL, depth, ctx);
 }
 
-static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth) JL_NOTSAFEPOINT
+static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT
 {
     // helper for showing a typemap list by following the next pointers
     // while being careful about avoiding any recursion due to malformed (circular) references
@@ -1211,13 +1217,13 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr
                       *newdepth = &this_item,
                       *p = depth;
     while (p) {
-        if (jl_typeis(v, jl_typemap_entry_type) && newdepth == &this_item) {
+        if (jl_typetagis(v, jl_typemap_entry_type) && newdepth == &this_item) {
             jl_value_t *m = p->v;
             unsigned nid = 1;
-            while (m && jl_typeis(m, jl_typemap_entry_type)) {
+            while (m && jl_typetagis(m, jl_typemap_entry_type)) {
                 if (m == v) {
                     return jl_printf(out, "<typemap reference #%u @-%u ", nid, dist) +
-                           jl_static_show_x(out, (jl_value_t*)((jl_typemap_entry_t*)m)->sig, depth) +
+                           jl_static_show_x(out, (jl_value_t*)((jl_typemap_entry_t*)m)->sig, depth, ctx) +
                            jl_printf(out, ">");
                 }
                 if (m == prev) {
@@ -1230,7 +1236,7 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr
                 jl_value_t *m2 = p->v;
                 if (m2 == mnext)
                     break;
-                while (m2 && jl_typeis(m2, jl_typemap_entry_type)) {
+                while (m2 && jl_typetagis(m2, jl_typemap_entry_type)) {
                     jl_value_t *mnext2 = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)m2)->next);
                     if (mnext2 == mnext) {
                         if (m2 != m)
@@ -1248,15 +1254,22 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr
         dist++;
         p = p->prev;
     }
-    return jl_static_show_x_(out, v, (jl_datatype_t*)jl_typeof(v), newdepth);
+    return jl_static_show_x_(out, v, (jl_datatype_t*)jl_typeof(v), newdepth, ctx);
 }
 
 JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT
 {
-    return jl_static_show_x(out, v, 0);
+    jl_static_show_config_t ctx = { /* quiet */ 0 };
+    return jl_static_show_x(out, v, 0, ctx);
 }
 
 JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT
+{
+    jl_static_show_config_t ctx = { /* quiet */ 0 };
+    return jl_static_show_func_sig_(s, type, ctx);
+}
+
+size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_config_t ctx) JL_NOTSAFEPOINT
 {
     size_t n = 0;
     size_t i;
@@ -1286,7 +1299,7 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N
     }
     else {
         n += jl_printf(s, "(::");
-        n += jl_static_show_x(s, ftype, depth);
+        n += jl_static_show_x(s, ftype, depth, ctx);
         n += jl_printf(s, ")");
     }
     size_t tl = jl_nparams(type);
@@ -1294,7 +1307,7 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N
     for (i = 1; i < tl; i++) {
         jl_value_t *tp = jl_tparam(type, i);
         if (i != tl - 1) {
-            n += jl_static_show_x(s, tp, depth);
+            n += jl_static_show_x(s, tp, depth, ctx);
             n += jl_printf(s, ", ");
         }
         else {
@@ -1302,13 +1315,13 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N
                 tp = jl_unwrap_vararg(tp);
                 if (jl_is_unionall(tp))
                     n += jl_printf(s, "(");
-                n += jl_static_show_x(s, tp, depth);
+                n += jl_static_show_x(s, tp, depth, ctx);
                 if (jl_is_unionall(tp))
                     n += jl_printf(s, ")");
                 n += jl_printf(s, "...");
             }
             else {
-                n += jl_static_show_x(s, tp, depth);
+                n += jl_static_show_x(s, tp, depth, ctx);
             }
         }
     }
@@ -1320,7 +1333,7 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N
         while (jl_is_unionall(tvars)) {
             if (!first)
                 n += jl_printf(s, ", ");
-            n += jl_static_show_x(s, (jl_value_t*)tvars->var, first ? NULL : depth);
+            n += jl_static_show_x(s, (jl_value_t*)tvars->var, first ? NULL : depth,  ctx);
             tvars = (jl_unionall_t*)tvars->body;
             if (!first)
                 depth += 1;
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index e3543c9f62656..23793254c205d 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -31,14 +31,12 @@ void *jl_get_library_(const char *f_lib, int throw_err)
 {
     if (f_lib == NULL)
         return jl_RTLD_DEFAULT_handle;
-#ifdef _OS_WINDOWS_
     if (f_lib == JL_EXE_LIBNAME)
         return jl_exe_handle;
     if (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME)
         return jl_libjulia_internal_handle;
     if (f_lib == JL_LIBJULIA_DL_LIBNAME)
         return jl_libjulia_handle;
-#endif
     JL_LOCK(&libmap_lock);
     // This is the only operation we do on the map, which doesn't invalidate
     // any references or iterators.
@@ -362,6 +360,6 @@ JL_GCC_IGNORE_STOP
 
 void jl_init_runtime_ccall(void)
 {
-    JL_MUTEX_INIT(&libmap_lock);
+    JL_MUTEX_INIT(&libmap_lock, "libmap_lock");
     uv_mutex_init(&trampoline_lock);
 }
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 40ba036edebfd..9babdf89f098b 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -490,14 +490,14 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
 
     char *f_lib = NULL;
     if (jl_is_tuple(v) && jl_nfields(v) > 1) {
-        jl_value_t *t1 = jl_fieldref_noalloc(v, 1);
-        v = jl_fieldref(v, 0);
+        jl_value_t *t1 = jl_fieldref(v, 1);
         if (jl_is_symbol(t1))
             f_lib = jl_symbol_name((jl_sym_t*)t1);
         else if (jl_is_string(t1))
             f_lib = jl_string_data(t1);
         else
             JL_TYPECHK(cglobal, symbol, t1)
+        v = jl_fieldref(v, 0);
     }
 
     char *f_name = NULL;
@@ -508,10 +508,8 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
     else
         JL_TYPECHK(cglobal, symbol, v)
 
-#ifdef _OS_WINDOWS_
     if (!f_lib)
-        f_lib = (char*)jl_dlfind_win32(f_name);
-#endif
+        f_lib = (char*)jl_dlfind(f_name);
 
     void *ptr;
     jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1);
@@ -717,7 +715,7 @@ SELECTOR_FUNC(intrinsic_1)
 #define un_iintrinsic(name, u) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \
 { \
-    return jl_iintrinsic_1(jl_typeof(a), a, #name, u##signbitbyte, jl_intrinsiclambda_ty1, name##_list); \
+    return jl_iintrinsic_1(a, #name, u##signbitbyte, jl_intrinsiclambda_ty1, name##_list); \
 }
 #define un_iintrinsic_fast(LLVMOP, OP, name, u) \
 un_iintrinsic_ctype(OP, name, 8, u##int##8_t) \
@@ -743,7 +741,7 @@ SELECTOR_FUNC(intrinsic_u1)
 #define uu_iintrinsic(name, u) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \
 { \
-    return jl_iintrinsic_1(jl_typeof(a), a, #name, u##signbitbyte, jl_intrinsiclambda_u1, name##_list); \
+    return jl_iintrinsic_1(a, #name, u##signbitbyte, jl_intrinsiclambda_u1, name##_list); \
 }
 #define uu_iintrinsic_fast(LLVMOP, OP, name, u) \
 uu_iintrinsic_ctype(OP, name, 8, u##int##8_t) \
@@ -765,14 +763,13 @@ static const select_intrinsic_u1_t name##_list = { \
 uu_iintrinsic(name, u)
 
 static inline
-jl_value_t *jl_iintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name,
+jl_value_t *jl_iintrinsic_1(jl_value_t *a, const char *name,
                             char (*getsign)(void*, unsigned),
                             jl_value_t *(*lambda1)(jl_value_t*, void*, unsigned, unsigned, const void*), const void *list)
 {
-    if (!jl_is_primitivetype(jl_typeof(a)))
-        jl_errorf("%s: value is not a primitive type", name);
+    jl_value_t *ty = jl_typeof(a);
     if (!jl_is_primitivetype(ty))
-        jl_errorf("%s: type is not a primitive type", name);
+        jl_errorf("%s: value is not a primitive type", name);
     void *pa = jl_data_ptr(a);
     unsigned isize = jl_datatype_size(jl_typeof(a));
     unsigned isize2 = next_power_of_two(isize);
@@ -833,11 +830,12 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \
 
 static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const char *name, intrinsic_cvt_t op)
 {
+    JL_TYPECHKS(name, datatype, ty);
+    if (!jl_is_concrete_type(ty) || !jl_is_primitivetype(ty))
+        jl_errorf("%s: target type not a leaf primitive type", name);
     jl_value_t *aty = jl_typeof(a);
     if (!jl_is_primitivetype(aty))
         jl_errorf("%s: value is not a primitive type", name);
-    if (!jl_is_primitivetype(ty))
-        jl_errorf("%s: type is not a primitive type", name);
     void *pa = jl_data_ptr(a);
     unsigned isize = jl_datatype_size(aty);
     unsigned osize = jl_datatype_size(ty);
@@ -1035,7 +1033,7 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v
     jl_value_t *params[2];
     params[0] = ty;
     params[1] = (jl_value_t*)jl_bool_type;
-    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+    jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2);
     JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
     jl_task_t *ct = jl_current_task;
     jl_value_t *newv = jl_gc_alloc(ct->ptls, jl_datatype_size(tuptyp), tuptyp);
@@ -1182,7 +1180,6 @@ bi_fintrinsic(add,add_float)
 bi_fintrinsic(sub,sub_float)
 bi_fintrinsic(mul,mul_float)
 bi_fintrinsic(div,div_float)
-bi_fintrinsic(frem,rem_float)
 
 // ternary operators //
 // runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference.
diff --git a/src/safepoint.c b/src/safepoint.c
index 1ff26d616a5d8..19eca4bf6f00d 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -150,8 +150,10 @@ void jl_safepoint_end_gc(void)
 
 void jl_safepoint_wait_gc(void)
 {
+    jl_task_t *ct = jl_current_task; (void)ct;
+    JL_TIMING_SUSPEND(GC_SAFEPOINT, ct);
     // The thread should have set this is already
-    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) != 0);
+    assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0);
     // Use normal volatile load in the loop for speed until GC finishes.
     // Then use an acquire load to make sure the GC result is visible on this thread.
     while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
diff --git a/src/signals-mach.c b/src/signals-mach.c
index 2b1da43b71f63..073ab2ebc33a6 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -36,6 +36,9 @@ extern int _keymgr_set_lockmode_processwide_ptr(unsigned int key, unsigned int m
 extern void _dyld_atfork_prepare(void) __attribute__((weak_import));
 extern void _dyld_atfork_parent(void) __attribute__((weak_import));
 //extern void _dyld_fork_child(void) __attribute__((weak_import));
+extern void _dyld_dlopen_atfork_prepare(void) __attribute__((weak_import));
+extern void _dyld_dlopen_atfork_parent(void) __attribute__((weak_import));
+//extern void _dyld_dlopen_atfork_child(void) __attribute__((weak_import));
 
 static void attach_exception_port(thread_port_t thread, int segv_only);
 
@@ -104,7 +107,7 @@ void *mach_segv_listener(void *arg)
 }
 
 
-static void allocate_mach_handler()
+static void allocate_mach_handler(void)
 {
     // ensure KEYMGR_GCC3_DW2_OBJ_LIST is initialized, as this requires malloc
     // and thus can deadlock when used without first initializing it.
@@ -564,7 +567,12 @@ static int jl_lock_profile_mach(int dlsymlock)
     // workaround for old keymgr bugs
     void *unused = NULL;
     int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0;
-    // workaround for new dlsym4 bugs (API and bugs introduced in macOS 12.1)
+    // workaround for new dlsym4 bugs in the workaround for dlsym bugs: _dyld_atfork_prepare
+    // acquires its locks in the wrong order, but fortunately we happen to able to guard it
+    // with this call to force it to prevent that TSAN violation from causing a deadlock
+    if (dlsymlock && _dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
+        _dyld_dlopen_atfork_prepare();
+    // workaround for new dlsym4 bugs (API and bugs introduced circa macOS 12.1)
     if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
         _dyld_atfork_prepare();
     return keymgr_locked;
@@ -572,8 +580,10 @@ static int jl_lock_profile_mach(int dlsymlock)
 
 static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked)
 {
-    if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) \
-        _dyld_atfork_parent(); \
+    if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
+        _dyld_atfork_parent();
+    if (dlsymlock && _dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
+        _dyld_dlopen_atfork_parent();
     if (keymgr_locked)
         _keymgr_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
     jl_unlock_profile();
@@ -611,15 +621,19 @@ void *mach_profile_listener(void *arg)
                 break;
             }
 
+            if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
+                _dyld_dlopen_atfork_prepare();
             if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
                 _dyld_atfork_prepare(); // briefly acquire the dlsym lock
             host_thread_state_t state;
-            if (!jl_thread_suspend_and_get_state2(i, &state))
-                continue;
+            int valid_thread = jl_thread_suspend_and_get_state2(i, &state);
             unw_context_t *uc = (unw_context_t*)&state;
             if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
                 _dyld_atfork_parent(); // quickly release the dlsym lock
-
+            if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL)
+                _dyld_dlopen_atfork_parent();
+            if (!valid_thread)
+                continue;
             if (running) {
 #ifdef LLVMLIBUNWIND
                 /*
diff --git a/src/signals-unix.c b/src/signals-unix.c
index 6ed664199fd2b..4c21d25d3622c 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -27,9 +27,7 @@
 #ifdef __APPLE__ // Darwin's mach ports allow signal-free thread management
 #define HAVE_MACH
 #define HAVE_KEVENT
-#elif defined(__FreeBSD__) // generic bsd
-#define HAVE_ITIMER
-#else // generic linux
+#else // generic Linux or BSD
 #define HAVE_TIMER
 #endif
 
@@ -301,8 +299,8 @@ int is_write_fault(void *context) {
 }
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
 struct linux_aarch64_ctx_header {
-	uint32_t magic;
-	uint32_t size;
+    uint32_t magic;
+    uint32_t size;
 };
 const uint32_t linux_esr_magic = 0x45535201;
 
@@ -597,37 +595,6 @@ JL_DLLEXPORT void jl_profile_stop_timer(void)
     }
 }
 
-#elif defined(HAVE_ITIMER)
-// BSD-style timers
-#include <string.h>
-#include <sys/time.h>
-struct itimerval timerprof;
-
-JL_DLLEXPORT int jl_profile_start_timer(void)
-{
-    timerprof.it_interval.tv_sec = 0;
-    timerprof.it_interval.tv_usec = 0;
-    timerprof.it_value.tv_sec = nsecprof / GIGA;
-    timerprof.it_value.tv_usec = ((nsecprof % GIGA) + 999) / 1000;
-    // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer
-    running = 1;
-    if (setitimer(ITIMER_PROF, &timerprof, NULL) == -1) {
-        running = 0;
-        return -3;
-    }
-    return 0;
-}
-
-JL_DLLEXPORT void jl_profile_stop_timer(void)
-{
-    if (running) {
-        memset(&timerprof, 0, sizeof(timerprof));
-        setitimer(ITIMER_PROF, &timerprof, NULL);
-        last_timer_delete_time = jl_hrtime();
-        running = 0;
-    }
-}
-
 #else
 
 #error no profile tools available
@@ -654,7 +621,7 @@ static void allocate_segv_handler(void)
 static void *alloc_sigstack(size_t *ssize)
 {
     void *stk = jl_malloc_stack(ssize, NULL);
-    if (stk == MAP_FAILED)
+    if (stk == NULL)
         jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno));
     return stk;
 }
@@ -678,7 +645,7 @@ void jl_install_thread_signal_handler(jl_ptls_t ptls)
 }
 
 const static int sigwait_sigs[] = {
-    SIGINT, SIGTERM, SIGABRT, SIGQUIT,
+    SIGINT, SIGTERM, SIGQUIT,
 #ifdef SIGINFO
     SIGINFO,
 #else
@@ -686,8 +653,6 @@ const static int sigwait_sigs[] = {
 #endif
 #if defined(HAVE_TIMER)
     SIGUSR1,
-#elif defined(HAVE_ITIMER)
-    SIGPROF,
 #endif
     0
 };
@@ -802,11 +767,9 @@ static void *signal_listener(void *arg)
         profile = (sig == SIGUSR1);
 #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L
         if (profile && !(info.si_code == SI_TIMER &&
-	            info.si_value.sival_ptr == &timerprof))
+                info.si_value.sival_ptr == &timerprof))
             profile = 0;
 #endif
-#elif defined(HAVE_ITIMER)
-        profile = (sig == SIGPROF);
 #endif
 #endif
 
@@ -954,8 +917,6 @@ static void *signal_listener(void *arg)
             jl_check_profile_autostop();
 #if defined(HAVE_TIMER)
             timer_settime(timerprof, 0, &itsprof, NULL);
-#elif defined(HAVE_ITIMER)
-            setitimer(ITIMER_PROF, &timerprof, NULL);
 #endif
         }
 #endif
@@ -1089,11 +1050,6 @@ void jl_install_default_signal_handlers(void)
     }
     // need to ensure the following signals are not SIG_IGN, even though they will be blocked
     act_die.sa_flags = SA_SIGINFO | SA_RESTART | SA_RESETHAND;
-#if defined(HAVE_ITIMER)
-    if (sigaction(SIGPROF, &act_die, NULL) < 0) {
-        jl_errorf("fatal error: sigaction: %s", strerror(errno));
-    }
-#endif
 #ifdef SIGINFO
     if (sigaction(SIGINFO, &act_die, NULL) < 0) {
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
diff --git a/src/signals-win.c b/src/signals-win.c
index f20a4d5287669..5dd6b34558ca6 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -483,11 +483,15 @@ void jl_install_default_signal_handlers(void)
 
 void jl_install_thread_signal_handler(jl_ptls_t ptls)
 {
-    size_t ssize = sig_stack_size;
-    void *stk = jl_malloc_stack(&ssize, NULL);
-    collect_backtrace_fiber.uc_stack.ss_sp = (void*)stk;
-    collect_backtrace_fiber.uc_stack.ss_size = ssize;
-    jl_makecontext(&collect_backtrace_fiber, start_backtrace_fiber);
-    uv_mutex_init(&backtrace_lock);
-    have_backtrace_fiber = 1;
+    if (!have_backtrace_fiber) {
+        size_t ssize = sig_stack_size;
+        void *stk = jl_malloc_stack(&ssize, NULL);
+        if (stk == NULL)
+            jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno));
+        collect_backtrace_fiber.uc_stack.ss_sp = (void*)stk;
+        collect_backtrace_fiber.uc_stack.ss_size = ssize;
+        jl_makecontext(&collect_backtrace_fiber, start_backtrace_fiber);
+        uv_mutex_init(&backtrace_lock);
+        have_backtrace_fiber = 1;
+    }
 }
diff --git a/src/simplevector.c b/src/simplevector.c
index cb65646e00936..65217715ae55f 100644
--- a/src/simplevector.c
+++ b/src/simplevector.c
@@ -23,6 +23,7 @@ jl_svec_t *(jl_perm_symsvec)(size_t n, ...)
 {
     if (n == 0) return jl_emptysvec;
     jl_svec_t *jv = (jl_svec_t*)jl_gc_permobj((n + 1) * sizeof(void*), jl_simplevector_type);
+    jl_set_typetagof(jv, jl_simplevector_tag, jl_astaggedvalue(jv)->bits.gc);
     jl_svec_set_len_unsafe(jv, n);
     va_list args;
     va_start(args, n);
@@ -37,6 +38,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec1(void *a)
     jl_task_t *ct = jl_current_task;
     jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 2,
                                            jl_simplevector_type);
+    jl_set_typetagof(v, jl_simplevector_tag, 0);
     jl_svec_set_len_unsafe(v, 1);
     jl_svec_data(v)[0] = (jl_value_t*)a;
     return v;
@@ -47,6 +49,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b)
     jl_task_t *ct = jl_current_task;
     jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 3,
                                            jl_simplevector_type);
+    jl_set_typetagof(v, jl_simplevector_tag, 0);
     jl_svec_set_len_unsafe(v, 2);
     jl_svec_data(v)[0] = (jl_value_t*)a;
     jl_svec_data(v)[1] = (jl_value_t*)b;
@@ -59,6 +62,7 @@ JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n)
     if (n == 0) return jl_emptysvec;
     jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ct->ptls, (n + 1) * sizeof(void*),
                                             jl_simplevector_type);
+    jl_set_typetagof(jv, jl_simplevector_tag, 0);
     jl_svec_set_len_unsafe(jv, n);
     return jv;
 }
diff --git a/src/stackwalk.c b/src/stackwalk.c
index caf0705b85be7..18bf4b2126938 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -321,6 +321,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size,
 
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
 {
+    JL_TIMING(STACKWALK, STACKWALK_Backtrace);
     jl_excstack_t *s = jl_current_task->excstack;
     jl_bt_element_t *bt_data = NULL;
     size_t bt_size = 0;
@@ -343,6 +344,7 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
 JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries)
 {
     JL_TYPECHK(current_exceptions, task, (jl_value_t*)task);
+    JL_TIMING(STACKWALK, STACKWALK_Excstack);
     jl_task_t *ct = jl_current_task;
     if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) {
         jl_error("Inspecting the exception stack of a task which might "
@@ -671,7 +673,7 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
             while (debuginfoloc != 0) {
                 jl_line_info_node_t *locinfo = (jl_line_info_node_t*)
                     jl_array_ptr_ref(src->linetable, debuginfoloc - 1);
-                assert(jl_typeis(locinfo, jl_lineinfonode_type));
+                assert(jl_typetagis(locinfo, jl_lineinfonode_type));
                 const char *func_name = "Unknown";
                 jl_value_t *method = locinfo->method;
                 if (jl_is_method_instance(method))
diff --git a/src/staticdata.c b/src/staticdata.c
index 16b4791bb4200..49b97480b5165 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -118,7 +118,6 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_slotnumber_type);
         INSERT_TAG(jl_simplevector_type);
         INSERT_TAG(jl_array_type);
-        INSERT_TAG(jl_typedslot_type);
         INSERT_TAG(jl_expr_type);
         INSERT_TAG(jl_binding_type);
         INSERT_TAG(jl_globalref_type);
@@ -165,7 +164,6 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_abstractstring_type);
         INSERT_TAG(jl_array_any_type);
         INSERT_TAG(jl_intrinsic_type);
-        INSERT_TAG(jl_abstractslot_type);
         INSERT_TAG(jl_methtable_type);
         INSERT_TAG(jl_typemap_level_type);
         INSERT_TAG(jl_typemap_entry_type);
@@ -241,7 +239,9 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_typeinf_func);
         INSERT_TAG(jl_type_type_mt);
         INSERT_TAG(jl_nonfunction_mt);
+        INSERT_TAG(jl_kwcall_mt);
         INSERT_TAG(jl_kwcall_func);
+        INSERT_TAG(jl_opaque_closure_method);
 
         // some Core.Builtin Functions that we want to be able to reference:
         INSERT_TAG(jl_builtin_throw);
@@ -303,12 +303,128 @@ static arraylist_t layout_table;     // cache of `position(s)` for each `id` in
 static arraylist_t object_worklist;  // used to mimic recursion by jl_serialize_reachable
 
 // Permanent list of void* (begin, end+1) pairs of system/package images we've loaded previously
-// togther with their module build_ids (used for external linkage)
-// jl_linkage_blobs.items[2i:2i+1] correspond to jl_build_ids[i]   (0-offset indexing)
-// TODO: Keep this sorted so that we can use binary-search
+// together with their module build_ids (used for external linkage)
+// jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i]   (0-offset indexing)
 arraylist_t jl_linkage_blobs;
 arraylist_t jl_image_relocs;
-jl_array_t *jl_build_ids JL_GLOBALLY_ROOTED = NULL;
+
+// Eytzinger tree of images. Used for very fast jl_object_in_image queries
+// See https://algorithmica.org/en/eytzinger
+arraylist_t eytzinger_image_tree;
+arraylist_t eytzinger_idxs;
+static uintptr_t img_min;
+static uintptr_t img_max;
+
+static int ptr_cmp(const void *l, const void *r)
+{
+    uintptr_t left = *(const uintptr_t*)l;
+    uintptr_t right = *(const uintptr_t*)r;
+    return (left > right) - (left < right);
+}
+
+// Build an eytzinger tree from a sorted array
+static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
+{
+    if (k <= n) {
+        i = eytzinger(src, dest, i, 2 * k, n);
+        dest[k-1] = src[i];
+        i++;
+        i = eytzinger(src, dest, i, 2 * k + 1, n);
+    }
+    return i;
+}
+
+static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
+{
+    size_t n = eytzinger_image_tree.len - 1;
+    if (n == 0)
+        return n;
+    assert(n % 2 == 0 && "Eytzinger tree not even length!");
+    uintptr_t cmp = (uintptr_t) obj;
+    if (cmp <= img_min || cmp > img_max)
+        return n;
+    uintptr_t *tree = (uintptr_t*)eytzinger_image_tree.items;
+    size_t k = 1;
+    // note that k preserves the history of how we got to the current node
+    while (k <= n) {
+        int greater = (cmp > tree[k - 1]);
+        k <<= 1;
+        k |= greater;
+    }
+    // Free to assume k is nonzero, since we start with k = 1
+    // and cmp > gc_img_min
+    // This shift does a fast revert of the path until we get
+    // to a node that evaluated less than cmp.
+    k >>= (__builtin_ctzll(k) + 1);
+    assert(k != 0);
+    assert(k <= n && "Eytzinger tree index out of bounds!");
+    assert(tree[k - 1] < cmp && "Failed to find lower bound for object!");
+    return k - 1;
+}
+
+//used in staticdata.c after we add an image
+void rebuild_image_blob_tree(void)
+{
+    size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
+    assert(eytzinger_idxs.len == eytzinger_image_tree.len);
+    assert(eytzinger_idxs.max == eytzinger_image_tree.max);
+    arraylist_grow(&eytzinger_idxs, inc);
+    arraylist_grow(&eytzinger_image_tree, inc);
+    eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
+    eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
+    for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
+        assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
+        // We abuse the pointer here a little so that a couple of properties are true:
+        // 1. a start and an end are never the same value. This simplifies the binary search.
+        // 2. ends are always after starts. This also simplifies the binary search.
+        // We assume that there exist no 0-size blobs, but that's a safe assumption
+        // since it means nothing could be there anyways
+        uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
+        eytzinger_idxs.items[i] = (void*)(val + (i & 1));
+    }
+    qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
+    img_min = (uintptr_t) eytzinger_idxs.items[0];
+    img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
+    eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
+    // Reuse the scratch memory to store the indices
+    // Still O(nlogn) because binary search
+    for (size_t i = 0; i < jl_linkage_blobs.len; i ++) {
+        uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
+        // This is the same computation as in the prior for loop
+        uintptr_t eyt_val = val + (i & 1);
+        size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1);
+        assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
+        if (i & 1)
+            eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
+        else
+            eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
+    }
+}
+
+static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
+{
+    assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
+    int idx = eyt_obj_idx(obj);
+    // Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a
+    // start (0) or an end (1) of a blob. If it's a start, then the object is
+    // in the image, otherwise it is not.
+    int in_image = ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0;
+    return in_image;
+}
+
+size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
+{
+    assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
+    int eyt_idx = eyt_obj_idx(v);
+    // We fill the invalid slots with the length, so we can just return that
+    size_t idx = (size_t) eytzinger_idxs.items[eyt_idx];
+    return idx;
+}
+
+uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
+{
+    return eyt_obj_in_img(obj);
+}
 
 // hash of definitions for predefined function pointers
 static htable_t fptr_to_id;
@@ -317,13 +433,6 @@ void *native_functions;   // opaque jl_native_code_desc_t blob used for fetching
 // table of struct field addresses to rewrite during saving
 static htable_t field_replace;
 
-typedef struct {
-    uint64_t base;
-    uintptr_t *gvars_base;
-    int32_t *gvars_offsets;
-    jl_image_fptrs_t fptrs;
-} jl_image_t;
-
 // array of definitions for the predefined function pointers
 // (reverse of fptr_to_id)
 // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
@@ -344,7 +453,7 @@ static const jl_fptr_args_t id_to_fptrs[] = {
 
 typedef struct {
     ios_t *s;                   // the main stream
-    ios_t *const_data;          // codegen-invisible internal data (e.g., datatype layouts, list-like typename fields, foreign types, internal arrays)
+    ios_t *const_data;          // GC-invisible internal data (e.g., datatype layouts, list-like typename fields, foreign types, internal arrays)
     ios_t *symbols;             // names (char*) of symbols (some may be referenced by pointer in generated code)
     ios_t *relocs;              // for (de)serializing relocs_list and gctags_list
     ios_t *gvar_record;         // serialized array mapping gvid => spos
@@ -356,9 +465,11 @@ typedef struct {
     arraylist_t fixup_types;    // a list of locations of types requiring (re)caching
     arraylist_t fixup_objs;     // a list of locations of objects requiring (re)caching
     arraylist_t ccallable_list; // @ccallable entry points to install
+    // mapping from a buildid_idx to a depmods_idx
+    jl_array_t *buildid_depmods_idxs;
     // record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg
     // conceptually, the base pointer for the jth externally-linked item is determined from
-    //     i = findfirst(==(link_ids[j]), jl_build_ids)
+    //     i = findfirst(==(link_ids[j]), build_ids)
     //     blob_base = jl_linkage_blobs.items[2i]                     # 0-offset indexing
     // We need separate lists since they are intermingled at creation but split when written.
     jl_array_t *link_ids_relocs;
@@ -377,6 +488,16 @@ static jl_value_t *jl_bigint_type = NULL;
 static int gmp_limb_size = 0;
 static jl_sym_t *jl_docmeta_sym = NULL;
 
+#ifdef _P64
+#define RELOC_TAG_OFFSET 61
+#define DEPS_IDX_OFFSET 40    // only on 64-bit can we encode the dependency-index as part of the tagged reloc
+#else
+// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data.
+#define RELOC_TAG_OFFSET 29
+#define DEPS_IDX_OFFSET RELOC_TAG_OFFSET
+#endif
+
+
 // Tags of category `t` are located at offsets `t << RELOC_TAG_OFFSET`
 // Consequently there is room for 2^RELOC_TAG_OFFSET pointers, etc
 enum RefTags {
@@ -384,9 +505,9 @@ enum RefTags {
     ConstDataRef,       // constant data (e.g., layouts)
     TagRef,             // items serialized via their tags
     SymbolRef,          // symbols
-    FunctionRef,        // generic functions
-    BuiltinFunctionRef, // builtin functions
-    ExternalLinkage     // items defined externally (used when serializing packages)
+    FunctionRef,        // functions
+    SysimageLinkage,    // reference to the sysimage (from pkgimage)
+    ExternalLinkage     // reference to some other pkgimage
 };
 
 // calling conventions for internal entry points.
@@ -401,13 +522,9 @@ typedef enum {
     JL_API_MAX
 } jl_callingconv_t;
 
+// Sub-divisions of some RefTags
+const uintptr_t BuiltinFunctionTag = ((uintptr_t)1 << (RELOC_TAG_OFFSET - 1));
 
-#ifdef _P64
-#define RELOC_TAG_OFFSET 61
-#else
-// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data.
-#define RELOC_TAG_OFFSET 29
-#endif
 
 #if RELOC_TAG_OFFSET <= 32
 typedef uint32_t reloc_t;
@@ -440,7 +557,7 @@ typedef struct {
 static void *jl_sysimg_handle = NULL;
 static jl_image_t sysimage;
 
-static inline uintptr_t *sysimg_gvars(uintptr_t *base, int32_t *offsets, size_t idx)
+static inline uintptr_t *sysimg_gvars(uintptr_t *base, const int32_t *offsets, size_t idx)
 {
     return base + offsets[idx] / sizeof(base[0]);
 }
@@ -455,32 +572,7 @@ static void jl_load_sysimg_so(void)
     int imaging_mode = jl_generating_output() && !jl_options.incremental;
     // in --build mode only use sysimg data, not precompiled native code
     if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) {
-        jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimage.gvars_base, 1);
-        jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimage.gvars_offsets, 1);
-        sysimage.gvars_offsets += 1;
         assert(sysimage.fptrs.base);
-
-        void *pgcstack_func_slot;
-        jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1);
-        void *pgcstack_key_slot;
-        jl_dlsym(jl_sysimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1);
-        jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot);
-
-        size_t *tls_offset_idx;
-        jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1);
-        *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
-
-#ifdef _OS_WINDOWS_
-        sysimage.base = (intptr_t)jl_sysimg_handle;
-#else
-        Dl_info dlinfo;
-        if (dladdr((void*)sysimage.gvars_base, &dlinfo) != 0) {
-            sysimage.base = (intptr_t)dlinfo.dli_fbase;
-        }
-        else {
-            sysimage.base = 0;
-        }
-#endif
     }
     else {
         memset(&sysimage.fptrs, 0, sizeof(sysimage.fptrs));
@@ -507,20 +599,20 @@ static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) JL_NOTS
     if (v == NULL || jl_is_symbol(v) || v == jl_nothing) {
         return 0;
     }
-    else if (jl_typeis(v, jl_int64_type)) {
+    else if (jl_typetagis(v, jl_int64_tag << 4)) {
         int64_t i64 = *(int64_t*)v + NBOX_C / 2;
         if ((uint64_t)i64 < NBOX_C)
             return 0;
     }
-    else if (jl_typeis(v, jl_int32_type)) {
+    else if (jl_typetagis(v, jl_int32_tag << 4)) {
         int32_t i32 = *(int32_t*)v + NBOX_C / 2;
         if ((uint32_t)i32 < NBOX_C)
             return 0;
     }
-    else if (jl_typeis(v, jl_uint8_type)) {
+    else if (jl_typetagis(v, jl_uint8_tag << 4)) {
         return 0;
     }
-    else if (jl_typeis(v, jl_task_type)) {
+    else if (jl_typetagis(v, jl_task_tag << 4)) {
         return 0;
     }
 
@@ -595,10 +687,10 @@ static uintptr_t jl_fptr_id(void *fptr)
 
 // `jl_queue_for_serialization` adds items to `serialization_order`
 #define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0)
-static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate);
+static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED;
 
 
-static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m)
+static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED
 {
     jl_queue_for_serialization(s, m->name);
     jl_queue_for_serialization(s, m->parent);
@@ -629,7 +721,7 @@ static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_
 // you want to handle uniquing of `Dict{String,Float64}` before you tackle `Vector{Dict{String,Float64}}`.
 // Uniquing is done in `serialization_order`, so the very first mention of such an object must
 // be the "source" rather than merely a cross-reference.
-static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate)
+static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
 {
     jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
     jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate);
@@ -654,6 +746,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
     }
     if (s->incremental && jl_is_method_instance(v)) {
         jl_method_instance_t *mi = (jl_method_instance_t*)v;
+        jl_value_t *def = mi->def.value;
         if (needs_uniquing(v)) {
             // we only need 3 specific fields of this (the rest are not used)
             jl_queue_for_serialization(s, mi->def.value);
@@ -662,13 +755,24 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             recursive = 0;
             goto done_fields;
         }
-        else if (needs_recaching(v)) {
+        else if (jl_is_method(def) && jl_object_in_image(def)) {
             // we only need 3 specific fields of this (the rest are restored afterward, if valid)
+            // in particular, cache is repopulated by jl_mi_cache_insert for all foreign function,
+            // so must not be present here
             record_field_change((jl_value_t**)&mi->uninferred, NULL);
             record_field_change((jl_value_t**)&mi->backedges, NULL);
             record_field_change((jl_value_t**)&mi->callbacks, NULL);
             record_field_change((jl_value_t**)&mi->cache, NULL);
         }
+        else {
+            assert(!needs_recaching(v));
+        }
+        // n.b. opaque closures cannot be inspected and relied upon like a
+        // normal method since they can get improperly introduced by generated
+        // functions, so if they appeared at all, we will probably serialize
+        // them wrong and segfault. The jl_code_for_staged function should
+        // prevent this from happening, so we do not need to detect that user
+        // error now.
     }
     if (s->incremental && jl_is_globalref(v)) {
         jl_globalref_t *gr = (jl_globalref_t*)v;
@@ -686,6 +790,15 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             assert(!jl_object_in_image((jl_value_t*)tn->wrapper));
         }
     }
+    if (s->incremental && jl_is_code_instance(v)) {
+        jl_code_instance_t *ci = (jl_code_instance_t*)v;
+        // make sure we don't serialize other reachable cache entries of foreign methods
+        if (jl_object_in_image((jl_value_t*)ci->def->def.value)) {
+            // TODO: if (ci in ci->defs->cache)
+            record_field_change((jl_value_t**)&ci->next, NULL);
+        }
+    }
+
 
     if (immediate) // must be things that can be recursively handled, and valid as type parameters
         assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v));
@@ -726,7 +839,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
             }
         }
     }
-    else if (jl_typeis(v, jl_module_type)) {
+    else if (jl_typetagis(v, jl_module_tag << 4)) {
         jl_queue_module_for_serialization(s, (jl_module_t*)v);
     }
     else if (layout->nfields > 0) {
@@ -764,7 +877,7 @@ done_fields: ;
     *bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
 }
 
-static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate)
+static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
 {
     if (!jl_needs_serialization(s, v))
         return;
@@ -807,7 +920,7 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i
 // Do a pre-order traversal of the to-serialize worklist, in the identical order
 // to the calls to jl_queue_for_serialization would occur in a purely recursive
 // implementation, but without potentially running out of stack.
-static void jl_serialize_reachable(jl_serializer_state *s)
+static void jl_serialize_reachable(jl_serializer_state *s) JL_GC_DISABLED
 {
     size_t i, prevlen = 0;
     while (object_worklist.len) {
@@ -864,20 +977,22 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT
 static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) {
     size_t i = external_blob_index(v);
     if (i < n_linkage_blobs()) {
-        assert(link_ids && jl_is_array(link_ids));
-        assert(jl_build_ids && jl_is_array(jl_build_ids));
-        uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
         // We found the sysimg/pkg that this item links against
-        // Store the image key in `link_ids`
-        jl_array_grow_end(link_ids, 1);
-        uint64_t *link_id_data  = (uint64_t*)jl_array_data(link_ids);
-        link_id_data[jl_array_len(link_ids)-1] = build_id_data[i];
         // Compute the relocation code
         size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
         offset /= sizeof(void*);
-        assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large");
-        // jl_printf(JL_STDOUT, "External link %ld against blob %d with key %ld at position 0x%lx with offset 0x%lx to \n", jl_array_len(link_ids), i, build_id_data[i>>1], ios_pos(s->s), offset);
-        // jl_(v);
+        assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large");
+        assert(n_linkage_blobs() == jl_array_len(s->buildid_depmods_idxs));
+        size_t depsidx = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[i]; // map from build_id_idx -> deps_idx
+        assert(depsidx < INT32_MAX);
+        if (depsidx < ((uintptr_t)1 << (RELOC_TAG_OFFSET - DEPS_IDX_OFFSET)) && offset < ((uintptr_t)1 << DEPS_IDX_OFFSET))
+            // if it fits in a SysimageLinkage type, use that representation
+            return ((uintptr_t)SysimageLinkage << RELOC_TAG_OFFSET) + ((uintptr_t)depsidx << DEPS_IDX_OFFSET) + offset;
+        // otherwise, we store the image key in `link_ids`
+        assert(link_ids && jl_is_array(link_ids));
+        jl_array_grow_end(link_ids, 1);
+        uint32_t *link_id_data  = (uint32_t*)jl_array_data(link_ids);  // wait until after the `grow`
+        link_id_data[jl_array_len(link_ids) - 1] = depsidx;
         return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset;
     }
     return 0;
@@ -910,17 +1025,17 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *
     else if (v == jl_nothing) {
         return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + 1;
     }
-    else if (jl_typeis(v, jl_int64_type)) {
+    else if (jl_typetagis(v, jl_int64_tag << 4)) {
         int64_t i64 = *(int64_t*)v + NBOX_C / 2;
         if ((uint64_t)i64 < NBOX_C)
             return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 2;
     }
-    else if (jl_typeis(v, jl_int32_type)) {
+    else if (jl_typetagis(v, jl_int32_tag << 4)) {
         int32_t i32 = *(int32_t*)v + NBOX_C / 2;
         if ((uint32_t)i32 < NBOX_C)
             return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 2 + NBOX_C;
     }
-    else if (jl_typeis(v, jl_uint8_type)) {
+    else if (jl_typetagis(v, jl_uint8_tag << 4)) {
         uint8_t u8 = *(uint8_t*)v;
         return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C;
     }
@@ -976,6 +1091,7 @@ static void write_gctaggedfield(jl_serializer_state *s, jl_datatype_t *ref) JL_N
     write_pointer(s->s);
 }
 
+
 // Special handling from `jl_write_values` for modules
 static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) JL_GC_DISABLED
 {
@@ -1049,7 +1165,7 @@ static void record_external_fns(jl_serializer_state *s, arraylist_t *external_fn
 #ifndef JL_NDEBUG
     for (size_t i = 0; i < external_fns->len; i++) {
         jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i];
-        assert(jl_object_in_image((jl_value_t*)ci));
+        assert(jl_atomic_load_relaxed(&ci->specsigflags) & 0b100);
     }
 #endif
 }
@@ -1073,41 +1189,57 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
         assert(!(s->incremental && jl_object_in_image(v)));
         jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
         assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption");
+        ios_t *f = s->s;
+        if (t->smalltag) {
+            if (t->layout->npointers == 0 || t == jl_string_type) {
+                if (jl_datatype_nfields(t) == 0 || t->name->mutabl == 0 || t == jl_string_type) {
+                    f = s->const_data;
+                }
+            }
+        }
+
         // realign stream to expected gc alignment (16 bytes)
-        uintptr_t skip_header_pos = ios_pos(s->s) + sizeof(jl_taggedvalue_t);
-        write_padding(s->s, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos);
+        uintptr_t skip_header_pos = ios_pos(f) + sizeof(jl_taggedvalue_t);
+        write_padding(f, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos);
 
         // write header
         if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t))
-            arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(s->s)|1));
-        write_gctaggedfield(s, t);
-        size_t reloc_offset = ios_pos(s->s);
+            arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1));
+        if (f == s->const_data)
+            write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED);
+        else
+            write_gctaggedfield(s, t);
+        size_t reloc_offset = ios_pos(f);
         assert(item < layout_table.len && layout_table.items[item] == NULL);
-        layout_table.items[item] = (void*)reloc_offset;               // store the inverse mapping of `serialization_order` (`id` => object-as-streampos)
-
-        if (s->incremental && needs_uniquing(v)) {
-            if (jl_is_method_instance(v)) {
-                jl_method_instance_t *mi = (jl_method_instance_t*)v;
-                write_pointerfield(s, mi->def.value);
-                write_pointerfield(s, mi->specTypes);
-                write_pointerfield(s, (jl_value_t*)mi->sparam_vals);
-                continue;
+        layout_table.items[item] = (void*)(reloc_offset | (f == s->const_data)); // store the inverse mapping of `serialization_order` (`id` => object-as-streampos)
+
+        if (s->incremental) {
+            if (needs_uniquing(v)) {
+                if (jl_is_method_instance(v)) {
+                    assert(f == s->s);
+                    jl_method_instance_t *mi = (jl_method_instance_t*)v;
+                    write_pointerfield(s, mi->def.value);
+                    write_pointerfield(s, mi->specTypes);
+                    write_pointerfield(s, (jl_value_t*)mi->sparam_vals);
+                    continue;
+                }
+                else if (!jl_is_datatype(v)) {
+                    assert(jl_is_datatype_singleton(t) && "unreachable");
+                }
             }
-            else if (!jl_is_datatype(v)) {
-                assert(jl_is_datatype_singleton(t) && "unreachable");
+            else if (needs_recaching(v)) {
+                arraylist_push(jl_is_datatype(v) ? &s->fixup_types : &s->fixup_objs, (void*)reloc_offset);
+            }
+            else if (jl_typetagis(v, jl_binding_type)) {
+                jl_binding_t *b = (jl_binding_t*)v;
+                if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod))
+                    jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
             }
-        }
-        else if (s->incremental && needs_recaching(v)) {
-            arraylist_push(jl_is_datatype(v) ? &s->fixup_types : &s->fixup_objs, (void*)reloc_offset);
-        }
-        else if (s->incremental && jl_typeis(v, jl_binding_type)) {
-            jl_binding_t *b = (jl_binding_t*)v;
-            if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod))
-                jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
         }
 
         // write data
         if (jl_is_array(v)) {
+            assert(f == s->s);
             // Internal data for types in julia.h with `jl_array_t` field(s)
 #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
             jl_array_t *ar = (jl_array_t*)v;
@@ -1123,12 +1255,12 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             int ndimwords = jl_array_ndimwords(ar->flags.ndims);
             size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
             // copy header
-            ios_write(s->s, (char*)v, headersize);
+            ios_write(f, (char*)v, headersize);
             size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
             if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
                 alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
             // make some header modifications in-place
-            jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset];
+            jl_array_t *newa = (jl_array_t*)&f->buf[reloc_offset];
             if (newa->flags.ndims == 1)
                 newa->maxsize = alen;
             newa->offset = 0;
@@ -1170,17 +1302,17 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             }
             else {
                 // Pointer eltypes are encoded in the mutable data section
-                size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt);
-                size_t padding_amt = data - ios_pos(s->s);
+                size_t data = LLT_ALIGN(ios_pos(f), alignment_amt);
+                size_t padding_amt = data - ios_pos(f);
                 headersize += padding_amt;
                 newa->data = (void*)headersize; // relocation offset
                 arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
                 arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
-                write_padding(s->s, padding_amt);
+                write_padding(f, padding_amt);
                 if (ar->flags.hasptr) {
                     // copy all of the data first
                     const char *data = (const char*)jl_array_data(ar);
-                    ios_write(s->s, data, datasize);
+                    ios_write(f, data, datasize);
                     // the rewrite all of the embedded pointers to null+relocation
                     uint16_t elsz = ar->elsize;
                     size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
@@ -1195,7 +1327,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                                 arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target
                                 record_uniquing(s, fld, fld_pos);
                             }
-                            memset(&s->s->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
+                            memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
                         }
                     }
                 }
@@ -1210,13 +1342,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             }
         }
         else if (jl_typeis(v, jl_module_type)) {
+            assert(f == s->s);
             jl_write_module(s, item, (jl_module_t*)v);
         }
-        else if (jl_typeis(v, jl_task_type)) {
+        else if (jl_typetagis(v, jl_task_tag << 4)) {
             jl_error("Task cannot be serialized");
         }
         else if (jl_is_svec(v)) {
-            ios_write(s->s, (char*)v, sizeof(void*));
+            assert(f == s->s);
+            ios_write(f, (char*)v, sizeof(void*));
             size_t ii, l = jl_svec_len(v);
             assert(l > 0 || (jl_svec_t*)v == jl_emptysvec);
             for (ii = 0; ii < l; ii++) {
@@ -1224,8 +1358,8 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             }
         }
         else if (jl_is_string(v)) {
-            ios_write(s->s, (char*)v, sizeof(void*) + jl_string_len(v));
-            write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility
+            ios_write(f, (char*)v, sizeof(void*) + jl_string_len(v));
+            write_uint8(f, '\0'); // null-terminated strings for easier C-compatibility
         }
         else if (jl_is_foreign_type(t) == 1) {
             jl_error("Cannot serialize instances of foreign datatypes");
@@ -1234,16 +1368,17 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             // The object has no fields, so we just snapshot its byte representation
             assert(!t->layout->npointers);
             assert(t->layout->npointers == 0);
-            ios_write(s->s, (char*)v, jl_datatype_size(t));
+            ios_write(f, (char*)v, jl_datatype_size(t));
         }
-        else if (jl_bigint_type && jl_typeis(v, jl_bigint_type)) {
+        else if (jl_bigint_type && jl_typetagis(v, jl_bigint_type)) {
             // foreign types require special handling
+            assert(f == s->s);
             jl_value_t *sizefield = jl_get_nth_field(v, 1);
             int32_t sz = jl_unbox_int32(sizefield);
             int32_t nw = (sz == 0 ? 1 : (sz < 0 ? -sz : sz));
             size_t nb = nw * gmp_limb_size;
-            ios_write(s->s, (char*)&nw, sizeof(int32_t));
-            ios_write(s->s, (char*)&sz, sizeof(int32_t));
+            ios_write(f, (char*)&nw, sizeof(int32_t));
+            ios_write(f, (char*)&sz, sizeof(int32_t));
             uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 8);
             write_padding(s->const_data, data - ios_pos(s->const_data));
             data /= sizeof(void*);
@@ -1252,7 +1387,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
             void *pdata = jl_unbox_voidpointer(jl_get_nth_field(v, 2));
             ios_write(s->const_data, (char*)pdata, nb);
-            write_pointer(s->s);
+            write_pointer(f);
         }
         else {
             // Generic object::DataType serialization by field
@@ -1262,16 +1397,16 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             for (i = 0; i < nf; i++) {
                 size_t offset = jl_field_offset(t, i);
                 const char *slot = data + offset;
-                write_padding(s->s, offset - tot);
+                write_padding(f, offset - tot);
                 tot = offset;
                 size_t fsz = jl_field_size(t, i);
                 if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(intptr_t*)slot != -1) {
                     // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
                     assert(!jl_field_isptr(t, i));
-                    write_pointer(s->s);
+                    write_pointer(f);
                 }
                 else if (fsz > 0) {
-                    ios_write(s->s, slot, fsz);
+                    ios_write(f, slot, fsz);
                 }
                 tot += fsz;
             }
@@ -1289,12 +1424,13 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                     arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target
                     record_uniquing(s, fld, fld_pos);
                 }
-                memset(&s->s->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
+                memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none)
             }
 
             // A few objects need additional handling beyond the generic serialization above
 
-            if (s->incremental && jl_typeis(v, jl_typemap_entry_type)) {
+            if (s->incremental && jl_typetagis(v, jl_typemap_entry_type)) {
+                assert(f == s->s);
                 jl_typemap_entry_t *newentry = (jl_typemap_entry_t*)&s->s->buf[reloc_offset];
                 if (newentry->max_world == ~(size_t)0) {
                     if (newentry->min_world > 1) {
@@ -1309,27 +1445,33 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 }
             }
             else if (jl_is_method(v)) {
-                write_padding(s->s, sizeof(jl_method_t) - tot); // hidden fields
+                assert(f == s->s);
+                write_padding(f, sizeof(jl_method_t) - tot); // hidden fields
                 jl_method_t *m = (jl_method_t*)v;
-                jl_method_t *newm = (jl_method_t*)&s->s->buf[reloc_offset];
+                jl_method_t *newm = (jl_method_t*)&f->buf[reloc_offset];
                 if (s->incremental) {
                     if (newm->deleted_world != ~(size_t)0)
                         newm->deleted_world = 1;
                     else
                         arraylist_push(&s->fixup_objs, (void*)reloc_offset);
                     newm->primary_world = ~(size_t)0;
+                } else {
+                    newm->nroots_sysimg = m->roots ? jl_array_len(m->roots) : 0;
                 }
                 if (m->ccallable)
                     arraylist_push(&s->ccallable_list, (void*)reloc_offset);
             }
             else if (jl_is_method_instance(v)) {
-                jl_method_instance_t *newmi = (jl_method_instance_t*)&s->s->buf[reloc_offset];
+                assert(f == s->s);
+                jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset];
                 jl_atomic_store_relaxed(&newmi->precompiled, 0);
             }
             else if (jl_is_code_instance(v)) {
+                assert(f == s->s);
                 // Handle the native-code pointers
+                assert(f == s->s);
                 jl_code_instance_t *m = (jl_code_instance_t*)v;
-                jl_code_instance_t *newm = (jl_code_instance_t*)&s->s->buf[reloc_offset];
+                jl_code_instance_t *newm = (jl_code_instance_t*)&f->buf[reloc_offset];
 
                 if (s->incremental) {
                     arraylist_push(&s->fixup_objs, (void*)reloc_offset);
@@ -1348,7 +1490,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 }
 
                 newm->invoke = NULL;
-                newm->isspecsig = 0;
+                newm->specsigflags = 0;
                 newm->specptr.fptr = NULL;
                 int8_t fptr_id = JL_API_NULL;
                 int8_t builtin_id = 0;
@@ -1399,17 +1541,19 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 }
                 newm->invoke = NULL; // relocation offset
                 if (fptr_id != JL_API_NULL) {
+                    assert(fptr_id < BuiltinFunctionTag && "too many functions to serialize");
                     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, invoke))); // relocation location
                     arraylist_push(&s->relocs_list, (void*)(((uintptr_t)FunctionRef << RELOC_TAG_OFFSET) + fptr_id)); // relocation target
                 }
                 if (builtin_id >= 2) {
                     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, specptr.fptr))); // relocation location
-                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)BuiltinFunctionRef << RELOC_TAG_OFFSET) + builtin_id - 2)); // relocation target
+                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)FunctionRef << RELOC_TAG_OFFSET) + BuiltinFunctionTag + builtin_id - 2)); // relocation target
                 }
             }
             else if (jl_is_datatype(v)) {
+                assert(f == s->s);
                 jl_datatype_t *dt = (jl_datatype_t*)v;
-                jl_datatype_t *newdt = (jl_datatype_t*)&s->s->buf[reloc_offset];
+                jl_datatype_t *newdt = (jl_datatype_t*)&f->buf[reloc_offset];
 
                 if (dt->layout != NULL) {
                     size_t nf = dt->layout->nfields;
@@ -1440,8 +1584,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 }
             }
             else if (jl_is_typename(v)) {
+                assert(f == s->s);
                 jl_typename_t *tn = (jl_typename_t*)v;
-                jl_typename_t *newtn = (jl_typename_t*)&s->s->buf[reloc_offset];
+                jl_typename_t *newtn = (jl_typename_t*)&f->buf[reloc_offset];
                 if (tn->atomicfields != NULL) {
                     size_t nb = (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t);
                     uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*));
@@ -1464,6 +1609,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 }
             }
             else if (jl_is_globalref(v)) {
+                assert(f == s->s);
                 jl_globalref_t *gr = (jl_globalref_t*)v;
                 if (s->incremental && jl_object_in_image((jl_value_t*)gr->mod)) {
                     // will need to populate the binding field later
@@ -1471,11 +1617,12 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                 }
             }
             else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
+                assert(f == s->s);
                 // will need to rehash this, later (after types are fully constructed)
                 arraylist_push(&s->fixup_objs, (void*)reloc_offset);
             }
             else {
-                write_padding(s->s, jl_datatype_size(t) - tot);
+                write_padding(f, jl_datatype_size(t) - tot);
             }
         }
     }
@@ -1511,6 +1658,14 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
         assert(reloc_item < layout_table.len);
         uintptr_t reloc_base = (uintptr_t)layout_table.items[reloc_item];
         assert(reloc_base != 0 && "layout offset missing for relocation item");
+        if (reloc_base & 1) {
+            // convert to a ConstDataRef
+            tag = ConstDataRef;
+            reloc_base &= ~(uintptr_t)1;
+            assert(LLT_ALIGN(reloc_base, sizeof(void*)) == reloc_base);
+            reloc_base /= sizeof(void*);
+            assert(reloc_offset == 0);
+        }
         // write reloc_offset into s->s at pos
         return ((uintptr_t)tag << RELOC_TAG_OFFSET) + reloc_base + reloc_offset;
     }
@@ -1528,11 +1683,16 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
         case TagRef:
             assert(offset < 2 * NBOX_C + 258 && "corrupt relocation item id");
             break;
-        case BuiltinFunctionRef:
-            assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer id");
-            break;
         case FunctionRef:
-            assert(offset < JL_API_MAX && "unknown function pointer id");
+            if (offset & BuiltinFunctionTag) {
+                offset &= ~BuiltinFunctionTag;
+                assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer id");
+            }
+            else {
+                assert(offset < JL_API_MAX && "unknown function pointer id");
+            }
+            break;
+        case SysimageLinkage:
             break;
         case ExternalLinkage:
             break;
@@ -1546,16 +1706,18 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
 }
 
 // Compute target location at deserialization
-static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) JL_NOTSAFEPOINT
+static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) JL_NOTSAFEPOINT
 {
     enum RefTags tag = (enum RefTags)(reloc_id >> RELOC_TAG_OFFSET);
     size_t offset = (reloc_id & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1));
     switch (tag) {
     case DataRef:
-        assert(offset <= size);
-        return base + offset;
+        assert(offset <= s->s->size);
+        return (uintptr_t)base + offset;
     case ConstDataRef:
-        return (uintptr_t)s->const_data->buf + (offset * sizeof(void*));
+        offset *= sizeof(void*);
+        assert(offset <= s->const_data->size);
+        return (uintptr_t)s->const_data->buf + offset;
     case SymbolRef:
         assert(offset < deser_sym.len && deser_sym.items[offset] && "corrupt relocation item id");
         return (uintptr_t)deser_sym.items[offset];
@@ -1576,10 +1738,12 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         // offset -= 256;
         assert(0 && "corrupt relocation item id");
         jl_unreachable(); // terminate control flow if assertion is disabled.
-    case BuiltinFunctionRef:
-        assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer ID");
-        return (uintptr_t)id_to_fptrs[offset];
     case FunctionRef:
+        if (offset & BuiltinFunctionTag) {
+            offset &= ~BuiltinFunctionTag;
+            assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer ID");
+            return (uintptr_t)id_to_fptrs[offset];
+        }
         switch ((jl_callingconv_t)offset) {
         case JL_API_BOXED:
             if (s->image->fptrs.base)
@@ -1600,39 +1764,51 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         //default:
             assert("corrupt relocation item id");
         }
-    case ExternalLinkage:
+    case SysimageLinkage: {
+#ifdef _P64
+        size_t depsidx = offset >> DEPS_IDX_OFFSET;
+        offset &= ((size_t)1 << DEPS_IDX_OFFSET) - 1;
+#else
+        size_t depsidx = 0;
+#endif
+        assert(s->buildid_depmods_idxs && depsidx < jl_array_len(s->buildid_depmods_idxs));
+        size_t i = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[depsidx];
+        assert(2*i < jl_linkage_blobs.len);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+    }
+    case ExternalLinkage: {
         assert(link_ids);
         assert(link_index);
-        assert(jl_build_ids);
-        uint64_t *link_id_data  = (uint64_t*)jl_array_data(link_ids);
-        uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
         assert(0 <= *link_index && *link_index < jl_array_len(link_ids));
-        uint64_t build_id = link_id_data[*link_index];
+        uint32_t depsidx = ((uint32_t*)jl_array_data(link_ids))[*link_index];
         *link_index += 1;
-        size_t i = 0, nids = jl_array_len(jl_build_ids);
-        while (i < nids) {
-            if (build_id == build_id_data[i])
-                break;
-            i++;
-        }
-        assert(i < nids);
+        assert(depsidx < jl_array_len(s->buildid_depmods_idxs));
+        size_t i = ((uint32_t*)jl_array_data(s->buildid_depmods_idxs))[depsidx];
         assert(2*i < jl_linkage_blobs.len);
         return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
     }
+    }
     abort();
 }
 
 
-static void jl_write_offsetlist(ios_t *s, char *base, size_t size, arraylist_t *list)
+static void jl_finish_relocs(char *base, size_t size, arraylist_t *list)
 {
     for (size_t i = 0; i < list->len; i += 2) {
-        size_t last_pos = i ? (size_t)list->items[i - 2] : 0;
         size_t pos = (size_t)list->items[i];
         size_t item = (size_t)list->items[i + 1];   // item is tagref-encoded
         uintptr_t *pv = (uintptr_t*)(base + pos);
         assert(pos < size && pos != 0);
         *pv = get_reloc_for_item(item, *pv);
+    }
+}
 
+static void jl_write_offsetlist(ios_t *s, size_t size, arraylist_t *list)
+{
+    for (size_t i = 0; i < list->len; i += 2) {
+        size_t last_pos = i ? (size_t)list->items[i - 2] : 0;
+        size_t pos = (size_t)list->items[i];
+        assert(pos < size && pos != 0);
         // write pos as compressed difference.
         size_t pos_diff = pos - last_pos;
         while (pos_diff) {
@@ -1661,23 +1837,9 @@ static void jl_write_arraylist(ios_t *s, arraylist_t *list)
     ios_write(s, (const char*)list->items, list->len * sizeof(void*));
 }
 
-static void jl_write_relocations(jl_serializer_state *s)
-{
-    char *base = &s->s->buf[0];
-    jl_write_offsetlist(s->relocs, base, s->s->size, &s->gctags_list);
-    jl_write_offsetlist(s->relocs, base, s->s->size, &s->relocs_list);
-    if (s->incremental) {
-        jl_write_arraylist(s->relocs, &s->uniquing_types);
-        jl_write_arraylist(s->relocs, &s->uniquing_objs);
-        jl_write_arraylist(s->relocs, &s->fixup_types);
-    }
-    jl_write_arraylist(s->relocs, &s->fixup_objs);
-}
-
 static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint8_t bits)
 {
     uintptr_t base = (uintptr_t)s->s->buf;
-    size_t size = s->s->size;
     uintptr_t last_pos = 0;
     uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos);
     int link_index = 0;
@@ -1702,7 +1864,9 @@ static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint
         last_pos = pos;
         uintptr_t *pv = (uintptr_t *)(base + pos);
         uintptr_t v = *pv;
-        v = get_item_for_reloc(s, base, size, v, link_ids, &link_index);
+        v = get_item_for_reloc(s, base, v, link_ids, &link_index);
+        if (bits && v && ((jl_datatype_t*)v)->smalltag)
+            v = (uintptr_t)((jl_datatype_t*)v)->smalltag << 4; // TODO: should we have a representation that supports sweep without a relocation step?
         *pv = v | bits;
     }
     assert(!link_ids || link_index == jl_array_len(link_ids));
@@ -1747,6 +1911,7 @@ void gc_sweep_sysimg(void)
             last_pos = pos;
             jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos);
             o->bits.gc = GC_OLD;
+            assert(o->bits.in_image == 1);
         }
     }
 }
@@ -1768,13 +1933,12 @@ static void _jl_write_value(jl_serializer_state *s, jl_value_t *v)
 
 static jl_value_t *jl_read_value(jl_serializer_state *s)
 {
-    uintptr_t base = (uintptr_t)&s->s->buf[0];
-    size_t size = s->s->size;
+    uintptr_t base = (uintptr_t)s->s->buf;
     uintptr_t offset = *(reloc_t*)(base + (uintptr_t)s->s->bpos);
     s->s->bpos += sizeof(reloc_t);
     if (offset == 0)
         return NULL;
-    return (jl_value_t*)get_item_for_reloc(s, base, size, offset, NULL, NULL);
+    return (jl_value_t*)get_item_for_reloc(s, base, offset, NULL, NULL);
 }
 
 // The next two, `jl_read_offset` and `jl_delayed_reloc`, are essentially a split version
@@ -1797,11 +1961,10 @@ static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL
 {
     if (!offset)
         return NULL;
-    uintptr_t base = (uintptr_t)&s->s->buf[0];
-    size_t size = s->s->size;
+    uintptr_t base = (uintptr_t)s->s->buf;
     int link_index = 0;
-    jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->link_ids_relocs, &link_index);
-    assert(link_index < jl_array_len(s->link_ids_relocs));
+    jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, offset, s->link_ids_relocs, &link_index);
+    assert(!s->link_ids_relocs || link_index < jl_array_len(s->link_ids_relocs));
     return ret;
 }
 
@@ -1814,6 +1977,9 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image)
     image->fptrs.base = NULL;
     if (fvars.base == NULL)
         return;
+
+    memcpy(image->small_typeof, &small_typeof, sizeof(small_typeof));
+
     int img_fvars_max = s->fptr_record->size / sizeof(void*);
     size_t i;
     uintptr_t base = (uintptr_t)&s->s->buf[0];
@@ -1848,7 +2014,7 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image)
             void *fptr = (void*)(base + offset);
             if (specfunc) {
                 codeinst->specptr.fptr = fptr;
-                codeinst->isspecsig = 1; // TODO: set only if confirmed to be true
+                codeinst->specsigflags = 0b111; // TODO: set only if confirmed to be true
             }
             else {
                 codeinst->invoke = (jl_callptr_t)fptr;
@@ -1872,11 +2038,12 @@ static uint32_t write_gvars(jl_serializer_state *s, arraylist_t *globals, arrayl
     }
     for (size_t i = 0; i < external_fns->len; i++) {
         jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i];
+        assert(ci && (jl_atomic_load_relaxed(&ci->specsigflags) & 0b001));
         uintptr_t item = backref_id(s, (void*)ci, s->link_ids_external_fnvars);
         uintptr_t reloc = get_reloc_for_item(item, 0);
         write_reloc_t(s->gvar_record, reloc);
     }
-    return globals->len + 1;
+    return globals->len;
 }
 
 // Pointer relocation for native-code referenced global variables
@@ -1884,20 +2051,21 @@ static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image, uint3
 {
     if (image->gvars_base == NULL)
         return;
+    uintptr_t base = (uintptr_t)s->s->buf;
     size_t i = 0;
     size_t l = s->gvar_record->size / sizeof(reloc_t);
-    uintptr_t base = (uintptr_t)&s->s->buf[0];
-    size_t size = s->s->size;
     reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0];
     int gvar_link_index = 0;
     int external_fns_link_index = 0;
+    assert(l == image->ngvars);
     for (i = 0; i < l; i++) {
         uintptr_t offset = gvars[i];
         uintptr_t v = 0;
         if (i < external_fns_begin) {
-            v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &gvar_link_index);
-        } else {
-            v = get_item_for_reloc(s, base, size, offset, s->link_ids_external_fnvars, &external_fns_link_index);
+            v = get_item_for_reloc(s, base, offset, s->link_ids_gvars, &gvar_link_index);
+        }
+        else {
+            v = get_item_for_reloc(s, base, offset, s->link_ids_external_fnvars, &external_fns_link_index);
         }
         uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i);
         *gv = v;
@@ -1920,7 +2088,7 @@ static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_
                 v = (uintptr_t)jl_as_global_root((jl_value_t*)v);
         } else {
             jl_code_instance_t *codeinst = (jl_code_instance_t*) v;
-            assert(codeinst && codeinst->isspecsig);
+            assert(codeinst && (codeinst->specsigflags & 0b01) && codeinst->specptr.fptr);
             v = (uintptr_t)codeinst->specptr.fptr;
         }
         *gv = v;
@@ -1931,6 +2099,7 @@ static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_
 static void jl_compile_extern(jl_method_t *m, void *sysimg_handle) JL_GC_DISABLED
 {
     // install ccallable entry point in JIT
+    assert(m); // makes clang-sa happy
     jl_svec_t *sv = m->ccallable;
     int success = jl_compile_extern_c(NULL, NULL, sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1));
     if (!success)
@@ -1994,7 +2163,7 @@ static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig
     int compressed = 0;
     if (!jl_is_code_info(ci_)) {
         compressed = 1;
-        ci = jl_uncompress_ir(m, NULL, (jl_array_t*)ci_);
+        ci = jl_uncompress_ir(m, NULL, (jl_value_t*)ci_);
     }
     else {
         ci = (jl_code_info_t*)ci_;
@@ -2026,7 +2195,7 @@ static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig
 static void strip_specializations_(jl_method_instance_t *mi)
 {
     assert(jl_is_method_instance(mi));
-    jl_code_instance_t *codeinst = mi->cache;
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
     while (codeinst) {
         jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
         if (inferred && inferred != jl_nothing) {
@@ -2077,12 +2246,17 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
             jl_gc_wb(m, m->source);
         }
     }
-    jl_svec_t *specializations = m->specializations;
-    size_t i, l = jl_svec_len(specializations);
-    for (i = 0; i < l; i++) {
-        jl_value_t *mi = jl_svecref(specializations, i);
-        if (mi != jl_nothing)
-            strip_specializations_((jl_method_instance_t*)mi);
+    jl_value_t *specializations = m->specializations;
+    if (!jl_is_svec(specializations)) {
+        strip_specializations_((jl_method_instance_t*)specializations);
+    }
+    else {
+        size_t i, l = jl_svec_len(specializations);
+        for (i = 0; i < l; i++) {
+            jl_value_t *mi = jl_svecref(specializations, i);
+            if (mi != jl_nothing)
+                strip_specializations_((jl_method_instance_t*)mi);
+        }
     }
     if (m->unspecialized)
         strip_specializations_(m->unspecialized);
@@ -2103,32 +2277,10 @@ static void jl_strip_all_codeinfos(void)
     jl_foreach_reachable_mtable(strip_all_codeinfos_, NULL);
 }
 
-// Method roots created during sysimg construction are exempted from
-// triggering non-relocatability of compressed CodeInfos.
-// Set the number of such roots in each method when the sysimg is
-// serialized.
-// TODO: move this to `jl_write_values`
-static int set_nroots_sysimg__(jl_typemap_entry_t *def, void *_env)
-{
-    jl_method_t *m = def->func.method;
-    m->nroots_sysimg = m->roots ? jl_array_len(m->roots) : 0;
-    return 1;
-}
-
-static int set_nroots_sysimg_(jl_methtable_t *mt, void *_env)
-{
-    return jl_typemap_visitor(mt->defs, set_nroots_sysimg__, NULL);
-}
-
-static void jl_set_nroots_sysimg(void)
-{
-    jl_foreach_reachable_mtable(set_nroots_sysimg_, NULL);
-}
-
 // --- entry points ---
 
 jl_array_t *jl_global_roots_table;
-static jl_mutex_t global_roots_lock;
+jl_mutex_t global_roots_lock;
 
 JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
 {
@@ -2182,7 +2334,6 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
     // edges: [caller1, ext_targets_indexes1, ...] for worklist-owned methods calling external methods
     assert(edges_map == NULL);
 
-    htable_new(&external_mis, 0);  // we need external_mis until after `jl_collect_edges` finishes
     // Save the inferred code from newly inferred, external methods
     *new_specializations = queue_external_cis(newly_inferred);
 
@@ -2202,6 +2353,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
     }
 
     if (edges) {
+        size_t world = jl_atomic_load_acquire(&jl_world_counter);
         jl_collect_missing_backedges(jl_type_type_mt);
         jl_collect_missing_backedges(jl_nonfunction_mt);
         // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in callers_with_edges.
@@ -2210,21 +2362,16 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
         *edges = jl_alloc_vec_any(0);
         *method_roots_list = jl_alloc_vec_any(0);
         // Collect the new method roots
-        htable_t methods_with_newspecs;
-        htable_new(&methods_with_newspecs, 0);
-        jl_collect_methods(&methods_with_newspecs, *new_specializations);
-        jl_collect_new_roots(*method_roots_list, &methods_with_newspecs, worklist_key);
-        htable_free(&methods_with_newspecs);
-        jl_collect_edges(*edges, *ext_targets);
-    }
-    htable_free(&external_mis);
+        jl_collect_new_roots(*method_roots_list, *new_specializations, worklist_key);
+        jl_collect_edges(*edges, *ext_targets, *new_specializations, world);
+    }
     assert(edges_map == NULL); // jl_collect_edges clears this when done
 
     JL_GC_POP();
 }
 
 // In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage
-static void jl_save_system_image_to_stream(ios_t *f,
+static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
                                            jl_array_t *worklist, jl_array_t *extext_methods,
                                            jl_array_t *new_specializations, jl_array_t *method_roots_list,
                                            jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED
@@ -2233,8 +2380,6 @@ static void jl_save_system_image_to_stream(ios_t *f,
     // strip metadata and IR when requested
     if (jl_options.strip_metadata || jl_options.strip_ir)
         jl_strip_all_codeinfos();
-    if (worklist == NULL)
-        jl_set_nroots_sysimg();
 
     int en = jl_gc_enable(0);
     nsym_tag = 0;
@@ -2256,7 +2401,7 @@ static void jl_save_system_image_to_stream(ios_t *f,
     ios_mem(&relocs, 0);
     ios_mem(&gvar_record, 0);
     ios_mem(&fptr_record, 0);
-    jl_serializer_state s;
+    jl_serializer_state s = {0};
     s.incremental = !(worklist == NULL);
     s.s = &sysimg;
     s.const_data = &const_data;
@@ -2272,10 +2417,11 @@ static void jl_save_system_image_to_stream(ios_t *f,
     arraylist_new(&s.fixup_types, 0);
     arraylist_new(&s.fixup_objs, 0);
     arraylist_new(&s.ccallable_list, 0);
-    s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, 0);
-    s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, 0);
-    s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, 0);
-    s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, 0);
+    s.buildid_depmods_idxs = image_to_depmodidx(mod_array);
+    s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, 0);
+    s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, 0);
+    s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, 0);
+    s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, 0);
     htable_new(&s.callers_with_edges, 0);
     jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL;
 
@@ -2371,9 +2517,13 @@ static void jl_save_system_image_to_stream(ios_t *f,
         write_padding(&sysimg, sizeof(uintptr_t));
         jl_write_values(&s);
         external_fns_begin = write_gvars(&s, &gvars, &external_fns);
-        jl_write_relocations(&s);
     }
 
+    // This ensures that we can use the low bit of addresses for
+    // identifying end pointers in gc's eytzinger search.
+    write_padding(&sysimg, 4 - (sysimg.size % 4));
+    write_padding(&const_data, 4 - (const_data.size % 4));
+
     if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
         jl_printf(
             JL_STDERR,
@@ -2396,9 +2546,12 @@ static void jl_save_system_image_to_stream(ios_t *f,
 
     // step 3: combine all of the sections into one file
     assert(ios_pos(f) % JL_CACHE_BYTE_ALIGNMENT == 0);
+    ssize_t sysimg_offset = ios_pos(f);
     write_uint(f, sysimg.size - sizeof(uintptr_t));
     ios_seek(&sysimg, sizeof(uintptr_t));
     ios_copyall(f, &sysimg);
+    size_t sysimg_size = s.s->size;
+    assert(ios_pos(f) - sysimg_offset == sysimg_size);
     ios_close(&sysimg);
 
     write_uint(f, const_data.size);
@@ -2414,6 +2567,18 @@ static void jl_save_system_image_to_stream(ios_t *f,
     ios_copyall(f, &symbols);
     ios_close(&symbols);
 
+    // Prepare and write the relocations sections, now that the rest of the image is laid out
+    char *base = &f->buf[0];
+    jl_finish_relocs(base + sysimg_offset, sysimg_size, &s.gctags_list);
+    jl_finish_relocs(base + sysimg_offset, sysimg_size, &s.relocs_list);
+    jl_write_offsetlist(s.relocs, sysimg_size, &s.gctags_list);
+    jl_write_offsetlist(s.relocs, sysimg_size, &s.relocs_list);
+    if (s.incremental) {
+        jl_write_arraylist(s.relocs, &s.uniquing_types);
+        jl_write_arraylist(s.relocs, &s.uniquing_objs);
+        jl_write_arraylist(s.relocs, &s.fixup_types);
+    }
+    jl_write_arraylist(s.relocs, &s.fixup_objs);
     write_uint(f, relocs.size);
     write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f));
     ios_seek(&relocs, 0);
@@ -2465,22 +2630,16 @@ static void jl_save_system_image_to_stream(ios_t *f,
             jl_write_value(&s, edges);
         }
         write_uint32(f, jl_array_len(s.link_ids_gctags));
-        ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags)*sizeof(uint64_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags) * sizeof(uint32_t));
         write_uint32(f, jl_array_len(s.link_ids_relocs));
-        ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs)*sizeof(uint64_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_relocs), jl_array_len(s.link_ids_relocs) * sizeof(uint32_t));
         write_uint32(f, jl_array_len(s.link_ids_gvars));
-        ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars) * sizeof(uint32_t));
         write_uint32(f, jl_array_len(s.link_ids_external_fnvars));
-        ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars)*sizeof(uint64_t));
+        ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars) * sizeof(uint32_t));
         write_uint32(f, external_fns_begin);
         jl_write_arraylist(s.s, &s.ccallable_list);
     }
-    // Write the build_id key
-    uint64_t buildid = 0;
-    if (worklist)
-        buildid = jl_worklist_key(worklist);
-    write_uint32(f, buildid >> 32);
-    write_uint32(f, buildid & (((uint64_t)1 << 32) - 1));
 
     assert(object_worklist.len == 0);
     arraylist_free(&object_worklist);
@@ -2529,7 +2688,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
 {
     jl_gc_collect(JL_GC_FULL);
     jl_gc_collect(JL_GC_INCREMENTAL);   // sweep finalizers
-    JL_TIMING(SYSIMG_DUMP);
+    JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP);
 
     // iff emit_split
     // write header and src_text to one file f/s
@@ -2582,8 +2741,8 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
     // Make sure we don't run any Julia code concurrently after this point
     // since it will invalidate our serialization preparations
     jl_gc_enable_finalizers(ct, 0);
-    assert(ct->reentrant_inference == 0);
-    ct->reentrant_inference = (uint16_t)-1;
+    assert((ct->reentrant_timing & 0b1110) == 0);
+    ct->reentrant_timing |= 0b1000;
     if (worklist) {
         jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
                                       &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
@@ -2598,13 +2757,13 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
     }
     if (_native_data != NULL)
         native_functions = *_native_data;
-    jl_save_system_image_to_stream(ff, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
+    jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
     if (_native_data != NULL)
         native_functions = NULL;
     // make sure we don't run any Julia code concurrently before this point
     // Re-enable running julia code for postoutput hooks, atexit, etc.
     jl_gc_enable_finalizers(ct, 1);
-    ct->reentrant_inference = 0;
+    ct->reentrant_timing &= ~0b1000u;
     jl_precompile_toplevel_module = NULL;
 
     if (worklist) {
@@ -2660,7 +2819,7 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
     if (jl_options.cpu_target == NULL)
         jl_options.cpu_target = "native";
     jl_sysimg_handle = handle;
-    sysimage.fptrs = jl_init_processor_sysimg(handle);
+    sysimage = jl_init_processor_sysimg(handle);
 }
 
 #ifndef JL_NDEBUG
@@ -2672,6 +2831,9 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
 // }
 #endif
 
+extern void rebuild_image_blob_tree(void);
+extern void export_small_typeof(void);
+
 static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
                                 /* outputs */    jl_array_t **restored,         jl_array_t **init_order,
                                                  jl_array_t **extext_methods,
@@ -2679,10 +2841,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                                                  jl_array_t **ext_targets, jl_array_t **edges,
                                                  char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
 {
-    JL_TIMING(SYSIMG_LOAD);
     int en = jl_gc_enable(0);
     ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record;
-    jl_serializer_state s;
+    jl_serializer_state s = {0};
     s.incremental = restored != NULL; // jl_linkage_blobs.len > 0;
     s.image = image;
     s.s = NULL;
@@ -2692,9 +2853,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
     s.ptls = jl_current_task->ptls;
-    arraylist_new(&s.relocs_list, 0);
-    arraylist_new(&s.gctags_list, 0);
-    s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = s.link_ids_external_fnvars = NULL;
     jl_value_t **const*const tags = get_tags();
     htable_t new_dt_objs;
     htable_new(&new_dt_objs, 0);
@@ -2702,9 +2860,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
 
     // step 1: read section map
     assert(ios_pos(f) == 0 && f->bm == bm_mem);
-    size_t sizeof_sysimg = read_uint(f);
-    ios_static_buffer(&sysimg, f->buf, sizeof_sysimg + sizeof(uintptr_t));
-    ios_skip(f, sizeof_sysimg);
+    size_t sizeof_sysdata = read_uint(f);
+    ios_static_buffer(&sysimg, f->buf, sizeof_sysdata + sizeof(uintptr_t));
+    ios_skip(f, sizeof_sysdata);
 
     size_t sizeof_constdata = read_uint(f);
     // realign stream to max-alignment for data
@@ -2712,6 +2870,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     ios_static_buffer(&const_data, f->buf + f->bpos, sizeof_constdata);
     ios_skip(f, sizeof_constdata);
 
+    size_t sizeof_sysimg = f->bpos;
+
     size_t sizeof_symbols = read_uint(f);
     ios_seek(f, LLT_ALIGN(ios_pos(f), 8));
     ios_static_buffer(&symbols, f->buf + f->bpos, sizeof_symbols);
@@ -2747,9 +2907,13 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             jl_value_t **tag = tags[i];
             *tag = jl_read_value(&s);
         }
+#define XX(name) \
+        small_typeof[(jl_##name##_tag << 4) / sizeof(*small_typeof)] = jl_##name##_type;
+        JL_SMALL_TYPEOF(XX)
+#undef XX
+        export_small_typeof();
         jl_global_roots_table = (jl_array_t*)jl_read_value(&s);
         // set typeof extra-special values now that we have the type set by tags above
-        jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header;
         jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header;
         s.ptls->root_task->tls = jl_read_value(&s);
         jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls);
@@ -2771,25 +2935,26 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         offset_ext_targets = jl_read_offset(&s);
         offset_edges = jl_read_offset(&s);
     }
+    s.buildid_depmods_idxs = depmod_to_imageidx(depmods);
     size_t nlinks_gctags = read_uint32(f);
     if (nlinks_gctags > 0) {
-        s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gctags);
-        ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint64_t));
+        s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, nlinks_gctags);
+        ios_read(f, (char*)jl_array_data(s.link_ids_gctags), nlinks_gctags * sizeof(uint32_t));
     }
     size_t nlinks_relocs = read_uint32(f);
     if (nlinks_relocs > 0) {
-        s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, nlinks_relocs);
-        ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint64_t));
+        s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, nlinks_relocs);
+        ios_read(f, (char*)jl_array_data(s.link_ids_relocs), nlinks_relocs * sizeof(uint32_t));
     }
     size_t nlinks_gvars = read_uint32(f);
     if (nlinks_gvars > 0) {
-        s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gvars);
-        ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint64_t));
+        s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_gvars);
+        ios_read(f, (char*)jl_array_data(s.link_ids_gvars), nlinks_gvars * sizeof(uint32_t));
     }
     size_t nlinks_external_fnvars = read_uint32(f);
     if (nlinks_external_fnvars > 0) {
-        s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_external_fnvars);
-        ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint64_t));
+        s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_external_fnvars);
+        ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint32_t));
     }
     uint32_t external_fns_begin = read_uint32(f);
     jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list);
@@ -2802,10 +2967,11 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         *method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list);
         *ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets);
         *edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges);
+        if (!*new_specializations)
+            *new_specializations = jl_alloc_vec_any(0);
     }
     s.s = NULL;
 
-
     // step 3: apply relocations
     assert(!ios_eof(f));
     jl_read_symbols(&s);
@@ -2817,7 +2983,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         *base = image_base;
 
     s.s = &sysimg;
-    jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags
+    jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD | GC_IN_IMAGE); // gctags
     size_t sizeof_tags = ios_pos(&relocs);
     (void)sizeof_tags;
     jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs
@@ -2883,9 +3049,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                 continue;
             }
         }
-        jl_value_t *otyp = jl_typeof(obj);   // the original type of the object that was written here
-        assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg + sizeof(uintptr_t));
-        if (otyp == (jl_value_t*)jl_datatype_type) {
+        uintptr_t otyp = jl_typetagof(obj);   // the original type of the object that was written here
+        assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg);
+        if (otyp == jl_datatype_tag << 4) {
             jl_datatype_t *dt = (jl_datatype_t*)obj[0], *newdt;
             if (jl_is_datatype(dt)) {
                 newdt = dt; // already done
@@ -2921,18 +3087,18 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             newobj = (jl_value_t*)newdt;
         }
         else {
-            assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg + sizeof(uintptr_t)));
+            assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg));
             assert(jl_is_datatype_singleton((jl_datatype_t*)otyp) && "unreachable");
             newobj = ((jl_datatype_t*)otyp)->instance;
             assert(newobj != jl_nothing);
             arraylist_push(&cleanup_list, (void*)obj);
         }
         if (tag)
-            *pfld = (uintptr_t)newobj | GC_OLD;
+            *pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE;
         else
             *pfld = (uintptr_t)newobj;
-        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t)));
-        assert(jl_typeis(obj, otyp));
+        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
+        assert(jl_typetagis(obj, otyp));
     }
     // A few fields (reached via super) might be self-recursive. This is rare, but handle them now.
     // They cannot be instances though, since the type must fully exist before the singleton field can be allocated
@@ -2944,7 +3110,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         assert(jl_is_datatype(dt));
         jl_value_t *newobj = (jl_value_t*)dt;
         *pfld = (uintptr_t)newobj;
-        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t)));
+        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
     }
     arraylist_free(&delay_list);
     // now that all the fields of dt are assigned and unique, copy them into
@@ -2971,6 +3137,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             memset(o, 0xba, sizeof(jl_value_t*) + sizeof(jl_datatype_t));
         else
             memset(o, 0xba, sizeof(jl_value_t*) + 0); // singleton
+        o->bits.in_image = 1;
     }
     arraylist_grow(&cleanup_list, -cleanup_list.len);
     // finally cache all our new types now
@@ -3008,9 +3175,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             pfld = (uintptr_t*)(image_base + item);
             obj = *(jl_value_t***)pfld;
         }
-        jl_value_t *otyp = jl_typeof(obj);   // the original type of the object that was written here
-        if (otyp == (jl_value_t*)jl_method_instance_type) {
-            assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg + sizeof(uintptr_t));
+        uintptr_t otyp = jl_typetagof(obj);   // the original type of the object that was written here
+        if (otyp == (uintptr_t)jl_method_instance_type) {
+            assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg);
             jl_value_t *m = obj[0];
             if (jl_is_method_instance(m)) {
                 newobj = m; // already done
@@ -3027,8 +3194,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             abort(); // should be unreachable
         }
         *pfld = (uintptr_t)newobj;
-        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg + sizeof(uintptr_t)));
-        assert(jl_typeis(obj, otyp));
+        assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
+        assert(jl_typetagis(obj, otyp));
     }
     arraylist_free(&s.uniquing_types);
     arraylist_free(&s.uniquing_objs);
@@ -3038,12 +3205,13 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
         jl_value_t *t = jl_typeof(item);
         if (t == (jl_value_t*)jl_method_instance_type)
             memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored
+        o->bits.in_image = 1;
     }
     arraylist_free(&cleanup_list);
     for (size_t i = 0; i < s.fixup_objs.len; i++) {
         uintptr_t item = (uintptr_t)s.fixup_objs.items[i];
         jl_value_t *obj = (jl_value_t*)(image_base + item);
-        if (jl_typeis(obj, jl_typemap_entry_type)) {
+        if (jl_typetagis(obj, jl_typemap_entry_type)) {
             jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj;
             entry->min_world = world;
         }
@@ -3060,19 +3228,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             jl_code_instance_t *ci = (jl_code_instance_t*)obj;
             assert(s.incremental);
             ci->min_world = world;
-            if (ci->max_world == 1) { // sentinel value: has edges to external callables
-                ptrhash_put(&new_code_instance_validate, ci, (void*)(~(uintptr_t)HT_NOTFOUND));   // "HT_FOUND"
-            }
-            else if (ci->max_world) {
-                // It's valid, but it may not be connected
-                if (!ci->def->cache)
-                    ci->def->cache = ci;
-            }
-            else {
-                // Ensure this code instance is not connected
-                if (ci->def->cache == ci)
-                    ci->def->cache = NULL;
-            }
+            if (ci->max_world != 0)
+                jl_array_ptr_1d_push(*new_specializations, (jl_value_t*)ci);
         }
         else if (jl_is_globalref(obj)) {
             continue; // wait until all the module binding tables have been initialized
@@ -3095,7 +3252,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             // rehash IdDict
             //assert(((jl_datatype_t*)(jl_typeof(obj)))->name == jl_idtable_typename);
             jl_array_t **a = (jl_array_t**)obj;
-            assert(jl_typeis(*a, jl_array_any_type));
+            assert(jl_typetagis(*a, jl_array_any_type));
             *a = jl_idtable_rehash(*a, jl_array_len(*a));
             jl_gc_wb(obj, *a);
         }
@@ -3135,7 +3292,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                "   reloc list: %8u\n"
                "    gvar list: %8u\n"
                "    fptr list: %8u\n",
-            (unsigned)sizeof_sysimg,
+            (unsigned)sizeof_sysdata,
             (unsigned)sizeof_constdata,
             (unsigned)sizeof_symbols,
             (unsigned)sizeof_tags,
@@ -3144,7 +3301,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
             (unsigned)sizeof_fptr_record);
     }
     if (cachesizes) {
-        cachesizes->sysdata = sizeof_sysimg;
+        cachesizes->sysdata = sizeof_sysdata;
         cachesizes->isbitsdata = sizeof_constdata;
         cachesizes->symboldata = sizeof_symbols;
         cachesizes->tagslist = sizeof_tags;
@@ -3172,16 +3329,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
     // Prepare for later external linkage against the sysimg
     // Also sets up images for protection against garbage collection
     arraylist_push(&jl_linkage_blobs, (void*)image_base);
-    arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t)));
+    arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg));
     arraylist_push(&jl_image_relocs, (void*)relocs_base);
+    jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t));
+    rebuild_image_blob_tree();
 
     // jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
-    uint64_t buildid = (((uint64_t)read_uint32(f)) << 32) | read_uint32(f);
-    if (!jl_build_ids)
-        jl_build_ids = jl_alloc_array_1d(jl_array_uint64_type, 0);
-    jl_array_grow_end(jl_build_ids, 1);
-    uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
-    build_id_data[jl_array_len(jl_build_ids)-1] = buildid;
     jl_gc_enable(en);
 }
 
@@ -3212,8 +3365,10 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_
 }
 
 // TODO?: refactor to make it easier to create the "package inspector"
-static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int complete)
+static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname)
 {
+    JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg);
+    jl_timing_printf(JL_TIMING_CURRENT_BLOCK, pkgname);
     uint64_t checksum = 0;
     int64_t dataendpos = 0;
     int64_t datastartpos = 0;
@@ -3245,7 +3400,6 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
         else {
             ios_close(f);
             ios_static_buffer(f, sysimg, len);
-            htable_new(&new_code_instance_validate, 0);
             pkgcachesizes cachesizes;
             jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes);
             JL_SIGATOMIC_END();
@@ -3256,26 +3410,27 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
             // Add roots to methods
             jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored));
             // Handle edges
-            jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations); // restore external backedges (needs to be last)
-            // check new CodeInstances and validate any that lack external backedges
-            validate_new_code_instances();
+            size_t world = jl_atomic_load_acquire(&jl_world_counter);
+            jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations, world); // restore external backedges (needs to be last)
             // reinit ccallables
             jl_reinit_ccallable(&ccallable_list, base, NULL);
             arraylist_free(&ccallable_list);
-            htable_free(&new_code_instance_validate);
-            if (complete) {
-                cachesizes_sv = jl_alloc_svec_uninit(7);
-                jl_svec_data(cachesizes_sv)[0] = jl_box_long(cachesizes.sysdata);
-                jl_svec_data(cachesizes_sv)[1] = jl_box_long(cachesizes.isbitsdata);
-                jl_svec_data(cachesizes_sv)[2] = jl_box_long(cachesizes.symboldata);
-                jl_svec_data(cachesizes_sv)[3] = jl_box_long(cachesizes.tagslist);
-                jl_svec_data(cachesizes_sv)[4] = jl_box_long(cachesizes.reloclist);
-                jl_svec_data(cachesizes_sv)[5] = jl_box_long(cachesizes.gvarlist);
-                jl_svec_data(cachesizes_sv)[6] = jl_box_long(cachesizes.fptrlist);
+
+            if (completeinfo) {
+                cachesizes_sv = jl_alloc_svec(7);
+                jl_svecset(cachesizes_sv, 0, jl_box_long(cachesizes.sysdata));
+                jl_svecset(cachesizes_sv, 1, jl_box_long(cachesizes.isbitsdata));
+                jl_svecset(cachesizes_sv, 2, jl_box_long(cachesizes.symboldata));
+                jl_svecset(cachesizes_sv, 3, jl_box_long(cachesizes.tagslist));
+                jl_svecset(cachesizes_sv, 4, jl_box_long(cachesizes.reloclist));
+                jl_svecset(cachesizes_sv, 5, jl_box_long(cachesizes.gvarlist));
+                jl_svecset(cachesizes_sv, 6, jl_box_long(cachesizes.fptrlist));
                 restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list,
                                                    ext_targets, edges, cachesizes_sv);
-            } else
+            }
+            else {
                 restored = (jl_value_t*)jl_svec(2, restored, init_order);
+            }
         }
     }
 
@@ -3283,22 +3438,22 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im
     return restored;
 }
 
-static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image)
+static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uint32_t checksum)
 {
-    uint64_t checksum = 0; // TODO: make this real
-    jl_restore_system_image_from_stream_(f, image, NULL, checksum, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+    JL_TIMING(LOAD_IMAGE, LOAD_Sysimg);
+    jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
 }
 
-JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int complete)
+JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname)
 {
     ios_t f;
     ios_static_buffer(&f, (char*)buf, sz);
-    jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, complete);
+    jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname);
     ios_close(&f);
     return ret;
 }
 
-JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete)
+JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname)
 {
     ios_t f;
     if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) {
@@ -3306,7 +3461,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d
             "Cache file \"%s\" not found.\n", fname);
     }
     jl_image_t pkgimage = {};
-    jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, complete);
+    jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname);
     ios_close(&f);
     return ret;
 }
@@ -3338,8 +3493,9 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname)
         if (ios_readall(&f, sysimg, len) != len)
             jl_errorf("Error reading system image file.");
         ios_close(&f);
+        uint32_t checksum = jl_crc32c(0, sysimg, len);
         ios_static_buffer(&f, sysimg, len);
-        jl_restore_system_image_from_stream(&f, &sysimage);
+        jl_restore_system_image_from_stream(&f, &sysimage, checksum);
         ios_close(&f);
         JL_SIGATOMIC_END();
     }
@@ -3350,12 +3506,13 @@ JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len)
     ios_t f;
     JL_SIGATOMIC_BEGIN();
     ios_static_buffer(&f, (char*)buf, len);
-    jl_restore_system_image_from_stream(&f, &sysimage);
+    uint32_t checksum = jl_crc32c(0, buf, len);
+    jl_restore_system_image_from_stream(&f, &sysimage, checksum);
     ios_close(&f);
     JL_SIGATOMIC_END();
 }
 
-JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int complete)
+JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname)
 {
     void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY);
     if (!pkgimg_handle) {
@@ -3374,40 +3531,9 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j
     size_t *plen;
     jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1);
 
-    jl_image_t pkgimage;
-    pkgimage.fptrs = jl_init_processor_pkgimg(pkgimg_handle);
-    if (!jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_base", (void **)&pkgimage.gvars_base, 0)) {
-        pkgimage.gvars_base = NULL;
-    }
-
-    jl_dlsym(pkgimg_handle, "jl_sysimg_gvars_offsets", (void **)&pkgimage.gvars_offsets, 1);
-    pkgimage.gvars_offsets += 1;
-
-    void *pgcstack_func_slot;
-    jl_dlsym(pkgimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 0);
-    if (pgcstack_func_slot) { // Empty package images might miss these
-        void *pgcstack_key_slot;
-        jl_dlsym(pkgimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1);
-        jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot);
-
-        size_t *tls_offset_idx;
-        jl_dlsym(pkgimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1);
-        *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
-    }
-
-    #ifdef _OS_WINDOWS_
-        pkgimage.base = (intptr_t)pkgimg_handle;
-    #else
-        Dl_info dlinfo;
-        if (dladdr((void*)pkgimage.gvars_base, &dlinfo) != 0) {
-            pkgimage.base = (intptr_t)dlinfo.dli_fbase;
-        }
-        else {
-            pkgimage.base = 0;
-        }
-    #endif
+    jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle);
 
-    jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, complete);
+    jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname);
 
     return mod;
 }
diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c
index fc109836a03c0..bf1a830b608de 100644
--- a/src/staticdata_utils.c
+++ b/src/staticdata_utils.c
@@ -1,6 +1,3 @@
-static htable_t new_code_instance_validate;
-static htable_t external_mis;
-
 // inverse of backedges graph (caller=>callees hash)
 jl_array_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this
 
@@ -88,7 +85,7 @@ static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT
 
 static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/;
 // Mutex for newly_inferred
-static jl_mutex_t newly_inferred_mutex;
+jl_mutex_t newly_inferred_mutex;
 
 // Register array of newly-inferred MethodInstances
 // This gets called as the first step of Base.include_package_for_output
@@ -108,11 +105,6 @@ JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci)
 }
 
 
-static int method_instance_in_queue(jl_method_instance_t *mi)
-{
-    return ptrhash_get(&external_mis, mi) != HT_NOTFOUND;
-}
-
 // compute whether a type references something internal to worklist
 // and thus could not have existed before deserialize
 // and thus does not need delayed unique-ing
@@ -177,32 +169,33 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
     // HT_NOTFOUND: not yet analyzed
     // HT_NOTFOUND + 1: no link back
     // HT_NOTFOUND + 2: does link back
-    // HT_NOTFOUND + 3 + depth: in-progress
+    // HT_NOTFOUND + 3: does link back, and included in new_specializations already
+    // HT_NOTFOUND + 4 + depth: in-progress
     int found = (char*)*bp - (char*)HT_NOTFOUND;
     if (found)
         return found - 1;
     arraylist_push(stack, (void*)mi);
     int depth = stack->len;
-    *bp = (void*)((char*)HT_NOTFOUND + 3 + depth); // preliminarily mark as in-progress
+    *bp = (void*)((char*)HT_NOTFOUND + 4 + depth); // preliminarily mark as in-progress
     size_t i = 0, n = jl_array_len(mi->backedges);
-    int cycle = 0;
+    int cycle = depth;
     while (i < n) {
         jl_method_instance_t *be;
         i = get_next_edge(mi->backedges, i, NULL, &be);
         int child_found = has_backedge_to_worklist(be, visited, stack);
-        if (child_found == 1) {
+        if (child_found == 1 || child_found == 2) {
             // found what we were looking for, so terminate early
             found = 1;
             break;
         }
-        else if (child_found >= 2 && child_found - 2 < cycle) {
+        else if (child_found >= 3 && child_found - 3 < cycle) {
             // record the cycle will resolve at depth "cycle"
-            cycle = child_found - 2;
+            cycle = child_found - 3;
             assert(cycle);
         }
     }
-    if (!found && cycle && cycle != depth)
-        return cycle + 2;
+    if (!found && cycle != depth)
+        return cycle + 3;
     // If we are the top of the current cycle, now mark all other parts of
     // our cycle with what we found.
     // Or if we found a backedge, also mark all of the other parts of the
@@ -210,16 +203,17 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
     while (stack->len >= depth) {
         void *mi = arraylist_pop(stack);
         bp = ptrhash_bp(visited, mi);
-        assert((char*)*bp - (char*)HT_NOTFOUND == 4 + stack->len);
+        assert((char*)*bp - (char*)HT_NOTFOUND == 5 + stack->len);
         *bp = (void*)((char*)HT_NOTFOUND + 1 + found);
     }
     return found;
 }
 
-// given the list of CodeInstances that were inferred during the
-// build, select those that are (1) external, and (2) are inferred to be called
-// from the worklist or explicitly added by a `precompile` statement.
-// Also prepares for method_instance_in_queue queries.
+// Given the list of CodeInstances that were inferred during the build, select
+// those that are (1) external, (2) still valid, (3) are inferred to be called
+// from the worklist or explicitly added by a `precompile` statement, and
+// (4) are the most recently computed result for that method.
+// These will be preserved in the image.
 static jl_array_t *queue_external_cis(jl_array_t *list)
 {
     if (list == NULL)
@@ -233,21 +227,22 @@ static jl_array_t *queue_external_cis(jl_array_t *list)
     arraylist_new(&stack, 0);
     jl_array_t *new_specializations = jl_alloc_vec_any(0);
     JL_GC_PUSH1(&new_specializations);
-    for (i = 0; i < n0; i++) {
+    for (i = n0; i-- > 0; ) {
         jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(list, i);
         assert(jl_is_code_instance(ci));
+        if (!ci->relocatability)
+            continue;
         jl_method_instance_t *mi = ci->def;
         jl_method_t *m = mi->def.method;
-        if (jl_is_method(m)) {
-            if (jl_object_in_image((jl_value_t*)m->module)) {
-                if (ptrhash_get(&external_mis, mi) == HT_NOTFOUND) {
-                    int found = has_backedge_to_worklist(mi, &visited, &stack);
-                    assert(found == 0 || found == 1);
-                    assert(stack.len == 0);
-                    if (found == 1) {
-                        ptrhash_put(&external_mis, mi, mi);
-                        jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci);
-                    }
+        if (ci->inferred && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) {
+            int found = has_backedge_to_worklist(mi, &visited, &stack);
+            assert(found == 0 || found == 1 || found == 2);
+            assert(stack.len == 0);
+            if (found == 1 && ci->max_world == ~(size_t)0) {
+                void **bp = ptrhash_bp(&visited, mi);
+                if (*bp != (void*)((char*)HT_NOTFOUND + 3)) {
+                    *bp = (void*)((char*)HT_NOTFOUND + 3);
+                    jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci);
                 }
             }
         }
@@ -255,35 +250,37 @@ static jl_array_t *queue_external_cis(jl_array_t *list)
     htable_free(&visited);
     arraylist_free(&stack);
     JL_GC_POP();
+    // reverse new_specializations
+    n0 = jl_array_len(new_specializations);
+    jl_value_t **news = (jl_value_t**)jl_array_data(new_specializations);
+    for (i = 0; i < n0; i++) {
+        jl_value_t *temp = news[i];
+        news[i] = news[n0 - i - 1];
+        news[n0 - i - 1] = temp;
+    }
     return new_specializations;
 }
 
 // New roots for external methods
-static void jl_collect_methods(htable_t *mset, jl_array_t *new_specializations)
+static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializations, uint64_t key)
 {
-    size_t i, l = new_specializations ? jl_array_len(new_specializations) : 0;
-    jl_value_t *v;
-    jl_method_t *m;
-    for (i = 0; i < l; i++) {
-        v = jl_array_ptr_ref(new_specializations, i);
-        assert(jl_is_code_instance(v));
-        m = ((jl_code_instance_t*)v)->def->def.method;
+    htable_t mset;
+    htable_new(&mset, 0);
+    size_t l = new_specializations ? jl_array_len(new_specializations) : 0;
+    for (size_t i = 0; i < l; i++) {
+        jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i);
+        assert(jl_is_code_instance(ci));
+        jl_method_t *m = ci->def->def.method;
         assert(jl_is_method(m));
-        ptrhash_put(mset, (void*)m, (void*)m);
+        ptrhash_put(&mset, (void*)m, (void*)m);
     }
-}
-
-static void jl_collect_new_roots(jl_array_t *roots, const htable_t *mset, uint64_t key)
-{
-    size_t i, sz = mset->size;
     int nwithkey;
-    jl_method_t *m;
-    void *const *table = mset->table;
+    void *const *table = mset.table;
     jl_array_t *newroots = NULL;
     JL_GC_PUSH1(&newroots);
-    for (i = 0; i < sz; i += 2) {
+    for (size_t i = 0; i < mset.size; i += 2) {
         if (table[i+1] != HT_NOTFOUND) {
-            m = (jl_method_t*)table[i];
+            jl_method_t *m = (jl_method_t*)table[i];
             assert(jl_is_method(m));
             nwithkey = nroots_with_key(m, key);
             if (nwithkey) {
@@ -305,6 +302,7 @@ static void jl_collect_new_roots(jl_array_t *roots, const htable_t *mset, uint64
         }
     }
     JL_GC_POP();
+    htable_free(&mset);
 }
 
 // Create the forward-edge map (caller => callees)
@@ -371,12 +369,18 @@ static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure)
     }
     if (edges_map == NULL)
         return 1;
-    jl_svec_t *specializations = m->specializations;
-    size_t i, l = jl_svec_len(specializations);
-    for (i = 0; i < l; i++) {
-        jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
-        if ((jl_value_t*)callee != jl_nothing)
-            collect_backedges(callee, !s);
+    jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations);
+    if (!jl_is_svec(specializations)) {
+        jl_method_instance_t *callee = (jl_method_instance_t*)specializations;
+        collect_backedges(callee, !s);
+    }
+    else {
+        size_t i, l = jl_svec_len(specializations);
+        for (i = 0; i < l; i++) {
+            jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
+            if ((jl_value_t*)callee != jl_nothing)
+                collect_backedges(callee, !s);
+        }
     }
     return 1;
 }
@@ -422,9 +426,17 @@ static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_ar
 // Extract `edges` and `ext_targets` from `edges_map`
 // `edges` = [caller1, targets_indexes1, ...], the list of methods and their edges
 // `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target
-static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets)
+static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *external_cis, size_t world)
 {
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    htable_t external_mis;
+    htable_new(&external_mis, 0);
+    if (external_cis) {
+        for (size_t i = 0; i < jl_array_len(external_cis); i++) {
+            jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(external_cis, i);
+            jl_method_instance_t *mi = ci->def;
+            ptrhash_put(&external_mis, (void*)mi, (void*)mi);
+        }
+    }
     arraylist_t wq;
     arraylist_new(&wq, 0);
     void **table = (void**)jl_array_data(edges_map);    // edges_map is caller => callees
@@ -438,10 +450,11 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets)
             continue;
         assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
         if (!jl_object_in_image((jl_value_t*)caller->def.method->module) ||
-            method_instance_in_queue(caller)) {
+            ptrhash_get(&external_mis, caller) != HT_NOTFOUND) {
             jl_record_edges(caller, &wq, edges);
         }
     }
+    htable_free(&external_mis);
     while (wq.len) {
         jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq);
         jl_record_edges(caller, &wq, edges);
@@ -462,7 +475,8 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets)
     // and compute the old methods list, ready for serialization
     jl_value_t *matches = NULL;
     jl_array_t *callee_ids = NULL;
-    JL_GC_PUSH2(&matches, &callee_ids);
+    jl_value_t *sig = NULL;
+    JL_GC_PUSH3(&matches, &callee_ids, &sig);
     for (size_t i = 0; i < l; i += 2) {
         jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1);
         size_t l = jl_array_len(callees);
@@ -506,14 +520,17 @@ static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets)
                     }
                 }
                 else {
-                    jl_value_t *sig;
-                    if (jl_is_method_instance(callee))
-                        sig = ((jl_method_instance_t*)callee)->specTypes;
-                    else
+                    if (jl_is_method_instance(callee)) {
+                        jl_method_instance_t *mi = (jl_method_instance_t*)callee;
+                        sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes);
+                    }
+                    else {
                         sig = callee;
+                    }
                     int ambig = 0;
                     matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing,
-                            -1, 0, world, &min_valid, &max_valid, &ambig);
+                            INT32_MAX, 0, world, &min_valid, &max_valid, &ambig);
+                    sig = NULL;
                     if (matches == jl_nothing) {
                         callee_ids = NULL; // invalid
                         break;
@@ -815,66 +832,70 @@ static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key)
     }
 }
 
-static int remove_code_instance_from_validation(jl_code_instance_t *codeinst)
-{
-    return ptrhash_remove(&new_code_instance_validate, codeinst);
-}
 
 // verify that these edges intersect with the same methods as before
-static jl_array_t *jl_verify_edges(jl_array_t *targets)
+static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld)
 {
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    JL_TIMING(VERIFY_IMAGE, VERIFY_Edges);
     size_t i, l = jl_array_len(targets) / 3;
-    jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l);
-    memset(jl_array_data(valids), 1, l);
+    static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL;
+    if (ulong_array == NULL)
+        ulong_array = jl_apply_array_type((jl_value_t*)jl_ulong_type, 1);
+    jl_array_t *maxvalids = jl_alloc_array_1d(ulong_array, l);
+    memset(jl_array_data(maxvalids), 0, l * sizeof(size_t));
     jl_value_t *loctag = NULL;
     jl_value_t *matches = NULL;
-    JL_GC_PUSH3(&valids, &matches, &loctag);
+    jl_value_t *sig = NULL;
+    JL_GC_PUSH4(&maxvalids, &matches, &sig, &loctag);
     for (i = 0; i < l; i++) {
         jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3);
         jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1);
         jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2);
-        int valid = 1;
         size_t min_valid = 0;
         size_t max_valid = ~(size_t)0;
         if (invokesig) {
             assert(callee && "unsupported edge");
             jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method);
             if ((jl_value_t*)mt == jl_nothing) {
-                valid = 0;
+                max_valid = 0;
             }
             else {
-                matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, world, &min_valid, &max_valid);
+                matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, minworld, &min_valid, &max_valid);
                 if (matches == jl_nothing) {
-                     valid = 0;
+                     max_valid = 0;
                 }
                 else {
                     matches = (jl_value_t*)((jl_method_match_t*)matches)->method;
                     if (matches != expected) {
-                        valid = 0;
+                        max_valid = 0;
                     }
                 }
             }
         }
         else {
-            jl_value_t *sig;
-            if (jl_is_method_instance(callee))
-                sig = ((jl_method_instance_t*)callee)->specTypes;
-            else
+            if (jl_is_method_instance(callee)) {
+                jl_method_instance_t *mi = (jl_method_instance_t*)callee;
+                sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes);
+            }
+            else {
                 sig = callee;
+            }
             assert(jl_is_array(expected));
             int ambig = 0;
             // TODO: possibly need to included ambiguities too (for the optimizer correctness)?
+            // len + 1 is to allow us to log causes of invalidation (SnoopCompile's @snoopr)
             matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing,
-                    -1, 0, world, &min_valid, &max_valid, &ambig);
+                    _jl_debug_method_invalidation ? INT32_MAX : jl_array_len(expected),
+                    0, minworld, &min_valid, &max_valid, &ambig);
+            sig = NULL;
             if (matches == jl_nothing) {
-                valid = 0;
+                max_valid = 0;
             }
             else {
                 // setdiff!(matches, expected)
                 size_t j, k, ins = 0;
                 if (jl_array_len(matches) != jl_array_len(expected)) {
-                    valid = 0;
+                    max_valid = 0;
                 }
                 for (k = 0; k < jl_array_len(matches); k++) {
                     jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method;
@@ -886,18 +907,18 @@ static jl_array_t *jl_verify_edges(jl_array_t *targets)
                         // intersection has a new method or a method was
                         // deleted--this is now probably no good, just invalidate
                         // everything about it now
-                        valid = 0;
+                        max_valid = 0;
                         if (!_jl_debug_method_invalidation)
                             break;
                         jl_array_ptr_set(matches, ins++, match);
                     }
                 }
-                if (!valid && _jl_debug_method_invalidation)
+                if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation)
                     jl_array_del_end((jl_array_t*)matches, jl_array_len(matches) - ins);
             }
         }
-        jl_array_uint8_set(valids, i, valid);
-        if (!valid && _jl_debug_method_invalidation) {
+        ((size_t*)(jl_array_data(maxvalids)))[i] = max_valid;
+        if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) {
             jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? (jl_value_t*)invokesig : callee);
             loctag = jl_cstr_to_string("insert_backedges_callee");
             jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
@@ -910,105 +931,112 @@ static jl_array_t *jl_verify_edges(jl_array_t *targets)
         //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr);
     }
     JL_GC_POP();
-    return valids;
+    return maxvalids;
 }
 
-// Combine all edges relevant to a method into the visited table
-static void jl_verify_methods(jl_array_t *edges, jl_array_t *valids, htable_t *visited)
+// Combine all edges relevant to a method to initialize the maxvalids list
+static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids)
 {
+    JL_TIMING(VERIFY_IMAGE, VERIFY_Methods);
     jl_value_t *loctag = NULL;
-    JL_GC_PUSH1(&loctag);
+    jl_array_t *maxvalids2 = NULL;
+    JL_GC_PUSH2(&loctag, &maxvalids2);
     size_t i, l = jl_array_len(edges) / 2;
-    htable_new(visited, l);
+    maxvalids2 = jl_alloc_array_1d(jl_typeof(maxvalids), l);
+    size_t *maxvalids2_data = (size_t*)jl_array_data(maxvalids2);
+    memset(maxvalids2_data, 0, l * sizeof(size_t));
     for (i = 0; i < l; i++) {
         jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
         assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
         jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
-        assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type));
-        int valid = 1;
+        assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type));
         if (callee_ids == NULL) {
             // serializing the edges had failed
-            valid = 0;
+            maxvalids2_data[i] = 0;
         }
         else {
             int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
             size_t j;
-            for (j = 0; valid && j < idxs[0]; j++) {
+            maxvalids2_data[i] = ~(size_t)0;
+            for (j = 0; j < idxs[0]; j++) {
                 int32_t idx = idxs[j + 1];
-                valid = jl_array_uint8_ref(valids, idx);
-                if (!valid && _jl_debug_method_invalidation) {
+                size_t max_valid = ((size_t*)(jl_array_data(maxvalids)))[idx];
+                if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) {
                     jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller);
                     loctag = jl_cstr_to_string("verify_methods");
                     jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
                     loctag = jl_box_int32((int32_t)idx);
                     jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
                 }
+                if (max_valid < maxvalids2_data[i])
+                    maxvalids2_data[i] = max_valid;
+                if (max_valid == 0)
+                    break;
             }
         }
-        ptrhash_put(visited, caller, (void*)(((char*)HT_NOTFOUND) + valid + 1));
         //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller);
-        //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr);
-        // HT_NOTFOUND: valid (no invalid edges)
-        // HT_NOTFOUND + 1: invalid
-        // HT_NOTFOUND + 2: need to scan
-        // HT_NOTFOUND + 3 + depth: in-progress
+        //ios_puts(maxvalid2_data[i] == ~(size_t)0 ? "valid\n" : "INVALID\n", ios_stderr);
     }
     JL_GC_POP();
+    return maxvalids2;
 }
 
 
 // Visit the entire call graph, starting from edges[idx] to determine if that method is valid
 // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
-static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, arraylist_t *stack)
+// and slightly modified with an early termination option once the computation reaches its minimum
+static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size_t idx, arraylist_t *visited, arraylist_t *stack)
 {
-    jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, idx * 2);
-    assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
-    int found = (char*)ptrhash_get(visited, (void*)caller) - (char*)HT_NOTFOUND;
-    if (found == 0)
-        return 1; // NOTFOUND == valid
-    if (found == 1)
-        return 0; // invalid
-    if (found != 2)
-        return found - 1; // depth
-    found = 0;
+    if (maxvalids2_data[idx] == 0) {
+        visited->items[idx] = (void*)1;
+        return 0;
+    }
+    size_t cycle = (size_t)visited->items[idx];
+    if (cycle != 0)
+        return cycle - 1; // depth remaining
     jl_value_t *cause = NULL;
-    arraylist_push(stack, (void*)caller);
-    int depth = stack->len;
-    ptrhash_put(visited, (void*)caller, (void*)((char*)HT_NOTFOUND + 3 + depth)); // change 2 to in-progress at depth
+    arraylist_push(stack, (void*)idx);
+    size_t depth = stack->len;
+    visited->items[idx] = (void*)(1 + depth);
     jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1);
-    assert(jl_typeis((jl_value_t*)callee_ids, jl_array_int32_type));
+    assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type));
     int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
-    int cycle = 0;
     size_t i, n = jl_array_len(callee_ids);
+    cycle = depth;
     for (i = idxs[0] + 1; i < n; i++) {
-        int32_t idx = idxs[i];
-        int child_found = jl_verify_graph_edge(edges, idx, visited, stack);
-        if (child_found == 0) {
+        int32_t childidx = idxs[i];
+        int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, childidx, visited, stack);
+        size_t child_max_valid = maxvalids2_data[childidx];
+        if (child_max_valid < maxvalids2_data[idx]) {
+            maxvalids2_data[idx] = child_max_valid;
+            cause = jl_array_ptr_ref(edges, childidx * 2);
+        }
+        if (child_max_valid == 0) {
             // found what we were looking for, so terminate early
-            found = 1;
-            cause = jl_array_ptr_ref(edges, idx * 2);
             break;
         }
-        else if (child_found >= 2 && child_found - 2 < cycle) {
+        else if (child_cycle && child_cycle < cycle) {
             // record the cycle will resolve at depth "cycle"
-            cycle = child_found - 2;
-            assert(cycle);
+            cycle = child_cycle;
         }
     }
-    if (!found && cycle && cycle != depth)
-        return cycle + 2;
+    size_t max_valid = maxvalids2_data[idx];
+    if (max_valid != 0 && cycle != depth)
+        return cycle;
     // If we are the top of the current cycle, now mark all other parts of
     // our cycle with what we found.
-    // Or if we found a backedge, also mark all of the other parts of the
-    // cycle as also having an backedge.
+    // Or if we found a failed edge, also mark all of the other parts of the
+    // cycle as also having an failed edge.
     while (stack->len >= depth) {
-        void *mi = arraylist_pop(stack);
-        assert((char*)ptrhash_get(visited, mi) - (char*)HT_NOTFOUND == 4 + stack->len);
-        if (found)
-            ptrhash_put(visited, mi, (void*)((char*)HT_NOTFOUND + 1 + found));
-        else
-            ptrhash_remove(visited, mi); // assign as NOTFOUND in table
-        if (_jl_debug_method_invalidation && found) {
+        size_t childidx = (size_t)arraylist_pop(stack);
+        assert(visited->items[childidx] == (void*)(2 + stack->len));
+        if (idx != childidx) {
+            if (max_valid < maxvalids2_data[childidx])
+                maxvalids2_data[childidx] = max_valid;
+        }
+        visited->items[childidx] = (void*)1;
+        if (_jl_debug_method_invalidation && max_valid != ~(size_t)0) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(edges, childidx * 2);
             jl_value_t *loctag = NULL;
             JL_GC_PUSH1(&loctag);
             jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
@@ -1018,125 +1046,104 @@ static int jl_verify_graph_edge(jl_array_t *edges, int idx, htable_t *visited, a
             JL_GC_POP();
         }
     }
-    return found ? 0 : 1;
+    return 0;
 }
 
 // Visit all entries in edges, verify if they are valid
-static jl_array_t *jl_verify_graph(jl_array_t *edges, htable_t *visited)
+static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2)
 {
-    arraylist_t stack;
+    JL_TIMING(VERIFY_IMAGE, VERIFY_Graph);
+    arraylist_t stack, visited;
     arraylist_new(&stack, 0);
     size_t i, n = jl_array_len(edges) / 2;
-    jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, n);
-    JL_GC_PUSH1(&valids);
-    int8_t *valids_data = (int8_t*)jl_array_data(valids);
-    for (i = 0; i < n; i++)
-        valids_data[i] = jl_verify_graph_edge(edges, i, visited, &stack);
+    arraylist_new(&visited, n);
+    memset(visited.items, 0, n * sizeof(size_t));
+    size_t *maxvalids2_data = (size_t*)jl_array_data(maxvalids2);
+    for (i = 0; i < n; i++) {
+        assert(visited.items[i] == (void*)0 || visited.items[i] == (void*)1);
+        int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, i, &visited, &stack);
+        assert(child_cycle == 0); (void)child_cycle;
+        assert(stack.len == 0);
+        assert(visited.items[i] == (void*)1);
+    }
     arraylist_free(&stack);
-    JL_GC_POP();
-    return valids;
+    arraylist_free(&visited);
 }
 
 // Restore backedges to external targets
 // `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods.
 // `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods.
-static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list)
+static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list, size_t minworld)
 {
     // determine which CodeInstance objects are still valid in our image
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
-    jl_array_t *valids = jl_verify_edges(ext_targets);
+    jl_array_t *valids = jl_verify_edges(ext_targets, minworld);
     JL_GC_PUSH1(&valids);
-    htable_t visited;
-    htable_new(&visited, 0);
-    jl_verify_methods(edges, valids, &visited); // consumes valids, creates visited
-    valids = jl_verify_graph(edges, &visited); // consumes visited, creates valids
-    size_t i, l = jl_array_len(edges) / 2;
+    valids = jl_verify_methods(edges, valids); // consumes edges valids, initializes methods valids
+    jl_verify_graph(edges, valids); // propagates methods valids for each edge
+    size_t i, l;
 
     // next build a map from external MethodInstances to their CodeInstance for insertion
-    if (ci_list == NULL) {
-        htable_reset(&visited, 0);
-    }
-    else {
-        size_t i, l = jl_array_len(ci_list);
-        htable_reset(&visited, l);
-        for (i = 0; i < l; i++) {
-            jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i);
-            assert(ptrhash_get(&visited, (void*)ci->def) == HT_NOTFOUND);   // check that we don't have multiple cis for same mi
-            ptrhash_put(&visited, (void*)ci->def, (void*)ci);
-        }
-    }
-
-    // next disable any invalid codes, so we do not try to enable them
+    l = jl_array_len(ci_list);
+    htable_t visited;
+    htable_new(&visited, l);
     for (i = 0; i < l; i++) {
-        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
-        assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
-        int valid = jl_array_uint8_ref(valids, i);
-        if (valid)
-            continue;
-        void *ci = ptrhash_get(&visited, (void*)caller);
-        if (ci != HT_NOTFOUND) {
-            assert(jl_is_code_instance(ci));
-            remove_code_instance_from_validation((jl_code_instance_t*)ci); // mark it as handled
+        jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i);
+        assert(ci->min_world == minworld);
+        if (ci->max_world == 1) { // sentinel value: has edges to external callables
+            ptrhash_put(&visited, (void*)ci->def, (void*)ci);
         }
         else {
-            jl_code_instance_t *codeinst = caller->cache;
-            while (codeinst) {
-                remove_code_instance_from_validation(codeinst); // should be left invalid
-                codeinst = jl_atomic_load_relaxed(&codeinst->next);
+            assert(ci->max_world == ~(size_t)0);
+            jl_method_instance_t *caller = ci->def;
+            if (ci->inferred && jl_rettype_inferred(caller, minworld, ~(size_t)0) == jl_nothing) {
+                jl_mi_cache_insert(caller, ci);
             }
+            //jl_static_show((jl_stream*)ios_stderr, (jl_value_t*)caller);
+            //ios_puts("free\n", ios_stderr);
         }
     }
 
-    // finally enable any applicable new codes
+    // next enable any applicable new codes
+    l = jl_array_len(edges) / 2;
     for (i = 0; i < l; i++) {
         jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
-        int valid = jl_array_uint8_ref(valids, i);
-        if (!valid)
-            continue;
-        // if this callee is still valid, add all the backedges
-        jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
-        int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
-        for (size_t j = 0; j < idxs[0]; j++) {
-            int32_t idx = idxs[j + 1];
-            jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3);
-            jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1);
-            if (callee && jl_is_method_instance(callee)) {
-                jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller);
-            }
-            else {
-                jl_value_t *sig = callee == NULL ? invokesig : callee;
-                jl_methtable_t *mt = jl_method_table_for(sig);
-                // FIXME: rarely, `callee` has an unexpected `Union` signature,
-                // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344
-                // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)`
-                // This workaround exposes us to (rare) 265-violations.
-                if ((jl_value_t*)mt != jl_nothing)
-                    jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller);
+        size_t maxvalid = ((size_t*)(jl_array_data(valids)))[i];
+        if (maxvalid == ~(size_t)0) {
+            // if this callee is still valid, add all the backedges
+            jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
+            int32_t *idxs = (int32_t*)jl_array_data(callee_ids);
+            for (size_t j = 0; j < idxs[0]; j++) {
+                int32_t idx = idxs[j + 1];
+                jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3);
+                jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1);
+                if (callee && jl_is_method_instance(callee)) {
+                    jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller);
+                }
+                else {
+                    jl_value_t *sig = callee == NULL ? invokesig : callee;
+                    jl_methtable_t *mt = jl_method_table_for(sig);
+                    // FIXME: rarely, `callee` has an unexpected `Union` signature,
+                    // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344
+                    // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)`
+                    // This workaround exposes us to (rare) 265-violations.
+                    if ((jl_value_t*)mt != jl_nothing)
+                        jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller);
+                }
             }
         }
-        // then enable it
+        // then enable any methods associated with it
         void *ci = ptrhash_get(&visited, (void*)caller);
+        //assert(ci != HT_NOTFOUND);
         if (ci != HT_NOTFOUND) {
             // have some new external code to use
             assert(jl_is_code_instance(ci));
             jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-            remove_code_instance_from_validation(codeinst); // mark it as handled
-            assert(codeinst->min_world >= world && codeinst->inferred);
-            codeinst->max_world = ~(size_t)0;
-            if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) {
+            assert(codeinst->min_world == minworld && codeinst->inferred);
+            codeinst->max_world = maxvalid;
+            if (jl_rettype_inferred(caller, minworld, maxvalid) == jl_nothing) {
                 jl_mi_cache_insert(caller, codeinst);
             }
         }
-        else {
-            jl_code_instance_t *codeinst = caller->cache;
-            while (codeinst) {
-                if (remove_code_instance_from_validation(codeinst)) { // mark it as handled
-                    assert(codeinst->min_world >= world && codeinst->inferred);
-                    codeinst->max_world = ~(size_t)0;
-                }
-                codeinst = jl_atomic_load_relaxed(&codeinst->next);
-            }
-        }
     }
 
     htable_free(&visited);
@@ -1152,27 +1159,6 @@ static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges)
     }
 }
 
-static void validate_new_code_instances(void)
-{
-    size_t world = jl_atomic_load_acquire(&jl_world_counter);
-    size_t i;
-    for (i = 0; i < new_code_instance_validate.size; i += 2) {
-        if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) {
-            //assert(0 && "unexpected unprocessed CodeInstance found");
-            jl_code_instance_t *ci = (jl_code_instance_t*)new_code_instance_validate.table[i];
-            JL_GC_PROMISE_ROOTED(ci); // TODO: this needs a root (or restructuring to avoid it)
-            assert(ci->min_world >= world && ci->inferred);
-            ci->max_world = ~(size_t)0;
-            jl_method_instance_t *caller = ci->def;
-            if (jl_rettype_inferred(caller, world, ~(size_t)0) == jl_nothing) {
-                jl_mi_cache_insert(caller, ci);
-            }
-            //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller);
-            //ios_puts("FREE\n", ios_stderr);
-        }
-    }
-}
-
 static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods)
 {
     if (!jl_main_module->build_id.lo) {
@@ -1235,3 +1221,45 @@ JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s, uint8_t *pkgimage, int64_t
     }
     return checksum;
 }
+
+// Returns `depmodidxs` where `j = depmodidxs[i]` corresponds to the blob `depmods[j]` in `write_mod_list`
+static jl_array_t *image_to_depmodidx(jl_array_t *depmods)
+{
+    if (!depmods)
+        return NULL;
+    assert(jl_array_len(depmods) < INT32_MAX && "too many dependencies to serialize");
+    size_t lbids = n_linkage_blobs();
+    size_t ldeps = jl_array_len(depmods);
+    jl_array_t *depmodidxs = jl_alloc_array_1d(jl_array_int32_type, lbids);
+    int32_t *dmidxs = (int32_t*)jl_array_data(depmodidxs);
+    memset(dmidxs, -1, lbids * sizeof(int32_t));
+    dmidxs[0] = 0; // the sysimg can also be found at idx 0, by construction
+    for (size_t i = 0, j = 0; i < ldeps; i++) {
+        jl_value_t *depmod = jl_array_ptr_ref(depmods, i);
+        size_t idx = external_blob_index(depmod);
+        if (idx < lbids) { // jl_object_in_image
+            j++;
+            if (dmidxs[idx] == -1)
+                dmidxs[idx] = j;
+        }
+    }
+    return depmodidxs;
+}
+
+// Returns `imageidxs` where `j = imageidxs[i]` is the blob corresponding to `depmods[j]`
+static jl_array_t *depmod_to_imageidx(jl_array_t *depmods)
+{
+    if (!depmods)
+        return NULL;
+    size_t ldeps = jl_array_len(depmods);
+    jl_array_t *imageidxs = jl_alloc_array_1d(jl_array_int32_type, ldeps + 1);
+    int32_t *imgidxs = (int32_t*)jl_array_data(imageidxs);
+    imgidxs[0] = 0;
+    for (size_t i = 0; i < ldeps; i++) {
+        jl_value_t *depmod = jl_array_ptr_ref(depmods, i);
+        size_t j = external_blob_index(depmod);
+        assert(j < INT32_MAX);
+        imgidxs[i + 1] = (int32_t)j;
+    }
+    return imageidxs;
+}
diff --git a/src/subtype.c b/src/subtype.c
index a0896e9050ff2..fd9bd3e8be00f 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -73,18 +73,11 @@ typedef struct jl_varbinding_t {
     // let ub = var.ub ∩ type
     // 0 - var.ub <: type ? var : ub
     // 1 - var.ub = ub; return var
-    // 2 - either (var.ub = ub; return var), or return ub
+    // 2 - var.lb = lb; return ub
     int8_t constraintkind;
-    // intvalued: must be integer-valued; i.e. occurs as N in Vararg{_,N}
-    // 0: No restriction
-    // 1: must be unbounded/ or fixed to a `Int`/typevar
-    // 2: we have some imprecise vararg length intersection that can be improved if this var is const valued.
-    int8_t intvalued;
+    int8_t intvalued; // intvalued: must be integer-valued; i.e. occurs as N in Vararg{_,N}
     int8_t limited;
     int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
-    // when this variable's integer value is compared to that of another,
-    // it equals `other + offset`. used by vararg length parameters.
-    int16_t offset;
     // array of typevars that our bounds depend on, whose UnionAlls need to be
     // moved outside ours.
     jl_array_t *innervars;
@@ -101,12 +94,14 @@ typedef struct jl_stenv_t {
     jl_value_t **envout;      // for passing caller the computed bounds of right-side variables
     int envsz;                // length of envout
     int envidx;               // current index in envout
-    int invdepth;             // # of invariant constructors we're nested in on the left
-    int Rinvdepth;            // # of invariant constructors we're nested in on the right
+    int invdepth;             // current number of invariant constructors we're nested in
     int ignore_free;          // treat free vars as black boxes; used during intersection
     int intersection;         // true iff subtype is being called from intersection
     int emptiness_only;       // true iff intersection only needs to test for emptiness
     int triangular;           // when intersecting Ref{X} with Ref{<:Y}
+    // Used to represent the length difference between 2 vararg.
+    // intersect(X, Y) ==> X = Y + Loffset
+    int Loffset;
 } jl_stenv_t;
 
 // state manipulation utilities
@@ -159,77 +154,141 @@ static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT
         memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8);     \
     } while (0);
 
+static int current_env_length(jl_stenv_t *e)
+{
+    jl_varbinding_t *v = e->vars;
+    int len = 0;
+    while (v) {
+        len++;
+        v = v->prev;
+    }
+    return len;
+}
+
 typedef struct {
     int8_t *buf;
     int rdepth;
-    int8_t _space[24];
+    int8_t _space[24]; // == 8 * 3
+    jl_gcframe_t gcframe;
+    jl_value_t *roots[24];
 } jl_savedenv_t;
 
-static void save_env(jl_stenv_t *e, jl_value_t **root, jl_savedenv_t *se)
+static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
 {
-    jl_varbinding_t *v = e->vars;
-    int len=0;
-    while (v != NULL) {
-        len++;
-        v = v->prev;
+    jl_value_t **roots = NULL;
+    int nroots = 0;
+    if (root) {
+        if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
+            jl_svec_t *sv = (jl_svec_t*)se->roots[0];
+            assert(jl_is_svec(sv));
+            roots = jl_svec_data(sv);
+            nroots = jl_svec_len(sv);
+        }
+        else {
+            roots = se->roots;
+            nroots = se->gcframe.nroots >> 2;
+        }
     }
-    if (root)
-        *root = (jl_value_t*)jl_alloc_svec(len * 3);
-    se->buf = (int8_t*)(len > 8 ? malloc_s(len * 3) : &se->_space);
-#ifdef __clang_gcanalyzer__
-    memset(se->buf, 0, len * 3);
-#endif
-    int i=0, j=0; v = e->vars;
+    jl_varbinding_t *v = e->vars;
+    int i = 0, j = 0;
     while (v != NULL) {
         if (root) {
-            jl_svecset(*root, i++, v->lb);
-            jl_svecset(*root, i++, v->ub);
-            jl_svecset(*root, i++, (jl_value_t*)v->innervars);
+            roots[i++] = v->lb;
+            roots[i++] = v->ub;
+            roots[i++] = (jl_value_t*)v->innervars;
         }
         se->buf[j++] = v->occurs;
         se->buf[j++] = v->occurs_inv;
         se->buf[j++] = v->occurs_cov;
         v = v->prev;
     }
+    assert(i == nroots); (void)nroots;
     se->rdepth = e->Runions.depth;
 }
 
+static void alloc_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
+{
+    jl_task_t *ct = jl_current_task;
+    int len = current_env_length(e);
+    se->gcframe.nroots = 0;
+    se->gcframe.prev = NULL;
+    se->roots[0] = NULL;
+    if (len > 8) {
+        if (root) {
+            se->gcframe.nroots = JL_GC_ENCODE_PUSHARGS(1);
+            se->gcframe.prev = ct->gcstack;
+            ct->gcstack = &se->gcframe;
+            jl_svec_t *sv = jl_alloc_svec(len * 3);
+            se->roots[0] = (jl_value_t*)sv;
+        }
+    }
+    else {
+        if (root && len) {
+            for (int i = 0; i < len * 3; i++)
+                se->roots[i] = NULL;
+            se->gcframe.nroots = JL_GC_ENCODE_PUSHARGS(len * 3);
+            se->gcframe.prev = ct->gcstack;
+            ct->gcstack = &se->gcframe;
+        }
+    }
+    se->buf = (len > 8 ? (int8_t*)malloc_s(len * 3) : se->_space);
+#ifdef __clang_gcanalyzer__
+    memset(se->buf, 0, len * 3);
+#endif
+}
+
+static void save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
+{
+    alloc_env(e, se, root);
+    re_save_env(e, se, root);
+}
+
 static void free_env(jl_savedenv_t *se) JL_NOTSAFEPOINT
 {
+    if (se->gcframe.nroots) {
+        assert(jl_current_task->gcstack == &se->gcframe);
+        JL_GC_POP();
+    }
     if (se->buf != se->_space)
         free(se->buf);
     se->buf = NULL;
 }
 
-static void restore_env(jl_stenv_t *e, jl_value_t *root, jl_savedenv_t *se) JL_NOTSAFEPOINT
+static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPOINT
 {
+    jl_value_t **roots = NULL;
+    int nroots = 0;
+    if (root) {
+        if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
+            jl_svec_t *sv = (jl_svec_t*)se->roots[0];
+            assert(jl_is_svec(sv));
+            roots = jl_svec_data(sv);
+            nroots = jl_svec_len(sv);
+        }
+        else {
+            roots = se->roots;
+            nroots = se->gcframe.nroots >> 2;
+        }
+    }
     jl_varbinding_t *v = e->vars;
     int i = 0, j = 0;
     while (v != NULL) {
-        if (root) v->lb = jl_svecref(root, i++);
-        if (root) v->ub = jl_svecref(root, i++);
-        if (root) v->innervars = (jl_array_t*)jl_svecref(root, i++);
+        if (root) {
+            v->lb = roots[i++];
+            v->ub = roots[i++];
+            v->innervars = (jl_array_t*)roots[i++];
+        }
         v->occurs = se->buf[j++];
         v->occurs_inv = se->buf[j++];
         v->occurs_cov = se->buf[j++];
         v = v->prev;
     }
+    assert(i == nroots); (void)nroots;
     e->Runions.depth = se->rdepth;
     if (e->envout && e->envidx < e->envsz)
         memset(&e->envout[e->envidx], 0, (e->envsz - e->envidx)*sizeof(void*));
 }
 
-static int current_env_length(jl_stenv_t *e)
-{
-    jl_varbinding_t *v = e->vars;
-    int len = 0;
-    while (v) {
-        len++;
-        v = v->prev;
-    }
-    return len;
-}
-
 static void clean_occurs(jl_stenv_t *e)
 {
     jl_varbinding_t *v = e->vars;
@@ -239,10 +298,12 @@ static void clean_occurs(jl_stenv_t *e)
     }
 }
 
+#define flip_offset(e) ((e)->Loffset *= -1)
+
 // type utilities
 
 // quickly test that two types are identical
-static int obviously_egal(jl_value_t *a, jl_value_t *b)
+static int obviously_egal(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT
 {
     if (a == (jl_value_t*)jl_typeofbottom_type->super)
         a = (jl_value_t*)jl_typeofbottom_type; // supertype(typeof(Union{})) is equal to, although distinct from, itself
@@ -306,11 +367,8 @@ static int obviously_unequal(jl_value_t *a, jl_value_t *b)
             if (ad->name != bd->name)
                 return 1;
             int istuple = (ad->name == jl_tuple_typename);
-            if ((jl_is_concrete_type(a) || jl_is_concrete_type(b)) &&
-                jl_type_equality_is_identity(a, b)) {
-                if (!istuple && ad->name != jl_type_typename) // HACK: can't properly normalize Tuple{Float64} == Tuple{<:Float64} like types or Type{T} types
-                    return 1;
-            }
+            if (jl_type_equality_is_identity(a, b))
+                return 1;
             size_t i, np;
             if (istuple) {
                 size_t na = jl_nparams(ad), nb = jl_nparams(bd);
@@ -389,19 +447,22 @@ static int obviously_in_union(jl_value_t *u, jl_value_t *x)
     return obviously_egal(u, x);
 }
 
-static int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity)
+int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity)
 {
     if (a == b || a == (jl_value_t*)jl_any_type || b == (jl_value_t*)jl_any_type)
         return 0;
     if (specificity && a == (jl_value_t*)jl_typeofbottom_type)
         return 0;
-    if (jl_is_concrete_type(a) && jl_is_concrete_type(b) &&
-        jl_type_equality_is_identity(a, b) &&
-        (((jl_datatype_t*)a)->name != jl_tuple_typename ||
-         ((jl_datatype_t*)b)->name != jl_tuple_typename))
+    if (jl_is_concrete_type(a) && jl_is_concrete_type(b) && jl_type_equality_is_identity(a, b))
         return 1;
     if (jl_is_unionall(a)) a = jl_unwrap_unionall(a);
     if (jl_is_unionall(b)) b = jl_unwrap_unionall(b);
+    if (jl_is_uniontype(a))
+        return obviously_disjoint(((jl_uniontype_t *)a)->a, b, specificity) &&
+               obviously_disjoint(((jl_uniontype_t *)a)->b, b, specificity);
+    if (jl_is_uniontype(b))
+        return obviously_disjoint(a, ((jl_uniontype_t *)b)->a, specificity) &&
+               obviously_disjoint(a, ((jl_uniontype_t *)b)->b, specificity);
     if (jl_is_datatype(a) && jl_is_datatype(b)) {
         jl_datatype_t *ad = (jl_datatype_t*)a, *bd = (jl_datatype_t*)b;
         if (ad->name != bd->name) {
@@ -477,49 +538,16 @@ static int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity)
     return 0;
 }
 
-static int is_any_like(jl_value_t* x, jl_typeenv_t *env) JL_NOTSAFEPOINT
-{
-    if (x == (jl_value_t *)jl_any_type)
-        return 1;
-    if (jl_is_uniontype(x))
-        return is_any_like(((jl_uniontype_t*)x)->a, env) ||
-               is_any_like(((jl_uniontype_t*)x)->b, env);
-    if (jl_is_unionall(x)) {
-        jl_unionall_t *ua = (jl_unionall_t*)x;
-        jl_typeenv_t newenv = { ua->var, NULL, env };
-        return is_any_like(ua->body, &newenv);
-    }
-    if (jl_is_typevar(x) && env != NULL) {
-        jl_tvar_t *v = (jl_tvar_t *)x;
-        if (v->lb != jl_bottom_type)
-            return 0;
-        int in_env = 0;
-        jl_typeenv_t *vs = env;
-        while (vs != NULL) {
-            in_env = vs->var == v;
-            if (in_env) break;
-            vs = vs->prev;
-        }
-        return in_env && is_any_like(v->ub, env);
-    }
-    return 0;
-}
-
+jl_value_t *simple_union(jl_value_t *a, jl_value_t *b);
 // compute a least upper bound of `a` and `b`
 static jl_value_t *simple_join(jl_value_t *a, jl_value_t *b)
 {
-    if (is_any_like(a, NULL) || is_any_like(b, NULL))
-        return (jl_value_t *)jl_any_type;
-    if (a == jl_bottom_type || obviously_egal(a,b))
+    if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type || obviously_egal(a, b))
         return b;
-    if (b == jl_bottom_type)
+    if (b == jl_bottom_type || a == (jl_value_t*)jl_any_type)
         return a;
     if (!(jl_is_type(a) || jl_is_typevar(a)) || !(jl_is_type(b) || jl_is_typevar(b)))
         return (jl_value_t*)jl_any_type;
-    if (jl_is_uniontype(a) && obviously_in_union(a, b))
-        return a;
-    if (jl_is_uniontype(b) && obviously_in_union(b, a))
-        return b;
     if (jl_is_kind(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(b)) == a)
         return a;
     if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b)
@@ -528,15 +556,10 @@ static jl_value_t *simple_join(jl_value_t *a, jl_value_t *b)
         return a;
     if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->lb))
         return b;
-    if (!jl_has_free_typevars(a) && !jl_has_free_typevars(b) &&
-        // issue #24521: don't merge Type{T} where typeof(T) varies
-        !(jl_is_type_type(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b)))) {
-        if (jl_subtype(a, b)) return b;
-        if (jl_subtype(b, a)) return a;
-    }
-    return jl_new_struct(jl_uniontype_type, a, b);
+    return simple_union(a, b);
 }
 
+jl_value_t *simple_intersect(jl_value_t *a, jl_value_t *b, int overesi);
 // Compute a greatest lower bound of `a` and `b`
 // For the subtype path, we need to over-estimate this by returning `b` in many cases.
 // But for `merge_env`, we'd better under-estimate and return a `Union{}`
@@ -548,10 +571,6 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi)
         return a;
     if (!(jl_is_type(a) || jl_is_typevar(a)) || !(jl_is_type(b) || jl_is_typevar(b)))
         return jl_bottom_type;
-    if (jl_is_uniontype(a) && obviously_in_union(a, b))
-        return b;
-    if (jl_is_uniontype(b) && obviously_in_union(b, a))
-        return a;
     if (jl_is_kind(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(b)) == a)
         return b;
     if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b)
@@ -560,24 +579,7 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi)
         return a;
     if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->ub))
         return b;
-    if (obviously_disjoint(a, b, 0))
-        return jl_bottom_type;
-    if (!jl_has_free_typevars(a) && !jl_has_free_typevars(b)) {
-        if (jl_subtype(a, b)) return a;
-        if (jl_subtype(b, a)) return b;
-    }
-    return overesi ? b : jl_bottom_type;
-}
-
-static jl_unionall_t *rename_unionall(jl_unionall_t *u)
-{
-    jl_tvar_t *v = jl_new_typevar(u->var->name, u->var->lb, u->var->ub);
-    jl_value_t *t = NULL;
-    JL_GC_PUSH2(&v, &t);
-    t = jl_instantiate_unionall(u, (jl_value_t*)v);
-    t = jl_new_struct(jl_unionall_type, v, t);
-    JL_GC_POP();
-    return (jl_unionall_t*)t;
+    return simple_intersect(a, b, overesi);
 }
 
 // main subtyping algorithm
@@ -620,11 +622,13 @@ static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv
     return u;
 }
 
-static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param);
+static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow);
 
 // subtype for variable bounds consistency check. needs its own forall/exists environment.
 static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
+    if (jl_is_long(x) && jl_is_long(y))
+        return jl_unbox_long(x) == jl_unbox_long(y) + e->Loffset;
     if (x == y)
         return 1;
     if (x == jl_bottom_type && jl_is_type(y))
@@ -636,23 +640,15 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y))
         return 0;
     jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
-    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
-    int sub;
-    e->Lunions.used = e->Runions.used = 0;
-    e->Runions.depth = 0;
-    e->Runions.more = 0;
-    e->Lunions.depth = 0;
-    e->Lunions.more = 0;
-
-    sub = forall_exists_subtype(x, y, e, 0);
-
-    pop_unionstate(&e->Runions, &oldRunions);
+    int sub = local_forall_exists_subtype(x, y, e, 0, 1);
     pop_unionstate(&e->Lunions, &oldLunions);
     return sub;
 }
 
 static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
 {
+    if (jl_is_long(x) && jl_is_long(y))
+        return jl_unbox_long(x) == jl_unbox_long(y) + e->Loffset;
     if (x == y && !(jl_is_unionall(y)))
         return 1;
     if (x == jl_bottom_type && jl_is_type(y))
@@ -674,7 +670,7 @@ static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param)
         vb->occurs = 1;
     if (vb != NULL && param) {
         // saturate counters at 2; we don't need values bigger than that
-        if (param == 2 && (vb->right ? e->Rinvdepth : e->invdepth) > vb->depth0) {
+        if (param == 2 && e->invdepth > vb->depth0) {
             if (vb->occurs_inv < 2)
                 vb->occurs_inv++;
         }
@@ -696,7 +692,7 @@ static int var_outside(jl_stenv_t *e, jl_tvar_t *x, jl_tvar_t *y)
     return 0;
 }
 
-static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d);
+static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int depth);
 
 static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e);
 
@@ -707,6 +703,10 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     if (bb == NULL)
         return e->ignore_free || subtype_left_var(b->ub, a, e, param);
     record_var_occurrence(bb, e, param);
+    assert(!jl_is_long(a) || e->Loffset == 0);
+    if (e->Loffset != 0 && !jl_is_typevar(a) &&
+        a != jl_bottom_type && a != (jl_value_t *)jl_any_type)
+        return 0;
     if (!bb->right)  // check ∀b . b<:a
         return subtype_left_var(bb->ub, a, e, param);
     if (bb->ub == a)
@@ -716,7 +716,7 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     // for this to work we need to compute issub(left,right) before issub(right,left),
     // since otherwise the issub(a, bb.ub) check in var_gt becomes vacuous.
     if (e->intersection) {
-        jl_value_t *ub = intersect_aside(bb->ub, a, e, 0, bb->depth0);
+        jl_value_t *ub = intersect_aside(a, bb->ub, e, bb->depth0);
         JL_GC_PUSH1(&ub);
         if (ub != (jl_value_t*)b && (!jl_is_typevar(ub) || !reachable_var(ub, b, e)))
             bb->ub = ub;
@@ -744,14 +744,14 @@ static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     if (bb == NULL)
         return e->ignore_free || subtype_left_var(a, b->lb, e, param);
     record_var_occurrence(bb, e, param);
+    assert(!jl_is_long(a) || e->Loffset == 0);
+    if (e->Loffset != 0 && !jl_is_typevar(a) &&
+        a != jl_bottom_type && a != (jl_value_t *)jl_any_type)
+        return 0;
     if (!bb->right)  // check ∀b . b>:a
         return subtype_left_var(a, bb->lb, e, param);
-    if (bb->lb == bb->ub) {
-        if (jl_is_typevar(bb->lb) && !jl_is_type(a) && !jl_is_typevar(a))
-            return var_gt((jl_tvar_t*)bb->lb, a, e, param);
-        if (jl_is_typevar(a) && !jl_is_type(bb->lb) && !jl_is_typevar(bb->lb))
-            return var_lt((jl_tvar_t*)a, bb->lb, e, param);
-    }
+    if (bb->lb == a)
+        return 1;
     if (!((bb->ub == (jl_value_t*)jl_any_type && !jl_is_type(a) && !jl_is_typevar(a)) || subtype_ccheck(a, bb->ub, e)))
         return 0;
     jl_value_t *lb = simple_join(bb->lb, a);
@@ -769,6 +769,30 @@ static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     return 1;
 }
 
+static int subtype_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int R, int param)
+{
+    if (e->intersection) {
+        jl_varbinding_t *bb = lookup(e, (jl_tvar_t*)b);
+        jl_value_t *bub = bb ? bb->ub : ((jl_tvar_t*)b)->ub;
+        jl_value_t *blb = bb ? bb->lb : ((jl_tvar_t*)b)->lb;
+        if (bub == blb && jl_is_typevar(bub)) {
+            int sub = subtype_var((jl_tvar_t *)bub, a, e, R, param);
+            return sub;
+        }
+    }
+    if (e->Loffset != 0 && jl_is_long(a)) {
+        int old_offset = R ? -e->Loffset : e->Loffset;
+        jl_value_t *na = jl_box_long(jl_unbox_long(a) + old_offset);
+        JL_GC_PUSH1(&na);
+        e->Loffset = 0;
+        int sub = R ? var_gt(b, na, e, param) : var_lt(b, na, e, param);
+        e->Loffset = R ? -old_offset : old_offset;
+        JL_GC_POP();
+        return sub;
+    }
+    return R ? var_gt(b, a, e, param) : var_lt(b, a, e, param);
+}
+
 // check that a type is concrete or quasi-concrete (Type{T}).
 // this is used to check concrete typevars:
 // issubtype is false if the lower bound of a concrete type var is not concrete.
@@ -852,7 +876,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
             // outer var can only refer to inner var if bounds changed
             (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) ||
             (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) {
-            u = rename_unionall(u);
+            u = jl_rename_unionall(u);
             break;
         }
         btemp = btemp->prev;
@@ -865,7 +889,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8
 {
     u = unalias_unionall(u, e);
     jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
+                           e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
     int ans;
@@ -929,7 +953,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8
     if (R && ans && e->envidx < e->envsz) {
         jl_value_t *val;
         if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type)
-            val = (jl_value_t*)jl_wrap_vararg(NULL, NULL);
+            val = (jl_value_t*)jl_wrap_vararg(NULL, NULL); // special token result that represents N::Int in the envout
         else if (!vb.occurs_inv && vb.lb != jl_bottom_type)
             val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb);
         else if (vb.lb == vb.ub)
@@ -965,10 +989,8 @@ static int check_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e)
         jl_value_t *nn = jl_box_long(n);
         JL_GC_PUSH1(&nn);
         e->invdepth++;
-        e->Rinvdepth++;
         int ans = subtype(nn, N, e, 2) && subtype(N, nn, e, 0);
         e->invdepth--;
-        e->Rinvdepth--;
         JL_GC_POP();
         if (!ans)
             return 0;
@@ -978,17 +1000,6 @@ static int check_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e)
 
 static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e);
 
-struct subtype_tuple_env {
-    jl_datatype_t *xd, *yd;
-    jl_value_t *lastx, *lasty;
-    size_t lx, ly;
-    size_t i, j;
-    int vx, vy;
-    jl_value_t *vtx;
-    jl_value_t *vty;
-    jl_vararg_kind_t vvx, vvy;
-} JL_ROOTED_VALUE_COLLECTION;
-
 static int subtype_tuple_varargs(
     jl_vararg_t *vtx, jl_vararg_t *vty,
     size_t vx, size_t vy,
@@ -1069,32 +1080,57 @@ static int subtype_tuple_varargs(
         }
 
         if (ylv) {
-            if (ylv->depth0 != e->invdepth || ylv->occurs_inv)
+            if (ylv->depth0 != e->invdepth ||
+                ylv->lb != jl_bottom_type ||
+                ylv->ub != (jl_value_t *)jl_any_type)
                 return 0;
             ylv->intvalued = 1;
         }
         // set lb to Any. Since `intvalued` is set, we'll interpret that
         // appropriately.
         e->invdepth++;
-        e->Rinvdepth++;
         int ans = subtype((jl_value_t*)jl_any_type, yp1, e, 2);
         e->invdepth--;
-        e->Rinvdepth--;
         return ans;
     }
 
     // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2
     e->invdepth++;
-    e->Rinvdepth++;
     JL_GC_PUSH2(&xp1, &yp1);
-    if (xp1 && jl_is_long(xp1) && vx != 1)
-        xp1 = jl_box_long(jl_unbox_long(xp1) - vx + 1);
-    if (jl_is_long(yp1) && vy != 1)
-        yp1 = jl_box_long(jl_unbox_long(yp1) - vy + 1);
-    int ans = forall_exists_equal(xp1, yp1, e);
+    int ans;
+    jl_varbinding_t *bxp1 = jl_is_typevar(xp1) ? lookup(e, (jl_tvar_t *)xp1) : NULL;
+    jl_varbinding_t *byp1 = jl_is_typevar(yp1) ? lookup(e, (jl_tvar_t *)yp1) : NULL;
+    if (bxp1) {
+        if (bxp1->intvalued == 0)
+            bxp1->intvalued = 1;
+        if (jl_is_long(bxp1->lb))
+            xp1 = bxp1->lb;
+    }
+    if (byp1) {
+        if (byp1->intvalued == 0)
+            byp1->intvalued = 1;
+        if (jl_is_long(byp1->lb))
+            yp1 = byp1->lb;
+    }
+    if (jl_is_long(xp1) && jl_is_long(yp1))
+        ans = jl_unbox_long(xp1) - vx == jl_unbox_long(yp1) - vy;
+    else {
+        if (jl_is_long(xp1) && vx != vy) {
+            xp1 = jl_box_long(jl_unbox_long(xp1) + vy - vx);
+            vx = vy;
+        }
+        if (jl_is_long(yp1) && vy != vx) {
+            yp1 = jl_box_long(jl_unbox_long(yp1) + vx - vy);
+            vy = vx;
+        }
+        assert(e->Loffset == 0);
+        e->Loffset = vx - vy;
+        ans = forall_exists_equal(xp1, yp1, e);
+        assert(e->Loffset == vx - vy);
+        e->Loffset = 0;
+    }
     JL_GC_POP();
     e->invdepth--;
-    e->Rinvdepth--;
     return ans;
 }
 
@@ -1102,7 +1138,7 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl
 {
     size_t lx = jl_nparams(xd);
     size_t ly = jl_nparams(yd);
-    size_t i = 0, j = 0, vx = 0, vy = 0, x_reps = 0;
+    size_t i = 0, j = 0, vx = 0, vy = 0, x_reps = 1;
     jl_value_t *lastx = NULL, *lasty = NULL;
     jl_value_t *xi = NULL, *yi = NULL;
 
@@ -1132,7 +1168,8 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl
                 xi = jl_tparam(xd, lx-1);
                 if (jl_is_vararg(xi)) {
                     all_varargs = 1;
-                    vy += lx - i;
+                    // count up to lx-2 rather than lx-1.
+                    vy += lx - i - 1;
                     vx = 1;
                 } else {
                     break;
@@ -1152,25 +1189,21 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl
             return !!vx;
 
         xi = vx ? jl_unwrap_vararg(xi) : xi;
-        int x_same = lastx && jl_egal(xi, lastx);
-        if (vy) {
-            yi = jl_unwrap_vararg(yi);
-            // keep track of number of consecutive identical types compared to Vararg
-            if (x_same)
-                x_reps++;
-            else
-                x_reps = 1;
-        }
+        yi = vy ? jl_unwrap_vararg(yi) : yi;
+        int x_same = vx > 1 || (lastx && obviously_egal(xi, lastx));
+        int y_same = vy > 1 || (lasty && obviously_egal(yi, lasty));
+        // keep track of number of consecutive identical subtyping
+        x_reps = y_same && x_same ? x_reps + 1 : 1;
         if (x_reps > 2) {
-            // an identical type on the left doesn't need to be compared to a Vararg
+            // an identical type on the left doesn't need to be compared to the same
             // element type on the right more than twice.
         }
         else if (x_same && e->Runions.depth == 0 &&
-            ((yi == lasty && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) ||
+            ((y_same && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) ||
              (yi == lastx && !vx && vy && jl_is_concrete_type(xi)))) {
             // fast path for repeated elements
         }
-        else if (e->Runions.depth == 0 && e->Lunions.depth == 0 && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) {
+        else if (e->Runions.depth == 0 && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) {
             // fast path for separable sub-formulas
             if (!jl_subtype(xi, yi))
                 return 0;
@@ -1279,7 +1312,9 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // of unions and vars: if matching `typevar <: union`, first try to match the whole
             // union against the variable before trying to take it apart to see if there are any
             // variables lurking inside.
-            ui = pick_union_decision(e, 1);
+            // note: for forall var, there's no need to split y if it has no free typevars.
+            jl_varbinding_t *xx = lookup(e, (jl_tvar_t *)x);
+            ui = ((xx && xx->right) || jl_has_free_typevars(y)) && pick_union_decision(e, 1);
         }
         if (ui == 1)
             y = pick_union_element(y, e, 1);
@@ -1319,10 +1354,20 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // to other left-side variables, so using || here is safe.
             return subtype(xub, y, e, param) || subtype(x, ylb, e, param);
         }
-        return var_lt((jl_tvar_t*)x, y, e, param);
+        if (jl_is_unionall(y)) {
+            jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)x);
+            if (xb == NULL ? !e->ignore_free : !xb->right) {
+                // We'd better unwrap `y::UnionAll` eagerly if `x` isa ∀-var.
+                // This makes sure the following cases work correct:
+                // 1) `∀T <: Union{∃S, SomeType{P}} where {P}`: `S == Any` ==> `S >: T`
+                // 2) `∀T <: Union{∀T, SomeType{P}} where {P}`:
+                return subtype_unionall(x, (jl_unionall_t*)y, e, 1, param);
+            }
+        }
+        return subtype_var((jl_tvar_t*)x, y, e, 0, param);
     }
     if (jl_is_typevar(y))
-        return var_gt((jl_tvar_t*)y, x, e, param);
+        return subtype_var((jl_tvar_t*)y, x, e, 1, param);
     if (y == (jl_value_t*)jl_any_type && !jl_has_free_typevars(x))
         return 1;
     if (x == jl_bottom_type && !jl_has_free_typevars(y))
@@ -1373,10 +1418,7 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // The answer is true iff `T` has full bounds (as in `Type`), but this needs to
             // be checked at the same depth where `Type{T}` occurs --- the depth of the LHS
             // doesn't matter because it (e.g. `DataType`) doesn't actually contain the variable.
-            int saved = e->invdepth;
-            e->invdepth = e->Rinvdepth;
             int issub = subtype((jl_value_t*)jl_type_type, y, e, param);
-            e->invdepth = saved;
             return issub;
         }
         while (xd != jl_any_type && xd->name != yd->name) {
@@ -1392,7 +1434,6 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
         size_t i, np = jl_nparams(xd);
         int ans = 1;
         e->invdepth++;
-        e->Rinvdepth++;
         for (i=0; i < np; i++) {
             jl_value_t *xi = jl_tparam(xd, i), *yi = jl_tparam(yd, i);
             if (!(xi == yi || forall_exists_equal(xi, yi, e))) {
@@ -1400,11 +1441,12 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             }
         }
         e->invdepth--;
-        e->Rinvdepth--;
         return ans;
     }
     if (jl_is_type(y))
         return x == jl_bottom_type;
+    if (jl_is_long(x) && jl_is_long(y))
+        return jl_unbox_long(x) == jl_unbox_long(y) + e->Loffset;
     return jl_egal(x, y);
 }
 
@@ -1431,6 +1473,78 @@ static int is_definite_length_tuple_type(jl_value_t *x)
     return k == JL_VARARG_NONE || k == JL_VARARG_INT;
 }
 
+static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore);
+
+static int may_contain_union_decision(jl_value_t *x, jl_stenv_t *e, jl_typeenv_t *log) JL_NOTSAFEPOINT
+{
+    if (x == NULL || x == (jl_value_t*)jl_any_type || x == jl_bottom_type)
+        return 0;
+    if (jl_is_unionall(x))
+        return may_contain_union_decision(((jl_unionall_t *)x)->body, e, log);
+    if (jl_is_datatype(x)) {
+        jl_datatype_t *xd = (jl_datatype_t *)x;
+        for (int i = 0; i < jl_nparams(xd); i++) {
+            jl_value_t *param = jl_tparam(xd, i);
+            if (jl_is_vararg(param))
+                param = jl_unwrap_vararg(param);
+            if (may_contain_union_decision(param, e, log))
+                return 1;
+        }
+        return 0;
+    }
+    if (!jl_is_typevar(x))
+        return jl_is_type(x);
+    jl_typeenv_t *t = log;
+    while (t != NULL) {
+        if (x == (jl_value_t *)t->var)
+            return 1;
+        t = t->prev;
+    }
+    jl_typeenv_t newlog = { (jl_tvar_t*)x, NULL, log };
+    jl_varbinding_t *xb = lookup(e, (jl_tvar_t *)x);
+    return may_contain_union_decision(xb ? xb->lb : ((jl_tvar_t *)x)->lb, e, &newlog) ||
+           may_contain_union_decision(xb ? xb->ub : ((jl_tvar_t *)x)->ub, e, &newlog);
+}
+
+static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow)
+{
+    int16_t oldRmore = e->Runions.more;
+    int sub;
+    int kindx = !jl_has_free_typevars(x);
+    int kindy = !jl_has_free_typevars(y);
+    if (kindx && kindy)
+        return jl_subtype(x, y);
+    if (may_contain_union_decision(y, e, NULL) && pick_union_decision(e, 1) == 0) {
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
+        e->Lunions.used = e->Runions.used = 0;
+        e->Lunions.depth = e->Runions.depth = 0;
+        e->Lunions.more = e->Runions.more = 0;
+        int count = 0, noRmore = 0;
+        sub = _forall_exists_subtype(x, y, e, param, &count, &noRmore);
+        pop_unionstate(&e->Runions, &oldRunions);
+        // we should not try the slow path if `forall_exists_subtype` has tested all cases;
+        // Once limit_slow == 1, also skip it if
+        // 1) `forall_exists_subtype` return false
+        // 2) the left `Union` looks big
+        if (limit_slow == -1)
+            limit_slow = kindx || kindy;
+        if (noRmore || (limit_slow && (count > 3  || !sub)))
+            e->Runions.more = oldRmore;
+    }
+    else {
+        // slow path
+        e->Lunions.used = 0;
+        while (1) {
+            e->Lunions.more = 0;
+            e->Lunions.depth = 0;
+            sub = subtype(x, y, e, param);
+            if (!sub || !next_union_state(e, 0))
+                break;
+        }
+    }
+    return sub;
+}
+
 static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     if (obviously_egal(x, y)) return 1;
@@ -1439,9 +1553,22 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y)))
         return 0;
 
+    if (jl_is_datatype(x) && jl_is_datatype(y)) {
+        // Fastpath for nested constructor. Skip the unneeded `>:` check.
+        // Note: since there is no changes to the environment or union stack implied by `x` or `y`, this will simply forward to calling
+        // `forall_exists_equal(xi, yi, e)` on each parameter `(xi, yi)` of `(x, y)`,
+        // which means this subtype call will give the same result for `subtype(x, y)` and `subtype(y, x)`.
+        jl_datatype_t *xd = (jl_datatype_t*)x, *yd = (jl_datatype_t*)y;
+        if (xd->name != yd->name)
+            return 0;
+        if (xd->name != jl_tuple_typename)
+            return subtype(x, y, e, 2);
+    }
+
     if ((jl_is_uniontype(x) && jl_is_uniontype(y))) {
         // For 2 unions, first try a more efficient greedy algorithm that compares the unions
         // componentwise. If failed, `exists_subtype` would memorize that this branch should be skipped.
+        // Note: this is valid because the normal path checks `>:` locally.
         if (pick_union_decision(e, 1) == 0) {
             return forall_exists_equal(((jl_uniontype_t *)x)->a, ((jl_uniontype_t *)y)->a, e) &&
                    forall_exists_equal(((jl_uniontype_t *)x)->b, ((jl_uniontype_t *)y)->b, e);
@@ -1449,36 +1576,18 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     }
 
     jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
-    e->Lunions.used = 0;
-    int sub;
-
-    if (!jl_has_free_typevars(x) || !jl_has_free_typevars(y)) {
-        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
-        e->Runions.used = 0;
-        e->Runions.depth = 0;
-        e->Runions.more = 0;
-        e->Lunions.depth = 0;
-        e->Lunions.more = 0;
-
-        sub = forall_exists_subtype(x, y, e, 2);
 
-        pop_unionstate(&e->Runions, &oldRunions);
-    }
-    else {
-        while (1) {
-            e->Lunions.more = 0;
-            e->Lunions.depth = 0;
-            sub = subtype(x, y, e, 2);
-            if (!sub || !next_union_state(e, 0))
-                break;
-        }
+    int sub = local_forall_exists_subtype(x, y, e, 2, -1);
+    if (sub) {
+        flip_offset(e);
+        sub = local_forall_exists_subtype(y, x, e, 0, 0);
+        flip_offset(e);
     }
-
     pop_unionstate(&e->Lunions, &oldLunions);
-    return sub && subtype(y, x, e, 0);
+    return sub;
 }
 
-static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_t *saved, jl_savedenv_t *se, int param)
+static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_savedenv_t *se, int param)
 {
     e->Runions.used = 0;
     while (1) {
@@ -1492,56 +1601,64 @@ static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_
             // We preserve `envout` here as `subtype_unionall` needs previous assigned env values.
             int oldidx = e->envidx;
             e->envidx = e->envsz;
-            restore_env(e, saved, se);
+            restore_env(e, se, 1);
             e->envidx = oldidx;
         }
         else {
-            restore_env(e, saved, se);
+            restore_env(e, se, 1);
             return 0;
         }
     }
 }
 
-static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
+static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore)
 {
     // The depth recursion has the following shape, after simplification:
     // ∀₁
     //   ∃₁
     assert(e->Runions.depth == 0);
     assert(e->Lunions.depth == 0);
-    jl_value_t *saved=NULL; jl_savedenv_t se;
-    JL_GC_PUSH1(&saved);
-    save_env(e, &saved, &se);
+    jl_savedenv_t se;
+    save_env(e, &se, 1);
 
     e->Lunions.used = 0;
     int sub;
+    if (count) *count = 0;
+    if (noRmore) *noRmore = 1;
     while (1) {
-        sub = exists_subtype(x, y, e, saved, &se, param);
+        sub = exists_subtype(x, y, e, &se, param);
+        if (count) *count = (*count < 4) ? *count + 1 : 4;
+        if (noRmore) *noRmore = *noRmore && e->Runions.more == 0;
         if (!sub || !next_union_state(e, 0))
             break;
-        free_env(&se);
-        save_env(e, &saved, &se);
+        re_save_env(e, &se, 1);
     }
 
     free_env(&se);
-    JL_GC_POP();
     return sub;
 }
 
+static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
+{
+    return _forall_exists_subtype(x, y, e, param, NULL, NULL);
+}
+
 static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
 {
     e->vars = NULL;
-    assert(env != NULL || envsz == 0);
     e->envsz = envsz;
     e->envout = env;
-    if (envsz)
+    if (envsz) {
+        assert(env != NULL);
         memset(env, 0, envsz*sizeof(void*));
+    }
     e->envidx = 0;
-    e->invdepth = e->Rinvdepth = 0;
+    e->invdepth = 0;
     e->ignore_free = 0;
     e->intersection = 0;
     e->emptiness_only = 0;
     e->triangular = 0;
+    e->Loffset = 0;
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
     e->Lunions.used = 0;       e->Runions.used = 0;
@@ -1724,7 +1841,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su
     if (jl_is_datatype(y)) {
         int istuple = (((jl_datatype_t*)y)->name == jl_tuple_typename);
         int iscov = istuple;
-        // TODO: this would be a nice fast-path to have, unfortuanately,
+        // TODO: this would be a nice fast-path to have, unfortunately,
         //       datatype allocation fails to correctly hash-cons them
         //       and the subtyping tests include tests for this case
         //if (!iscov && ((jl_datatype_t*)y)->isconcretetype && !jl_is_type_type(x)) {
@@ -1992,31 +2109,21 @@ JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env,
     return subtype;
 }
 
-static int subtype_in_env_(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int invdepth, int Rinvdepth)
+static int subtype_in_env(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     jl_stenv_t e2;
     init_stenv(&e2, NULL, 0);
     e2.vars = e->vars;
     e2.intersection = e->intersection;
     e2.ignore_free = e->ignore_free;
-    e2.invdepth = invdepth;
-    e2.Rinvdepth = Rinvdepth;
+    e2.invdepth = e->invdepth;
     e2.envsz = e->envsz;
     e2.envout = e->envout;
     e2.envidx = e->envidx;
+    e2.Loffset = e->Loffset;
     return forall_exists_subtype(x, y, &e2, 0);
 }
 
-static int subtype_in_env(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
-{
-    return subtype_in_env_(x, y, e, e->invdepth, e->Rinvdepth);
-}
-
-static int subtype_bounds_in_env(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d)
-{
-    return subtype_in_env_(x, y, e, R ? e->invdepth : d, R ? d : e->Rinvdepth);
-}
-
 JL_DLLEXPORT int jl_subtype(jl_value_t *x, jl_value_t *y)
 {
     return jl_subtype_env(x, y, NULL, 0);
@@ -2150,23 +2257,39 @@ int jl_has_intersect_type_not_kind(jl_value_t *t)
     t = jl_unwrap_unionall(t);
     if (t == (jl_value_t*)jl_any_type)
         return 1;
-    if (jl_is_uniontype(t)) {
+    assert(!jl_is_vararg(t));
+    if (jl_is_uniontype(t))
         return jl_has_intersect_type_not_kind(((jl_uniontype_t*)t)->a) ||
                jl_has_intersect_type_not_kind(((jl_uniontype_t*)t)->b);
-    }
-    if (jl_is_typevar(t)) {
+    if (jl_is_typevar(t))
         return jl_has_intersect_type_not_kind(((jl_tvar_t*)t)->ub);
-    }
-    if (jl_is_datatype(t)) {
+    if (jl_is_datatype(t))
         if (((jl_datatype_t*)t)->name == jl_type_typename)
             return 1;
-    }
     return 0;
 }
 
+// compute if DataType<:t || Union<:t || UnionAll<:t etc.
+int jl_has_intersect_kind_not_type(jl_value_t *t)
+{
+    t = jl_unwrap_unionall(t);
+    if (t == (jl_value_t*)jl_any_type || jl_is_kind(t))
+        return 1;
+    assert(!jl_is_vararg(t));
+    if (jl_is_uniontype(t))
+        return jl_has_intersect_kind_not_type(((jl_uniontype_t*)t)->a) ||
+               jl_has_intersect_kind_not_type(((jl_uniontype_t*)t)->b);
+    if (jl_is_typevar(t))
+        return jl_has_intersect_kind_not_type(((jl_tvar_t*)t)->ub);
+    return 0;
+}
+
+
 JL_DLLEXPORT int jl_isa(jl_value_t *x, jl_value_t *t)
 {
-    if (jl_typeis(x,t) || t == (jl_value_t*)jl_any_type)
+    if (t == (jl_value_t*)jl_any_type || jl_typetagis(x,t))
+        return 1;
+    if (jl_typetagof(x) < (jl_max_tags << 4) && jl_is_datatype(t) && jl_typetagis(x,((jl_datatype_t*)t)->smalltag << 4))
         return 1;
     if (jl_is_type(x)) {
         if (t == (jl_value_t*)jl_type_type)
@@ -2211,7 +2334,7 @@ JL_DLLEXPORT int jl_isa(jl_value_t *x, jl_value_t *t)
             return 0;
         }
     }
-    if (jl_is_concrete_type(t) && jl_type_equality_is_identity(jl_typeof(x), t))
+    if (jl_is_concrete_type(t))
         return 0;
     return jl_subtype(jl_typeof(x), t);
 }
@@ -2223,7 +2346,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
 static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e);
 
 // intersect in nested union environment, similar to subtype_ccheck
-static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d)
+static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int depth)
 {
     // band-aid for #30335
     if (x == (jl_value_t*)jl_any_type && !jl_is_typevar(y))
@@ -2231,19 +2354,15 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
     if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x))
         return x;
     // band-aid for #46736
-    if (jl_egal(x, y))
+    if (obviously_egal(x, y))
         return x;
 
     jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
-    int savedepth = e->invdepth, Rsavedepth = e->Rinvdepth;
-    // TODO: this doesn't quite make sense
-    e->invdepth = e->Rinvdepth = d;
-
+    int savedepth = e->invdepth;
+    e->invdepth = depth;
     jl_value_t *res = intersect_all(x, y, e);
-
-    pop_unionstate(&e->Runions, &oldRunions);
     e->invdepth = savedepth;
-    e->Rinvdepth = Rsavedepth;
+    pop_unionstate(&e->Runions, &oldRunions);
     return res;
 }
 
@@ -2266,12 +2385,9 @@ static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t
 }
 
 // set a variable to a non-type constant
-static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_UNROOTED, jl_varbinding_t *othervar)
+static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_UNROOTED, jl_stenv_t *e, int R)
 {
-    int offset = bb->offset;
-    if (othervar && offset == 0)
-        offset = -othervar->offset;
-    assert(!othervar || othervar->offset == -offset);
+    int offset = R ? -e->Loffset : e->Loffset;
     if (bb->lb == jl_bottom_type && bb->ub == (jl_value_t*)jl_any_type) {
         if (offset == 0)
             bb->lb = bb->ub = v;
@@ -2298,24 +2414,33 @@ static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_
     return v;
 }
 
-static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_t *e) {
+static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_t *e, int R) {
     if (!bb)
         return (jl_value_t*)tv;
     if (bb->depth0 != e->invdepth)
         return jl_bottom_type;
+    e->invdepth++;
     record_var_occurrence(bb, e, 2);
+    e->invdepth--;
+    int offset = R ? -e->Loffset : e->Loffset;
     if (jl_is_long(bb->lb)) {
         ssize_t blb = jl_unbox_long(bb->lb);
-        if ((blb < bb->offset) || (blb < 0))
+        if (blb < offset || blb < 0)
             return jl_bottom_type;
         // Here we always return the shorter `Vararg`'s length.
-        if (bb->offset <= 0)
+        if (offset <= 0)
             return bb->lb;
-        return jl_box_long(blb - bb->offset);
-    }
-    if (bb->offset > 0) {
-        bb->intvalued = 2;
-        return NULL;
+        return jl_box_long(blb - offset);
+    }
+    if (offset > 0) {
+        if (bb->innervars == NULL)
+            bb->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
+        jl_value_t *ntv = NULL;
+        JL_GC_PUSH1(&ntv);
+        ntv = (jl_value_t *)jl_new_typevar(tv->name, jl_bottom_type, (jl_value_t *)jl_any_type);
+        jl_array_ptr_1d_push(bb->innervars, ntv);
+        JL_GC_POP();
+        return ntv;
     }
     return (jl_value_t*)tv;
 }
@@ -2344,17 +2469,15 @@ static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
     return 0;
 }
 
-static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e, int R, int d)
+static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e)
 {
     if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || try_subtype_by_bounds(a, b, e))
         return 1;
-    jl_value_t *root=NULL; jl_savedenv_t se;
-    JL_GC_PUSH1(&root);
-    save_env(e, &root, &se);
-    int ret = subtype_bounds_in_env(a, b, e, R, d);
-    restore_env(e, root, &se);
+    jl_savedenv_t se;
+    save_env(e, &se, 1);
+    int ret = subtype_in_env(a, b, e);
+    restore_env(e, &se, 1);
     free_env(&se);
-    JL_GC_POP();
     return ret;
 }
 
@@ -2373,7 +2496,7 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
 }
 
 // subtype, treating all vars as existential
-static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d)
+static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     jl_varbinding_t *v = e->vars;
     int len = 0;
@@ -2392,7 +2515,7 @@ static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *
         v->right = 1;
         v = v->prev;
     }
-    int issub = subtype_bounds_in_env(x, y, e, R, d);
+    int issub = subtype_in_env(x, y, e);
     n = 0; v = e->vars;
     while (n < len) {
         assert(v != NULL);
@@ -2446,50 +2569,29 @@ static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTS
     return 0;
 }
 
-static int has_free_vararg_length(jl_value_t *a, jl_stenv_t *e) {
-    if (jl_is_unionall(a))
-        a = jl_unwrap_unionall(a);
-    if (jl_is_datatype(a) && jl_is_tuple_type((jl_datatype_t *)a)) {
-        size_t lx = jl_nparams((jl_datatype_t *)a);
-        if (lx > 0) {
-            jl_value_t *la = jl_tparam((jl_datatype_t *)a, lx-1);
-            if (jl_is_vararg(la)) {
-                jl_value_t *len = jl_unwrap_vararg_num((jl_vararg_t *)la);
-                // return 1 if we meet a vararg with Null length
-                if (!len) return 1;
-                // or a typevar not in the current env.
-                if (jl_is_typevar(len))
-                    return lookup(e, (jl_tvar_t *)len) == NULL;
-            }
-        }
-    }
-    return 0;
-}
 
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
     if (bb == NULL)
-        return R ? intersect_aside(a, b->ub, e, 1, 0) : intersect_aside(b->ub, a, e, 0, 0);
+        return R ? intersect_aside(a, b->ub, e, 0) : intersect_aside(b->ub, a, e, 0);
     if (reachable_var(bb->lb, b, e) || reachable_var(bb->ub, b, e))
         return a;
-    if (bb->lb == bb->ub && jl_is_typevar(bb->lb)) {
-        return intersect(a, bb->lb, e, param);
-    }
+    if (bb->lb == bb->ub && jl_is_typevar(bb->lb))
+        return R ? intersect(a, bb->lb, e, param) : intersect(bb->lb, a, e, param);
     if (!jl_is_type(a) && !jl_is_typevar(a))
-        return set_var_to_const(bb, a, NULL);
-    int d = bb->depth0;
-    jl_value_t *root=NULL; jl_savedenv_t se;
+        return set_var_to_const(bb, a, e, R);
+    jl_savedenv_t se;
     if (param == 2) {
         jl_value_t *ub = NULL;
-        JL_GC_PUSH2(&ub, &root);
+        JL_GC_PUSH1(&ub);
         if (!jl_has_free_typevars(a)) {
-            save_env(e, &root, &se);
-            int issub = subtype_in_env_existential(bb->lb, a, e, 0, d);
-            restore_env(e, root, &se);
+            save_env(e, &se, 1);
+            int issub = subtype_in_env_existential(bb->lb, a, e);
+            restore_env(e, &se, 1);
             if (issub) {
-                issub = subtype_in_env_existential(a, bb->ub, e, 1, d);
-                restore_env(e, root, &se);
+                issub = subtype_in_env_existential(a, bb->ub, e);
+                restore_env(e, &se, 1);
             }
             free_env(&se);
             if (!issub) {
@@ -2500,21 +2602,16 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         }
         else {
             e->triangular++;
-            ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
+            ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0);
             e->triangular--;
-            save_env(e, &root, &se);
-            int issub = subtype_in_env_existential(bb->lb, ub, e, 0, d);
-            restore_env(e, root, &se);
+            save_env(e, &se, 1);
+            int issub = subtype_in_env_existential(bb->lb, ub, e);
+            restore_env(e, &se, 1);
             free_env(&se);
             if (!issub) {
                 JL_GC_POP();
                 return jl_bottom_type;
             }
-            if (jl_is_uniontype(ub) && !jl_is_uniontype(a)) {
-                bb->ub = ub;
-                bb->lb = jl_bottom_type;
-                ub = (jl_value_t*)b;
-            }
         }
         if (ub != (jl_value_t*)b) {
             if (jl_has_free_typevars(ub)) {
@@ -2524,17 +2621,16 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
                 }
             }
             bb->ub = ub;
-            // We get a imprecise Tuple here. Don't change `lb` and return the typevar directly.
-            if (has_free_vararg_length(ub, e) && !has_free_vararg_length(a, e)) {
-                JL_GC_POP();
-                return (jl_value_t*)b;
-            }
-            bb->lb = ub;
+            if ((jl_is_uniontype(ub) && !jl_is_uniontype(a)) ||
+                (jl_is_unionall(ub) && !jl_is_unionall(a)))
+                ub = (jl_value_t*)b;
+            else
+                bb->lb = ub;
         }
         JL_GC_POP();
         return ub;
     }
-    jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
+    jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
     if (bb->constraintkind == 1 || e->triangular) {
@@ -2545,7 +2641,7 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
     }
     else if (bb->constraintkind == 0) {
         JL_GC_PUSH1(&ub);
-        if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) {
+        if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e)) {
             JL_GC_POP();
             return (jl_value_t*)b;
         }
@@ -2553,14 +2649,24 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         return ub;
     }
     assert(bb->constraintkind == 2);
-    if (!jl_is_typevar(a)) {
-        if (ub == a && bb->lb != jl_bottom_type)
-            return ub;
-        else if (jl_egal(bb->ub, bb->lb))
-            return ub;
-        set_bound(&bb->ub, ub, b, e);
-    }
-    return (jl_value_t*)b;
+    if (ub == a && bb->lb != jl_bottom_type)
+        return ub;
+    if (jl_egal(bb->ub, bb->lb))
+        return ub;
+    if (is_leaf_bound(ub))
+        set_bound(&bb->lb, ub, b, e);
+    // TODO: can we improve this bound by pushing a new variable into the environment
+    // and adding that to the lower bound of our variable?
+    //jl_value_t *ntv = NULL;
+    //JL_GC_PUSH2(&ntv, &ub);
+    //if (bb->innervars == NULL)
+    //    bb->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
+    //ntv = (jl_value_t*)jl_new_typevar(b->name, bb->lb, ub);
+    //jl_array_ptr_1d_push(bb->innervars, ntv);
+    //jl_value_t *lb = simple_join(b->lb, ntv);
+    //JL_GC_POP();
+    //bb->lb = lb;
+    return ub;
 }
 
 // test whether `var` occurs inside constructors. `want_inv` tests only inside
@@ -2611,16 +2717,34 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t)
     if (jl_is_unionall(u)) {
         jl_tvar_t *var = ((jl_unionall_t *)u)->var;
         jl_value_t *ub = var->ub, *body = ((jl_unionall_t *)u)->body;
-        JL_GC_PUSH3(&ub, &body, &var);
         assert(var != t);
-        ub = omit_bad_union(ub, t);
-        body = omit_bad_union(body, t);
-        if (ub != NULL && body != NULL && !jl_has_typevar(var->lb, t)) {
-            if (ub != var->ub) {
-                var = jl_new_typevar(var->name, var->lb, ub);
-                body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var);
+        if (!jl_has_typevar(var->lb, t)) {
+            JL_GC_PUSH3(&ub, &body, &var);
+            body = omit_bad_union(body, t);
+            if (!jl_has_typevar(body, var)) {
+                res = body;
+            }
+            else {
+                ub = omit_bad_union(ub, t);
+                if (ub == jl_bottom_type && var->lb != ub) {
+                    res = jl_bottom_type;
+                }
+                else if (obviously_egal(var->lb, ub)) {
+                    JL_TRY {
+                        res = jl_substitute_var(body, var, ub);
+                    }
+                    JL_CATCH {
+                        res = jl_bottom_type;
+                    }
+                }
+                else {
+                    if (ub != var->ub) {
+                        var = jl_new_typevar(var->name, var->lb, ub);
+                        body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var);
+                    }
+                    res = jl_new_struct(jl_unionall_type, var, body);
+                }
             }
-            res = jl_new_struct(jl_unionall_type, var, body);
         }
         JL_GC_POP();
     }
@@ -2630,11 +2754,13 @@ static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t)
         JL_GC_PUSH2(&a, &b);
         a = omit_bad_union(a, t);
         b = omit_bad_union(b, t);
-        res = a == NULL ? b :
-              b == NULL ? a :
-              jl_new_struct(jl_uniontype_type, a, b);
+        res = simple_join(a, b);
         JL_GC_POP();
     }
+    else {
+        res = jl_bottom_type;
+    }
+    assert(res != NULL);
     return res;
 }
 
@@ -2652,10 +2778,9 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         // given x<:T<:x, substitute x for T
         varval = vb->ub;
     }
-    // TODO: `vb.occurs_cov == 1` here allows substituting Tuple{<:X} => Tuple{X},
-    // which is valid but changes some ambiguity errors so we don't need to do it yet.
-    else if ((/*vb->occurs_cov == 1 || */is_leaf_bound(vb->ub)) &&
-             !var_occurs_invariant(u->body, u->var, 0)) {
+    // TODO: `vb.occurs_cov == 1`, we could also substitute Tuple{<:X} => Tuple{X},
+    // but it may change some ambiguity errors so we don't need to do it yet.
+    else if (vb->occurs_cov && is_leaf_bound(vb->ub) && !jl_has_free_typevars(vb->ub)) {
         // replace T<:x with x in covariant position when possible
         varval = vb->ub;
     }
@@ -2673,18 +2798,13 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         }
     }
 
-    // vb is still unbounded.
-    if (vb->intvalued == 2 && !(varval && jl_is_long(varval)))
-        vb->intvalued = 1;
-
     // TODO: this can prevent us from matching typevar identities later
     if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
     // remove/replace/rewrap free occurrences of this var in the environment
-    jl_varbinding_t *btemp = e->vars;
-    int wrap = 1;
-    while (btemp != NULL) {
+    jl_varbinding_t *wrap = NULL;
+    for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
         if (jl_has_typevar(btemp->lb, vb->var)) {
             if (vb->lb == (jl_value_t*)btemp->var) {
                 JL_GC_POP();
@@ -2698,31 +2818,25 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
                     res = jl_bottom_type;
                 }
             }
-            else if (btemp->lb == (jl_value_t*)vb->var)
+            else if (btemp->lb == (jl_value_t*)vb->var) {
                 btemp->lb = vb->lb;
-            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) &&
-                     !jl_has_typevar(vb->ub, btemp->var) && jl_has_typevar(btemp->ub, vb->var)) {
+            }
+            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) {
                 // if our variable is T, and some outer variable has constraint S = Ref{T},
                 // move the `where T` outside `where S` instead of putting it here. issue #21243.
-                if (btemp->innervars == NULL)
-                    btemp->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
-                if (newvar != vb->var) {
+                if (newvar != vb->var)
                     btemp->lb = jl_substitute_var(btemp->lb, vb->var, (jl_value_t*)newvar);
-                    btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar);
-                }
-                jl_array_ptr_1d_push(btemp->innervars, (jl_value_t*)newvar);
-                wrap = 0;
-                btemp = btemp->prev;
-                continue;
+                wrap = btemp;
             }
-            else
+            else {
                 btemp->lb = jl_new_struct(jl_unionall_type, vb->var, btemp->lb);
+            }
             assert((jl_value_t*)btemp->var != btemp->lb);
         }
         if (jl_has_typevar(btemp->ub, vb->var)) {
             if (vb->ub == (jl_value_t*)btemp->var) {
                 btemp->ub = omit_bad_union(btemp->ub, vb->var);
-                if (btemp->ub == NULL) {
+                if (btemp->ub == jl_bottom_type && btemp->ub != btemp->lb) {
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
@@ -2735,13 +2849,31 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
                     res = jl_bottom_type;
                 }
             }
-            else if (btemp->ub == (jl_value_t*)vb->var)
+            else if (btemp->ub == (jl_value_t*)vb->var) {
+                // TODO: this loses some constraints, such as in this test, where we replace T4<:S3 (e.g. T4==S3 since T4 only appears covariantly once) with T4<:Any
+                // a = Tuple{Float64,T3,T4} where T4 where T3
+                // b = Tuple{S2,Tuple{S3},S3} where S2 where S3
+                // Tuple{Float64, T3, T4} where {S3, T3<:Tuple{S3}, T4<:S3}
                 btemp->ub = vb->ub;
+            }
+            else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) {
+                if (newvar != vb->var)
+                    btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar);
+                wrap = btemp;
+            }
             else
                 btemp->ub = jl_new_struct(jl_unionall_type, vb->var, btemp->ub);
             assert((jl_value_t*)btemp->var != btemp->ub);
         }
-        btemp = btemp->prev;
+    }
+
+    if (wrap) {
+        // We only assign the newvar with the outmost var.
+        // This make sure we never create a UnionAll with 2 identical vars.
+        if (wrap->innervars == NULL)
+            wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
+        jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)newvar);
+        // TODO: should we move all the innervars here too?
     }
 
     // if `v` still occurs, re-wrap body in `UnionAll v` or eliminate the UnionAll
@@ -2764,17 +2896,32 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
             if (newvar != vb->var)
                 res = jl_substitute_var(res, vb->var, (jl_value_t*)newvar);
             varval = (jl_value_t*)newvar;
-            if (wrap)
+            if (!wrap)
                 res = jl_type_unionall((jl_tvar_t*)newvar, res);
         }
     }
 
-    if (res != jl_bottom_type && vb->innervars != NULL) {
-        int i;
-        for(i=0; i < jl_array_len(vb->innervars); i++) {
+    if (vb->innervars != NULL) {
+        for (size_t i = 0; i < jl_array_len(vb->innervars); i++) {
             jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb->innervars, i);
-            if (jl_has_typevar(res, var))
-                res = jl_type_unionall((jl_tvar_t*)var, res);
+            // the `btemp->prev` walk is only giving a sort of post-order guarantee (since we are
+            // iterating 2 trees at once), so once we set `wrap`, there might remain other branches
+            // of the type walk that now still may have incomplete bounds: finish those now too
+            jl_varbinding_t *wrap = NULL;
+            for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) {
+                if (btemp->depth0 == vb->depth0 && (jl_has_typevar(btemp->lb, var) || jl_has_typevar(btemp->ub, var))) {
+                    wrap = btemp;
+                }
+            }
+            if (wrap) {
+                if (wrap->innervars == NULL)
+                    wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0);
+                jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)var);
+            }
+            else if (res != jl_bottom_type) {
+                if (jl_has_typevar(res, var))
+                    res = jl_type_unionall((jl_tvar_t*)var, res);
+            }
         }
     }
 
@@ -2805,7 +2952,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
         }
         if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
             btemp->ub == (jl_value_t*)u->var) {
-            u = rename_unionall(u);
+            u = jl_rename_unionall(u);
             break;
         }
         btemp = btemp->prev;
@@ -2853,7 +3000,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
             JL_GC_PUSH1(&res);
             vb->ub = omit_bad_union(vb->ub, u->var);
             JL_GC_POP();
-            if (vb->ub == NULL)
+            if (vb->ub == jl_bottom_type && vb->ub != vb->lb)
                 res = jl_bottom_type;
         }
     }
@@ -2866,12 +3013,14 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
 
 static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    jl_value_t *res=NULL, *save=NULL;
+    jl_value_t *res = NULL;
     jl_savedenv_t se;
     jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
-    JL_GC_PUSH5(&res, &vb.lb, &vb.ub, &save, &vb.innervars);
-    save_env(e, &save, &se);
+                           e->invdepth, NULL, e->vars };
+    JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars);
+    save_env(e, &se, 1);
+    if (is_leaf_typevar(u->var) && !var_occurs_invariant(u->body, u->var, 0))
+        vb.constraintkind = 1;
     res = intersect_unionall_(t, u, e, R, param, &vb);
     if (vb.limited) {
         // if the environment got too big, avoid tree recursion and propagate the flag
@@ -2879,16 +3028,21 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
             e->vars->limited = 1;
     }
     else if (res != jl_bottom_type) {
-        if (vb.concrete || vb.occurs_inv>1 || vb.intvalued > 1 || u->var->lb != jl_bottom_type || (vb.occurs_inv && vb.occurs_cov)) {
-            restore_env(e, NULL, &se);
-            vb.occurs = vb.occurs_cov = vb.occurs_inv = 0;
+        int constraint1 = vb.constraintkind;
+        if (vb.concrete || vb.occurs_inv>1 || (vb.occurs_inv && vb.occurs_cov))
             vb.constraintkind = vb.concrete ? 1 : 2;
-            res = intersect_unionall_(t, u, e, R, param, &vb);
-        }
-        else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) {
-            restore_env(e, save, &se);
-            vb.occurs = vb.occurs_cov = vb.occurs_inv = 0;
+        else if (u->var->lb != jl_bottom_type)
+            vb.constraintkind = 2;
+        else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0))
             vb.constraintkind = 1;
+        int reintersection = constraint1 != vb.constraintkind || vb.concrete;
+        if (reintersection) {
+            if (constraint1 == 1) {
+                vb.lb = vb.var->lb;
+                vb.ub = vb.var->ub;
+            }
+            restore_env(e, &se, vb.constraintkind == 1 ? 1 : 0);
+            vb.occurs = vb.occurs_cov = vb.occurs_inv = 0;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
@@ -2897,6 +3051,8 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
     return res;
 }
 
+static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e);
+
 // check n = (length of vararg type v)
 static int intersect_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e, int8_t R)
 {
@@ -2905,15 +3061,14 @@ static int intersect_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e, int8
     if (N && jl_is_typevar(N)) {
         jl_value_t *len = jl_box_long(n);
         JL_GC_PUSH1(&len);
-        jl_value_t *il = R ? intersect(len, N, e, 2) : intersect(N, len, e, 2);
+        jl_value_t *il = R ? intersect_invariant(len, N, e) : intersect_invariant(N, len, e);
         JL_GC_POP();
-        if (il == jl_bottom_type)
+        if (il == NULL || il == jl_bottom_type)
             return 0;
     }
     return 1;
 }
 
-static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e);
 static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t offset, jl_stenv_t *e, int param)
 {
     // Vararg: covariant in first parameter, invariant in second
@@ -2925,31 +3080,37 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
     if (intersect(xp1, yp1, e, param==0 ? 1 : param) == jl_bottom_type)
         return jl_bottom_type;
     jl_value_t *i2=NULL, *ii = intersect(xp1, yp1, e, 1);
-    if (ii == jl_bottom_type) return jl_bottom_type;
-    JL_GC_PUSH2(&ii, &i2);
+    if (ii == jl_bottom_type)
+        return jl_bottom_type;
     if (!xp2 && !yp2) {
-        ii = (jl_value_t*)jl_wrap_vararg(ii, NULL);
-        JL_GC_POP();
+        if (obviously_egal(xp1, ii))
+            ii = (jl_value_t*)vmx;
+        else if (obviously_egal(yp1, ii))
+            ii = (jl_value_t*)vmy;
+        else {
+            JL_GC_PUSH1(&ii);
+            ii = (jl_value_t*)jl_wrap_vararg(ii, NULL);
+            JL_GC_POP();
+        }
         return ii;
     }
+    JL_GC_PUSH2(&ii, &i2);
+    assert(e->Loffset == 0);
+    e->Loffset = offset;
     jl_varbinding_t *xb = NULL, *yb = NULL;
-    if (xp2 && jl_is_typevar(xp2)) {
+    if (xp2) {
+        assert(jl_is_typevar(xp2));
         xb = lookup(e, (jl_tvar_t*)xp2);
-        if (xb) {
-            if (xb->intvalued == 0) xb->intvalued = 1;
-            xb->offset = offset;
-        }
+        if (xb) xb->intvalued = 1;
         if (!yp2)
-            i2 = bound_var_below((jl_tvar_t*)xp2, xb, e);
+            i2 = bound_var_below((jl_tvar_t*)xp2, xb, e, 0);
     }
-    if (yp2 && jl_is_typevar(yp2)) {
+    if (yp2) {
+        assert(jl_is_typevar(yp2));
         yb = lookup(e, (jl_tvar_t*)yp2);
-        if (yb) {
-            if (yb->intvalued == 0) yb->intvalued = 1;
-            yb->offset = -offset;
-        }
+        if (yb) yb->intvalued = 1;
         if (!xp2)
-            i2 = bound_var_below((jl_tvar_t*)yp2, yb, e);
+            i2 = bound_var_below((jl_tvar_t*)yp2, yb, e, 1);
     }
     if (xp2 && yp2) {
         // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2
@@ -2960,9 +3121,16 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
             i2 = jl_bottom_type;
         }
     }
-    if (xb) xb->offset = 0;
-    if (yb) yb->offset = 0;
-    ii = i2 == jl_bottom_type ? (jl_value_t*)jl_bottom_type : (jl_value_t*)jl_wrap_vararg(ii, i2);
+    assert(e->Loffset == offset);
+    e->Loffset = 0;
+    if (i2 == jl_bottom_type)
+        ii = (jl_value_t*)jl_bottom_type;
+    else if (xp2 && obviously_egal(xp1, ii) && obviously_egal(xp2, i2))
+        ii = (jl_value_t*)vmx;
+    else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2))
+        ii = (jl_value_t*)vmy;
+    else
+        ii = (jl_value_t*)jl_wrap_vararg(ii, i2);
     JL_GC_POP();
     return ii;
 }
@@ -2971,70 +3139,99 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t
 static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int param)
 {
     size_t lx = jl_nparams(xd), ly = jl_nparams(yd);
+    size_t llx = lx, lly = ly;
     if (lx == 0 && ly == 0)
         return (jl_value_t*)yd;
-    int vx=0, vy=0, vvx = (lx > 0 && jl_is_vararg(jl_tparam(xd, lx-1)));
-    int vvy = (ly > 0 && jl_is_vararg(jl_tparam(yd, ly-1)));
-    if (!vvx && !vvy && lx != ly)
-        return jl_bottom_type;
-    jl_svec_t *params = jl_alloc_svec(lx > ly ? lx : ly);
-    jl_value_t *res=NULL;
-    JL_GC_PUSH1(&params);
+    int vx=0, vy=0;
+    jl_vararg_kind_t vvx = lx > 0 ? jl_vararg_kind(jl_tparam(xd, lx-1)) : JL_VARARG_NONE;
+    jl_vararg_kind_t vvy = ly > 0 ? jl_vararg_kind(jl_tparam(yd, ly-1)) : JL_VARARG_NONE;
+    if (vvx == JL_VARARG_INT)
+        llx += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(xd, lx-1))) - 1;
+    if (vvy == JL_VARARG_INT)
+        lly += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(yd, ly-1))) - 1;
+
+    if ((vvx == JL_VARARG_NONE || vvx == JL_VARARG_INT) &&
+        (vvy == JL_VARARG_NONE || vvy == JL_VARARG_INT)) {
+        if (llx != lly)
+            return jl_bottom_type;
+    }
+
+    size_t np = llx > lly ? llx : lly;
+    jl_value_t *res = NULL;
+    jl_svec_t *p = NULL;
+    jl_value_t **params;
+    jl_value_t **roots;
+    JL_GC_PUSHARGS(roots, np < 64 ? np : 1);
+    if (np < 64) {
+        params = roots;
+    }
+    else {
+        p = jl_alloc_svec(np);
+        roots[0] = (jl_value_t*)p;
+        params = jl_svec_data(p);
+    }
     size_t i=0, j=0;
     jl_value_t *xi, *yi;
+    int isx = 1, isy = 1; // try to reuse the object x or y as res whenever we can (e.g. when it is the supertype) instead of allocating a copy
     while (1) {
         vx = vy = 0;
-        xi = i < lx ? jl_tparam(xd, i) : NULL;
-        yi = j < ly ? jl_tparam(yd, j) : NULL;
+        xi = i < llx ? jl_tparam(xd, i < lx ? i : lx - 1) : NULL;
+        yi = j < lly ? jl_tparam(yd, j < ly ? j : ly - 1) : NULL;
         if (xi == NULL && yi == NULL) {
-            assert(i == j && i == jl_svec_len(params));
+            assert(i == j && i == np);
             break;
         }
-        if (xi && jl_is_vararg(xi)) vx = 1;
-        if (yi && jl_is_vararg(yi)) vy = 1;
+        if (xi && jl_is_vararg(xi)) vx = vvx != JL_VARARG_INT;
+        if (yi && jl_is_vararg(yi)) vy = vvy != JL_VARARG_INT;
         if (xi == NULL || yi == NULL) {
-            res = jl_bottom_type;
-            if (vx && intersect_vararg_length(xi, ly+1-lx, e, 0))
-                res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), j);
-            if (vy && intersect_vararg_length(yi, lx+1-ly, e, 1))
-                res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), i);
+            if (vx && intersect_vararg_length(xi, lly+1-llx, e, 0)) {
+                np = j;
+                p = NULL;
+            }
+            else if (vy && intersect_vararg_length(yi, llx+1-lly, e, 1)) {
+                np = i;
+                p = NULL;
+            }
+            else {
+                res = jl_bottom_type;
+            }
             break;
         }
         jl_value_t *ii = NULL;
-        if (vx && vy)
+        if (vx && vy) {
             ii = intersect_varargs((jl_vararg_t*)xi,
                                    (jl_vararg_t*)yi,
-                                   ly - lx, // xi's offset: {A^n...,Vararg{T,N}} ∩ {Vararg{S,M}}
+                                   lly - llx, // xi's offset: {A^n...,Vararg{T,N}} ∩ {Vararg{S,M}}
                                             // {(A∩S)^n...,Vararg{T∩S,N}} plus N = M-n
-                                   e, param);
+                                   e,
+                                   param);
+        }
         else {
-            if (vx)
-                xi = jl_unwrap_vararg(xi);
-            if (vy)
-                yi = jl_unwrap_vararg(yi);
-            ii = intersect(xi, yi, e, param == 0 ? 1 : param);
+            ii = intersect(jl_is_vararg(xi) ? jl_unwrap_vararg(xi) : xi,
+                           jl_is_vararg(yi) ? jl_unwrap_vararg(yi) : yi,
+                           e,
+                           param == 0 ? 1 : param);
         }
         if (ii == jl_bottom_type) {
             if (vx && vy) {
                 jl_varbinding_t *xb=NULL, *yb=NULL;
                 jl_value_t *xlen = jl_unwrap_vararg_num(xi);
-                if (xlen && jl_is_typevar(xlen))
-                    xb = lookup(e, (jl_tvar_t*)xlen);
+                assert(xlen == NULL || jl_is_typevar(xlen));
+                if (xlen) xb = lookup(e, (jl_tvar_t*)xlen);
                 jl_value_t *ylen = jl_unwrap_vararg_num(yi);
-                if (ylen && jl_is_typevar(ylen))
-                    yb = lookup(e, (jl_tvar_t*)ylen);
+                assert(ylen == NULL || jl_is_typevar(ylen));
+                if (ylen) yb = lookup(e, (jl_tvar_t*)ylen);
                 int len = i > j ? i : j;
-                if ((xb && jl_is_long(xb->lb) && lx-1+jl_unbox_long(xb->lb) != len) ||
-                    (yb && jl_is_long(yb->lb) && ly-1+jl_unbox_long(yb->lb) != len)) {
-                    res = jl_bottom_type;
-                }
-                else if (param == 2 && jl_is_unionall(xi) != jl_is_unionall(yi)) {
+                if ((xb && jl_is_long(xb->lb) && llx-1+jl_unbox_long(xb->lb) != len) ||
+                    (yb && jl_is_long(yb->lb) && lly-1+jl_unbox_long(yb->lb) != len)) {
                     res = jl_bottom_type;
                 }
                 else {
-                    if (xb) set_var_to_const(xb, jl_box_long(len-lx+1), yb);
-                    if (yb) set_var_to_const(yb, jl_box_long(len-ly+1), xb);
-                    res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), len);
+                    assert(e->Loffset == 0);
+                    if (xb) set_var_to_const(xb, jl_box_long(len-llx+1), e, 0);
+                    if (yb) set_var_to_const(yb, jl_box_long(len-lly+1), e, 1);
+                    np = len;
+                    p = NULL;
                 }
             }
             else {
@@ -3042,15 +3239,44 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten
             }
             break;
         }
-        jl_svecset(params, (i > j ? i : j), ii);
+        isx = isx && ii == xi;
+        isy = isy && ii == yi;
+        if (p)
+            jl_svecset(p, (i > j ? i : j), ii);
+        else
+            params[i > j ? i : j] = ii;
         if (vx && vy)
             break;
-        if (i < lx-1 || !vx) i++;
-        if (j < ly-1 || !vy) j++;
+        if (!vx) i++;
+        if (!vy) j++;
     }
     // TODO: handle Vararg with explicit integer length parameter
-    if (res == NULL)
-        res = (jl_value_t*)jl_apply_tuple_type(params);
+    if (res == NULL) {
+        assert(!p || np == jl_svec_len(p));
+        isx = isx && lx == np;
+        isy = isy && ly == np;
+        if (!isx && !isy) {
+            // do a more careful check now for equivalence
+            if (lx == np) {
+                isx = 1;
+                for (i = 0; i < np; i++)
+                    isx = isx && obviously_egal(params[i], jl_tparam(xd, i));
+            }
+            if (!isx && ly == np) {
+                isy = 1;
+                for (i = 0; i < np; i++)
+                    isy = isy && obviously_egal(params[i], jl_tparam(yd, i));
+            }
+        }
+        if (isx)
+            res = (jl_value_t*)xd;
+        else if (isy)
+            res = (jl_value_t*)yd;
+        else if (p)
+            res = jl_apply_tuple_type(p);
+        else
+            res = jl_apply_tuple_type_v(params, np);
+    }
     JL_GC_POP();
     return res;
 }
@@ -3070,6 +3296,7 @@ static jl_value_t *intersect_sub_datatype(jl_datatype_t *xd, jl_datatype_t *yd,
     // attempt to populate additional constraints into `e`
     // if that attempt fails, then return bottom
     // otherwise return xd (finish_unionall will later handle propagating those constraints)
+    assert(e->Loffset == 0);
     jl_value_t *isuper = R ? intersect((jl_value_t*)yd, (jl_value_t*)xd->super, e, param) :
                              intersect((jl_value_t*)xd->super, (jl_value_t*)yd, e, param);
     if (isuper == jl_bottom_type)
@@ -3079,44 +3306,38 @@ static jl_value_t *intersect_sub_datatype(jl_datatype_t *xd, jl_datatype_t *yd,
 
 static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
-    if (!jl_has_free_typevars(x) && !jl_has_free_typevars(y)) {
+    if (e->Loffset == 0 && !jl_has_free_typevars(x) && !jl_has_free_typevars(y)) {
         return (jl_subtype(x,y) && jl_subtype(y,x)) ? y : NULL;
     }
     e->invdepth++;
-    e->Rinvdepth++;
     jl_value_t *ii = intersect(x, y, e, 2);
     e->invdepth--;
-    e->Rinvdepth--;
-    // Skip the following subtype check if `ii` was returned from `set_vat_to_const`.
-    // As `var_gt`/`var_lt` might not handle `Vararg` length offset correctly.
-    // TODO: fix this on subtype side and remove this branch.
-    if (jl_is_long(ii) && ((jl_is_typevar(x) && jl_is_long(y)) || (jl_is_typevar(y) && jl_is_long(x))))
-        return ii;
-    if (jl_is_typevar(x) && jl_is_typevar(y) && (jl_is_typevar(ii) || !jl_is_type(ii)))
-        return ii;
+    if (jl_is_typevar(x) && jl_is_typevar(y) && jl_is_typevar(ii))
+        return ii; // skip the following check due to possible circular constraints.
     if (ii == jl_bottom_type) {
         if (!subtype_in_env(x, jl_bottom_type, e))
             return NULL;
-        flip_vars(e);
+        flip_vars(e); flip_offset(e);
         if (!subtype_in_env(y, jl_bottom_type, e)) {
-            flip_vars(e);
+            flip_vars(e); flip_offset(e);
             return NULL;
         }
-        flip_vars(e);
+        flip_vars(e); flip_offset(e);
         return jl_bottom_type;
     }
-    jl_value_t *root=NULL;
     jl_savedenv_t se;
-    JL_GC_PUSH2(&ii, &root);
-    save_env(e, &root, &se);
-    if (!subtype_in_env_existential(x, y, e, 0, e->invdepth))
+    JL_GC_PUSH1(&ii);
+    save_env(e, &se, 1);
+    if (!subtype_in_env_existential(x, y, e))
         ii = NULL;
     else {
-        restore_env(e, root, &se);
-        if (!subtype_in_env_existential(y, x, e, 0, e->invdepth))
+        restore_env(e, &se, 1);
+        flip_offset(e);
+        if (!subtype_in_env_existential(y, x, e))
             ii = NULL;
+        flip_offset(e);
     }
-    restore_env(e, root, &se);
+    restore_env(e, &se, 1);
     free_env(&se);
     JL_GC_POP();
     return ii;
@@ -3125,6 +3346,7 @@ static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t
 // intersection where x == Type{...} and y is not
 static jl_value_t *intersect_type_type(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int8_t R)
 {
+    assert(e->Loffset == 0);
     jl_value_t *p0 = jl_tparam0(x);
     if (!jl_is_typevar(p0))
         return (jl_typeof(p0) == y) ? x : jl_bottom_type;
@@ -3173,6 +3395,23 @@ static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOT
     return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1);
 }
 
+static int has_typevar_via_env(jl_value_t *x, jl_tvar_t *t, jl_stenv_t *e)
+{
+    if (e->Loffset == 0) {
+        jl_varbinding_t *temp = e->vars;
+        while (temp != NULL) {
+            if (temp->var == t)
+                break;
+            if (temp->lb == temp->ub &&
+                temp->lb == (jl_value_t *)t &&
+                jl_has_typevar(x, temp->var))
+                return 1;
+            temp = temp->prev;
+        }
+    }
+    return jl_has_typevar(x, t);
+}
+
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
 // this is used to record the positions where type variables occur for the
@@ -3199,54 +3438,82 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 jl_value_t *xub = xx ? xx->ub : ((jl_tvar_t*)x)->ub;
                 jl_value_t *ylb = yy ? yy->lb : ((jl_tvar_t*)y)->lb;
                 jl_value_t *yub = yy ? yy->ub : ((jl_tvar_t*)y)->ub;
-                record_var_occurrence(xx, e, param);
                 if (xx && yy && xx->depth0 != yy->depth0) {
+                    record_var_occurrence(xx, e, param);
                     record_var_occurrence(yy, e, param);
                     return subtype_in_env(yy->ub, yy->lb, e) ? y : jl_bottom_type;
                 }
                 if (xub == xlb && jl_is_typevar(xub)) {
+                    record_var_occurrence(xx, e, param);
                     if (y == xub) {
                         record_var_occurrence(yy, e, param);
                         return y;
                     }
-                    if (!xx || xx->offset == 0)
-                        return intersect(y, xub, e, param);
-                    // try to propagate the x's offset to xub.
-                    jl_varbinding_t *tvb = lookup(e, (jl_tvar_t*)xub);
-                    assert(tvb && tvb->offset == 0);
-                    tvb->offset = xx->offset;
-                    jl_value_t *res = intersect(y, xub, e, param);
-                    tvb->offset = 0;
+                    if (R) flip_offset(e);
+                    jl_value_t *res = intersect(xub, y, e, param);
+                    if (R) flip_offset(e);
+                    return res;
+                }
+                if (yub == ylb && jl_is_typevar(yub)) {
+                    record_var_occurrence(yy, e, param);
+                    if (R) flip_offset(e);
+                    jl_value_t *res = intersect(x, yub, e, param);
+                    if (R) flip_offset(e);
+                    return res;
+                }
+                if (xub == xlb && jl_is_typevar(xub)) {
+                    record_var_occurrence(xx, e, param);
+                    if (y == xub) {
+                        record_var_occurrence(yy, e, param);
+                        return y;
+                    }
+                    if (R) flip_offset(e);
+                    jl_value_t *res = intersect(xub, y, e, param);
+                    if (R) flip_offset(e);
                     return res;
                 }
-                record_var_occurrence(yy, e, param);
                 if (yub == ylb && jl_is_typevar(yub)) {
-                    // We always set inner var equal to outer.
-                    if (!yy || yy->offset == 0)
-                        return intersect(x, yub, e, param);
-                    // try to propagate the y's offset to yub.
-                    jl_varbinding_t *tvb = lookup(e, (jl_tvar_t*)yub);
-                    assert(tvb && tvb->offset == 0);
-                    tvb->offset = yy->offset;
+                    record_var_occurrence(yy, e, param);
+                    if (R) flip_offset(e);
                     jl_value_t *res = intersect(x, yub, e, param);
-                    tvb->offset = 0;
+                    if (R) flip_offset(e);
                     return res;
                 }
+                record_var_occurrence(xx, e, param);
+                record_var_occurrence(yy, e, param);
+                int xoffset = R ? -e->Loffset : e->Loffset;
                 if (!jl_is_type(ylb) && !jl_is_typevar(ylb)) {
                     if (xx)
-                        return set_var_to_const(xx, ylb, yy);
-                    if ((xlb == jl_bottom_type && xub == (jl_value_t*)jl_any_type) || jl_egal(xlb, ylb))
-                        return ylb;
+                        return set_var_to_const(xx, ylb, e, R);
+                    if ((xlb == jl_bottom_type && xub == (jl_value_t*)jl_any_type) || jl_egal(xlb, ylb)) {
+                        if (xoffset == 0)
+                            return ylb;
+                        else if (jl_is_long(ylb)) {
+                            if (xoffset > 0)
+                                return ylb;
+                            else
+                                return jl_box_long(jl_unbox_long(ylb) + xoffset);
+                        }
+                    }
                     return jl_bottom_type;
                 }
                 if (!jl_is_type(xlb) && !jl_is_typevar(xlb)) {
                     if (yy)
-                        return set_var_to_const(yy, xlb, xx);
-                    if (ylb == jl_bottom_type && yub == (jl_value_t*)jl_any_type)
-                        return xlb;
+                        return set_var_to_const(yy, xlb, e, !R);
+                    if (ylb == jl_bottom_type && yub == (jl_value_t*)jl_any_type) {
+                        if (xoffset == 0)
+                            return xlb;
+                        else if (jl_is_long(xlb)) {
+                            if (xoffset < 0)
+                                return xlb;
+                            else
+                                return jl_box_long(jl_unbox_long(ylb) - xoffset);
+                        }
+                    }
                     return jl_bottom_type;
                 }
                 int ccheck;
+                if (R) flip_offset(e);
                 if (xlb == xub && ylb == yub &&
                     jl_has_typevar(xlb, (jl_tvar_t *)y) &&
                     jl_has_typevar(ylb, (jl_tvar_t *)x)) {
@@ -3261,13 +3528,19 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 }
                 else {
                     if (R) flip_vars(e);
-                    ccheck = subtype_in_env(xlb, yub, e) && subtype_in_env(ylb, xub, e);
+                    ccheck = subtype_in_env(xlb, yub, e);
+                    if (ccheck) {
+                        flip_offset(e);
+                        ccheck = subtype_in_env(ylb, xub, e);
+                        flip_offset(e);
+                    }
                     if (R) flip_vars(e);
                 }
+                if (R) flip_offset(e);
                 if (!ccheck)
                     return jl_bottom_type;
-                if ((jl_has_typevar(xub, (jl_tvar_t*)y) || jl_has_typevar(xub, (jl_tvar_t*)x)) &&
-                    (jl_has_typevar(yub, (jl_tvar_t*)x) || jl_has_typevar(yub, (jl_tvar_t*)y))) {
+                if ((has_typevar_via_env(xub, (jl_tvar_t*)y, e) || has_typevar_via_env(xub, (jl_tvar_t*)x, e)) &&
+                    (has_typevar_via_env(yub, (jl_tvar_t*)x, e) || has_typevar_via_env(yub, (jl_tvar_t*)y, e))) {
                     // TODO: This doesn't make much sense.
                     // circular constraint. the result will be Bottom, but in the meantime
                     // we need to avoid computing intersect(xub, yub) since it won't terminate.
@@ -3275,31 +3548,29 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 }
                 jl_value_t *ub=NULL, *lb=NULL;
                 JL_GC_PUSH2(&lb, &ub);
-                ub = intersect_aside(xub, yub, e, 0, xx ? xx->depth0 : 0);
+                int d = xx ? xx->depth0 : yy ? yy->depth0 : 0;
+                ub = R ? intersect_aside(yub, xub, e, d) : intersect_aside(xub, yub, e, d);
                 if (reachable_var(xlb, (jl_tvar_t*)y, e))
                     lb = ylb;
                 else
                     lb = simple_join(xlb, ylb);
-                if (yy && yy->offset == 0) {
+                if (yy && xoffset == 0) {
                     yy->lb = lb;
                     if (!reachable_var(ub, (jl_tvar_t*)y, e))
                         yy->ub = ub;
                     assert(yy->ub != y);
                     assert(yy->lb != y);
                 }
-                if (xx && xx->offset == 0 && !reachable_var(y, (jl_tvar_t*)x, e)) {
+                if (xx && xoffset == 0 && !reachable_var(y, (jl_tvar_t*)x, e)) {
                     xx->lb = y;
                     xx->ub = y;
                     assert(xx->ub != x);
                 }
                 JL_GC_POP();
                 // Here we always return the shorter `Vararg`'s length.
-                if ((xx && xx->offset < 0) || (yy && yy->offset > 0)) {
-                    if (yy) yy->intvalued = 2;
-                    return x;
-                }
-                return y;
+                return xoffset < 0 ? x : y;
             }
+            assert(e->Loffset == 0);
             record_var_occurrence(xx, e, param);
             record_var_occurrence(yy, e, param);
             if (xx && yy && xx->concrete && !yy->concrete) {
@@ -3314,7 +3585,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
         record_var_occurrence(lookup(e, (jl_tvar_t*)y), e, param);
         return intersect_var((jl_tvar_t*)y, x, e, 1, param);
     }
-    if (!jl_has_free_typevars(x) && !jl_has_free_typevars(y)) {
+    if (e->Loffset == 0 && !jl_has_free_typevars(x) && !jl_has_free_typevars(y)) {
         if (jl_subtype(x, y)) return x;
         if (jl_subtype(y, x)) return y;
     }
@@ -3335,9 +3606,9 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
     if (jl_is_unionall(x)) {
         if (jl_is_unionall(y)) {
             jl_value_t *a=NULL, *b=jl_bottom_type, *res=NULL;
-            JL_GC_PUSH2(&a,&b);
+            JL_GC_PUSH2(&a, &b);
             jl_savedenv_t se;
-            save_env(e, NULL, &se);
+            save_env(e, &se, 0);
             a = intersect_unionall(y, (jl_unionall_t*)x, e, 0, param);
             if (jl_is_unionall(a)) {
                 jl_unionall_t *ua = (jl_unionall_t*)a;
@@ -3345,7 +3616,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                     jl_unionall_t *ub = (jl_unionall_t*)ua->body;
                     if (jl_has_typevar(ub->var->ub, ua->var) ||
                         jl_has_typevar(ub->var->lb, ua->var)) {
-                        restore_env(e, NULL, &se); // restore counts
+                        restore_env(e, &se, 0); // restore counts
                         b = intersect_unionall(x, (jl_unionall_t*)y, e, 1, param);
                     }
                 }
@@ -3382,20 +3653,40 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
             size_t i, np = jl_nparams(xd);
             jl_value_t **newparams;
             JL_GC_PUSHARGS(newparams, np);
-            for (i=0; i < np; i++) {
+            int isx = 1, isy = 1; // try to reuse the object x or y as res whenever we can (e.g. when it is the supertype) instead of allocating a copy
+            for (i = 0; i < np; i++) {
                 jl_value_t *xi = jl_tparam(xd, i), *yi = jl_tparam(yd, i);
                 jl_value_t *ii = intersect_invariant(xi, yi, e);
                 if (ii == NULL)
                     break;
+                isx = isx && ii == xi;
+                isy = isy && ii == yi;
                 newparams[i] = ii;
             }
             jl_value_t *res = jl_bottom_type;
-            if (i >= np) {
-                JL_TRY {
-                    res = jl_apply_type(xd->name->wrapper, newparams, np);
+            if (i == np) {
+                if (!isx && !isy) {
+                    // do a more careful check now for equivalence
+                    isx = 1;
+                    for (i = 0; i < np; i++)
+                        isx = isx && obviously_egal(newparams[i], jl_tparam(xd, i));
+                    if (!isx) {
+                        isy = 1;
+                        for (i = 0; i < np; i++)
+                            isy = isy && obviously_egal(newparams[i], jl_tparam(yd, i));
+                    }
                 }
-                JL_CATCH {
-                    res = jl_bottom_type;
+                if (isx)
+                    res = x;
+                else if (isy)
+                    res = y;
+                else {
+                    JL_TRY {
+                        res = jl_apply_type(xd->name->wrapper, newparams, np);
+                    }
+                    JL_CATCH {
+                        res = jl_bottom_type;
+                    }
                 }
             }
             JL_GC_POP();
@@ -3418,35 +3709,54 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
     return jl_bottom_type;
 }
 
-static int merge_env(jl_stenv_t *e, jl_value_t **root, jl_savedenv_t *se, int count)
+static int merge_env(jl_stenv_t *e, jl_savedenv_t *se, int count)
 {
-    if (count == 0) {
-        int len = current_env_length(e);
-        *root = (jl_value_t*)jl_alloc_svec(len * 3);
-        se->buf = (int8_t*)(len > 8 ? malloc_s(len * 3) : &se->_space);
-        memset(se->buf, 0, len * 3);
+    if (count == 0)
+        alloc_env(e, se, 1);
+    jl_value_t **roots = NULL;
+    int nroots = 0;
+    if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
+        jl_svec_t *sv = (jl_svec_t*)se->roots[0];
+        assert(jl_is_svec(sv));
+        roots = jl_svec_data(sv);
+        nroots = jl_svec_len(sv);
+    }
+    else {
+        roots = se->roots;
+        nroots = se->gcframe.nroots >> 2;
     }
     int n = 0;
     jl_varbinding_t *v = e->vars;
-    jl_value_t *b1 = NULL, *b2 = NULL;
-    JL_GC_PUSH2(&b1, &b2); // clang-sagc does not understand that *root is rooted already
     v = e->vars;
     while (v != NULL) {
+        if (count == 0) {
+            // need to initialize this
+            se->buf[n] = 0;
+            se->buf[n+1] = 0;
+            se->buf[n+2] = 0;
+        }
         if (v->occurs) {
             // only merge lb/ub/innervars if this var occurs.
-            b1 = jl_svecref(*root, n);
+            jl_value_t *b1, *b2;
+            b1 = roots[n];
+            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
             b2 = v->lb;
-            jl_svecset(*root, n, b1 ? simple_meet(b1, b2, 0) : b2);
-            b1 = jl_svecref(*root, n+1);
+            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+            roots[n] = b1 ? simple_meet(b1, b2, 0) : b2;
+            b1 = roots[n+1];
+            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
             b2 = v->ub;
-            jl_svecset(*root, n+1, b1 ? simple_join(b1, b2) : b2);
-            b1 = jl_svecref(*root, n+2);
+            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+            roots[n+1] = b1 ? simple_join(b1, b2) : b2;
+            b1 = roots[n+2];
+            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
             b2 = (jl_value_t*)v->innervars;
+            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
             if (b2 && b1 != b2) {
                 if (b1)
                     jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
                 else
-                    jl_svecset(*root, n+2, b2);
+                    roots[n+2] = b2;
             }
             // record the meeted vars.
             se->buf[n] = 1;
@@ -3459,33 +3769,52 @@ static int merge_env(jl_stenv_t *e, jl_value_t **root, jl_savedenv_t *se, int co
         n = n + 3;
         v = v->prev;
     }
-    JL_GC_POP();
+    assert(n == nroots); (void)nroots;
     return count + 1;
 }
 
 // merge untouched vars' info.
-static void final_merge_env(jl_value_t **merged, jl_savedenv_t *me, jl_value_t **saved, jl_savedenv_t *se)
+static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
 {
-    int l = jl_svec_len(*merged);
-    assert(l == jl_svec_len(*saved) && l%3 == 0);
-    jl_value_t *b1 = NULL, *b2 = NULL;
-    JL_GC_PUSH2(&b1, &b2);
-    for (int n = 0; n < l; n = n + 3) {
-        if (jl_svecref(*merged, n) == NULL)
-            jl_svecset(*merged, n, jl_svecref(*saved, n));
-        if (jl_svecref(*merged, n+1) == NULL)
-            jl_svecset(*merged, n+1, jl_svecref(*saved, n+1));
-        b1 = jl_svecref(*merged, n+2);
-        b2 = jl_svecref(*saved , n+2);
+    jl_value_t **merged = NULL;
+    jl_value_t **saved = NULL;
+    int nroots = 0;
+    assert(se->gcframe.nroots == me->gcframe.nroots);
+    if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
+        jl_svec_t *sv = (jl_svec_t*)se->roots[0];
+        assert(jl_is_svec(sv));
+        saved = jl_svec_data(sv);
+        nroots = jl_svec_len(sv);
+        sv = (jl_svec_t*)me->roots[0];
+        assert(jl_is_svec(sv));
+        merged = jl_svec_data(sv);
+        assert(nroots == jl_svec_len(sv));
+    }
+    else {
+        saved = se->roots;
+        merged = me->roots;
+        nroots = se->gcframe.nroots >> 2;
+    }
+    assert(nroots == current_env_length(e) * 3);
+    assert(nroots % 3 == 0);
+    for (int n = 0; n < nroots; n = n + 3) {
+        if (merged[n] == NULL)
+            merged[n] = saved[n];
+        if (merged[n+1] == NULL)
+            merged[n+1] = saved[n+1];
+        jl_value_t *b1, *b2;
+        b1 = merged[n+2];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = saved[n+2];
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know this came from our GC frame
         if (b2 && b1 != b2) {
             if (b1)
                 jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
             else
-                jl_svecset(*merged, n+2, b2);
+                merged[n+2] = b2;
         }
         me->buf[n] |= se->buf[n];
     }
-    JL_GC_POP();
 }
 
 static void expand_local_env(jl_stenv_t *e, jl_value_t *res)
@@ -3517,19 +3846,17 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     e->Runions.more = 0;
     e->Runions.used = 0;
     jl_value_t **is;
-    JL_GC_PUSHARGS(is, 4);
-    jl_value_t **saved = &is[2];
-    jl_value_t **merged = &is[3];
+    JL_GC_PUSHARGS(is, 2);
     jl_savedenv_t se, me;
-    save_env(e, saved, &se);
+    save_env(e, &se, 1);
     int niter = 0, total_iter = 0;
     clean_occurs(e);
     is[0] = intersect(x, y, e, 0); // root
     if (is[0] != jl_bottom_type) {
         expand_local_env(e, is[0]);
-        niter = merge_env(e, merged, &me, niter);
+        niter = merge_env(e, &me, niter);
     }
-    restore_env(e, *saved, &se);
+    restore_env(e, &se, 1);
     while (next_union_state(e, 1)) {
         if (e->emptiness_only && is[0] != jl_bottom_type)
             break;
@@ -3540,9 +3867,9 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         is[1] = intersect(x, y, e, 0);
         if (is[1] != jl_bottom_type) {
             expand_local_env(e, is[1]);
-            niter = merge_env(e, merged, &me, niter);
+            niter = merge_env(e, &me, niter);
         }
-        restore_env(e, *saved, &se);
+        restore_env(e, &se, 1);
         if (is[0] == jl_bottom_type)
             is[0] = is[1];
         else if (is[1] != jl_bottom_type) {
@@ -3556,8 +3883,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
     }
     if (niter) {
-        final_merge_env(merged, &me, saved, &se);
-        restore_env(e, *merged, &me);
+        final_merge_env(e, &me, &se);
+        restore_env(e, &me, 1);
         free_env(&me);
     }
     free_env(&se);
@@ -3683,13 +4010,14 @@ static jl_value_t *switch_union_tuple(jl_value_t *a, jl_value_t *b)
         ts[1] = jl_tparam(b, i);
         jl_svecset(vec, i, jl_type_union(ts, 2));
     }
-    jl_value_t *ans = (jl_value_t*)jl_apply_tuple_type(vec);
+    jl_value_t *ans = jl_apply_tuple_type(vec);
     JL_GC_POP();
     return ans;
 }
 
 // `a` might have a non-empty intersection with some concrete type b even if !(a<:b) and !(b<:a)
 // For example a=`Tuple{Type{<:Vector}}` and b=`Tuple{DataType}`
+// TODO: this query is partly available memoized as jl_type_equality_is_identity
 static int might_intersect_concrete(jl_value_t *a)
 {
     if (jl_is_unionall(a))
@@ -3739,9 +4067,9 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t *
         *ans = a; sz = szb;
         if (issubty) *issubty = 1;
     }
-    else if (lta && ltb) {
-        goto bot;
-    }
+    // else if (lta && ltb) { // !jl_type_equality_is_identity known in this case because obviously_disjoint returned false
+    //     goto bot;
+    // }
     else if (jl_subtype(b, a)) {
         *ans = b;
     }
@@ -3859,39 +4187,139 @@ int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv)
 
 // specificity comparison
 
-static int eq_msp(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env)
+static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, jl_typeenv_t *env)
 {
     if (!(jl_is_type(a) || jl_is_typevar(a)) ||
         !(jl_is_type(b) || jl_is_typevar(b)))
         return jl_egal(a, b);
+    if (a == b) // assume the TypeVar env is the same??
+        return 1;
+    if (jl_typeof(a) == jl_typeof(b) && jl_types_egal(a, b))
+        return 1;
+    if (obviously_unequal(a, b))
+        return 0;
+    // the following is an interleaved version of:
+    //   return jl_type_equal(a, b)
+    // where we try to do the fast checks before the expensive ones
+    if (jl_is_datatype(a) && !jl_is_concrete_type(b)) {
+        // if one type looks simpler, check it on the right
+        // first in order to reject more quickly.
+        jl_value_t *temp = a;
+        a = b;
+        b = temp;
+    }
+    // first check if a <: b has an obvious answer
+    int subtype_ab = 2;
+    if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) {
+        subtype_ab = 1;
+    }
+    else if (obvious_subtype(a, b, b0, &subtype_ab)) {
+#ifdef NDEBUG
+        if (subtype_ab == 0)
+            return 0;
+#endif
+    }
+    else {
+        subtype_ab = 3;
+    }
+    // next check if b <: a has an obvious answer
+    int subtype_ba = 2;
+    if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) {
+        subtype_ba = 1;
+    }
+    else if (obvious_subtype(b, a, a0, &subtype_ba)) {
+#ifdef NDEBUG
+        if (subtype_ba == 0)
+            return 0;
+#endif
+    }
+    else {
+        subtype_ba = 3;
+    }
+    // finally, do full subtyping for any inconclusive test
     JL_GC_PUSH2(&a, &b);
-    jl_typeenv_t *e = env;
-    while (e != NULL) {
-        a = jl_type_unionall(e->var, a);
-        b = jl_type_unionall(e->var, b);
-        e = e->prev;
+    jl_typeenv_t *env2 = env;
+    while (env2 != NULL) {
+        a = jl_type_unionall(env2->var, a);
+        b = jl_type_unionall(env2->var, b);
+        env2 = env2->prev;
+    }
+    jl_stenv_t e;
+#ifdef NDEBUG
+    if (subtype_ab != 1)
+#endif
+    {
+        init_stenv(&e, NULL, 0);
+        int subtype = forall_exists_subtype(a, b, &e, 0);
+        assert(subtype_ab == 3 || subtype_ab == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b));
+#ifndef NDEBUG
+        if (subtype_ab != 0 && subtype_ab != 1) // ensures that running in a debugger doesn't change the result
+#endif
+        subtype_ab = subtype;
+#ifdef NDEBUG
+        if (subtype_ab == 0) {
+            JL_GC_POP();
+            return 0;
+        }
+#endif
+    }
+#ifdef NDEBUG
+    if (subtype_ba != 1)
+#endif
+    {
+        init_stenv(&e, NULL, 0);
+        int subtype = forall_exists_subtype(b, a, &e, 0);
+        assert(subtype_ba == 3 || subtype_ba == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b));
+#ifndef NDEBUG
+        if (subtype_ba != 0 && subtype_ba != 1) // ensures that running in a debugger doesn't change the result
+#endif
+        subtype_ba = subtype;
     }
-    int eq = jl_types_equal(a, b);
     JL_GC_POP();
-    return eq;
+    // all tests successful
+    return subtype_ab && subtype_ba;
 }
 
-static int sub_msp(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env)
+static int sub_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, jl_typeenv_t *env)
 {
-    JL_GC_PUSH2(&a, &b);
+    jl_stenv_t e;
+    if (y == (jl_value_t*)jl_any_type || x == jl_bottom_type)
+        return 1;
+    if (x == y ||
+        (jl_typeof(x) == jl_typeof(y) &&
+         (jl_is_unionall(y) || jl_is_uniontype(y)) &&
+         jl_types_egal(x, y))) {
+        return 1;
+    }
+    int obvious_sub = 2;
+    if (obvious_subtype(x, y, y0, &obvious_sub)) {
+#ifdef NDEBUG
+        return obvious_sub;
+#endif
+    }
+    else {
+        obvious_sub = 3;
+    }
+    JL_GC_PUSH2(&x, &y);
     while (env != NULL) {
-        if (jl_is_type(a) || jl_is_typevar(a))
-            a = jl_type_unionall(env->var, a);
-        if (jl_is_type(b) || jl_is_typevar(b))
-            b = jl_type_unionall(env->var, b);
+        if (jl_is_type(x) || jl_is_typevar(x))
+            x = jl_type_unionall(env->var, x);
+        if (jl_is_type(y) || jl_is_typevar(y))
+            y = jl_type_unionall(env->var, y);
         env = env->prev;
     }
-    int sub = jl_subtype(a, b);
+    init_stenv(&e, NULL, 0);
+    int subtype = forall_exists_subtype(x, y, &e, 0);
+    assert(obvious_sub == 3 || obvious_sub == subtype || jl_has_free_typevars(x) || jl_has_free_typevars(y));
+#ifndef NDEBUG
+    if (obvious_sub == 0 || obvious_sub == 1)
+        subtype = obvious_sub; // this ensures that running in a debugger doesn't change the result
+#endif
     JL_GC_POP();
-    return sub;
+    return subtype;
 }
 
-static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_typeenv_t *env);
+static int type_morespecific_(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, int invariant, jl_typeenv_t *env);
 
 static int num_occurs(jl_tvar_t *v, jl_typeenv_t *env);
 
@@ -3914,7 +4342,7 @@ static jl_value_t *nth_tuple_elt(jl_datatype_t *t JL_PROPAGATES_ROOT, size_t i)
     return NULL;
 }
 
-static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invariant, jl_typeenv_t *env)
+static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, jl_value_t *c0, jl_value_t *p0, int invariant, jl_typeenv_t *env)
 {
     size_t plen = jl_nparams(pdt);
     if (plen == 0) return 0;
@@ -3944,8 +4372,8 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari
             break;
         }
 
-        if (type_morespecific_(pe, ce, invariant, env)) {
-            assert(!type_morespecific_(ce, pe, invariant, env));
+        if (type_morespecific_(pe, ce, p0, c0, invariant, env)) {
+            assert(!type_morespecific_(ce, pe, c0, p0, invariant, env));
             return 0;
         }
 
@@ -3958,9 +4386,9 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari
         if (cva && pva && i >= clen-1 && i >= plen-1 && (some_morespecific || (cdiag && !pdiag)))
             return 1;
 
-        int cms = type_morespecific_(ce, pe, invariant, env);
+        int cms = type_morespecific_(ce, pe, c0, p0, invariant, env);
 
-        if (!cms && !sub_msp(ce, pe, env)) {
+        if (!cms && !sub_msp(ce, pe, p0, env)) {
             /*
               A bound vararg tuple can be more specific despite disjoint elements in order to
               preserve transitivity. For example in
@@ -3973,7 +4401,7 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari
         }
 
         // Tuple{..., T} not more specific than Tuple{..., Vararg{S}} if S is diagonal
-        if (!cms && i == clen-1 && clen == plen && !cva && pva && eq_msp(ce, pe, env) &&
+        if (!cms && i == clen-1 && clen == plen && !cva && pva && eq_msp(ce, pe, c0, p0, env) &&
             jl_is_typevar(ce) && jl_is_typevar(pe) && !cdiag && pdiag)
             return 0;
 
@@ -4002,7 +4430,7 @@ static size_t tuple_full_length(jl_value_t *t)
 
 // Called when a is a bound-vararg and b is not a vararg. Sets the vararg length
 // in a to match b, as long as this makes some earlier argument more specific.
-static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, int swap, jl_typeenv_t *env)
+static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, int swap, jl_typeenv_t *env)
 {
     size_t n = jl_nparams(a);
     int taillen = tuple_full_length(b)-n+1;
@@ -4022,12 +4450,12 @@ static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, int swap, jl_typ
     }
     int ret = -1;
     if (changed) {
-        if (eq_msp(b, (jl_value_t*)new_a, env))
+        if (eq_msp(b, (jl_value_t*)new_a, b0, a0, env))
             ret = swap;
         else if (swap)
-            ret = type_morespecific_(b, (jl_value_t*)new_a, 0, env);
+            ret = type_morespecific_(b, (jl_value_t*)new_a, b0, a0, 0, env);
         else
-            ret = type_morespecific_((jl_value_t*)new_a, b, 0, env);
+            ret = type_morespecific_((jl_value_t*)new_a, b, a0, b0, 0, env);
     }
     JL_GC_POP();
     return ret;
@@ -4072,38 +4500,61 @@ static int num_occurs(jl_tvar_t *v, jl_typeenv_t *env)
     return 0;
 }
 
+int tuple_cmp_typeofbottom(jl_datatype_t *a, jl_datatype_t *b)
+{
+    size_t i, la = jl_nparams(a), lb = jl_nparams(b);
+    for (i = 0; i < la || i < lb; i++) {
+        jl_value_t *pa = i < la ? jl_tparam(a, i) : NULL;
+        jl_value_t *pb = i < lb ? jl_tparam(b, i) : NULL;
+        assert(jl_typeofbottom_type); // for clang-sa
+        int xa = pa == (jl_value_t*)jl_typeofbottom_type || pa == (jl_value_t*)jl_typeofbottom_type->super;
+        int xb = pb == (jl_value_t*)jl_typeofbottom_type || pb == (jl_value_t*)jl_typeofbottom_type->super;
+        if (xa != xb)
+            return xa - xb;
+    }
+    return 0;
+}
+
+
 #define HANDLE_UNIONALL_A                                               \
     jl_unionall_t *ua = (jl_unionall_t*)a;                              \
     jl_typeenv_t newenv = { ua->var, 0x0, env };                        \
     newenv.val = (jl_value_t*)(intptr_t)count_occurs(ua->body, ua->var); \
-    return type_morespecific_(ua->body, b, invariant, &newenv)
+    return type_morespecific_(ua->body, b, a0, b0, invariant, &newenv)
 
 #define HANDLE_UNIONALL_B                                               \
     jl_unionall_t *ub = (jl_unionall_t*)b;                              \
     jl_typeenv_t newenv = { ub->var, 0x0, env };                        \
     newenv.val = (jl_value_t*)(intptr_t)count_occurs(ub->body, ub->var); \
-    return type_morespecific_(a, ub->body, invariant, &newenv)
+    return type_morespecific_(a, ub->body, a0, b0, invariant, &newenv)
 
-static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_typeenv_t *env)
+static int type_morespecific_(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, int invariant, jl_typeenv_t *env)
 {
     if (a == b)
         return 0;
 
     if (jl_is_tuple_type(a) && jl_is_tuple_type(b)) {
+        // compare whether a and b have Type{Union{}} included,
+        // which makes them instantly the most specific, regardless of all else,
+        // for whichever is left most (the left-to-right behavior here ensures
+        // we do not need to keep track of conflicts with multiple methods).
+        int msp = tuple_cmp_typeofbottom((jl_datatype_t*)a, (jl_datatype_t*)b);
+        if (msp)
+            return msp > 0;
         // When one is JL_VARARG_BOUND and the other has fixed length,
         // allow the argument length to fix the tvar
         jl_vararg_kind_t akind = jl_va_tuple_kind((jl_datatype_t*)a);
         jl_vararg_kind_t bkind = jl_va_tuple_kind((jl_datatype_t*)b);
         int ans = -1;
         if (akind == JL_VARARG_BOUND && bkind < JL_VARARG_BOUND) {
-            ans = args_morespecific_fix1(a, b, 0, env);
+            ans = args_morespecific_fix1(a, b, a0, b0, 0, env);
             if (ans == 1) return 1;
         }
         if (bkind == JL_VARARG_BOUND && akind < JL_VARARG_BOUND) {
-            ans = args_morespecific_fix1(b, a, 1, env);
+            ans = args_morespecific_fix1(b, a, b0, a0, 1, env);
             if (ans == 0) return 0;
         }
-        return tuple_morespecific((jl_datatype_t*)a, (jl_datatype_t*)b, invariant, env);
+        return tuple_morespecific((jl_datatype_t*)a, (jl_datatype_t*)b, a0, b0, invariant, env);
     }
 
     if (!invariant) {
@@ -4117,13 +4568,13 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
         }
         // Union a is more specific than b if some element of a is more specific than b, but
         // not vice-versa.
-        if (sub_msp(b, a, env))
+        if (sub_msp(b, a, a0, env))
             return 0;
         jl_uniontype_t *u = (jl_uniontype_t*)a;
-        if (type_morespecific_(u->a, b, invariant, env) || type_morespecific_(u->b, b, invariant, env)) {
+        if (type_morespecific_(u->a, b, a0, b0, invariant, env) || type_morespecific_(u->b, b, a0, b0, invariant, env)) {
             if (jl_is_uniontype(b)) {
                 jl_uniontype_t *v = (jl_uniontype_t*)b;
-                if (type_morespecific_(v->a, a, invariant, env) || type_morespecific_(v->b, a, invariant, env))
+                if (type_morespecific_(v->a, a, b0, a0, invariant, env) || type_morespecific_(v->b, a, b0, a0, invariant, env))
                     return 0;
             }
             return 1;
@@ -4137,11 +4588,11 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
         jl_value_t *tp0a = jl_tparam0(a);
         if (jl_is_typevar(tp0a)) {
             jl_value_t *ub = ((jl_tvar_t*)tp0a)->ub;
-            if (jl_is_kind(b) && !sub_msp((jl_value_t*)jl_any_type, ub, env))
+            if (jl_is_kind(b) && !sub_msp((jl_value_t*)jl_any_type, ub, b0, env))
                 return 1;
         }
         else if (tp0a == jl_bottom_type) {
-            if (sub_msp(b, (jl_value_t*)jl_type_type, env))
+            if (sub_msp(b, (jl_value_t*)jl_type_type, (jl_value_t*)jl_type_type, env))
                 return 1;
         }
         else if (b == (jl_value_t*)jl_datatype_type || b == (jl_value_t*)jl_unionall_type ||
@@ -4155,8 +4606,8 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
             HANDLE_UNIONALL_A;
         }
         jl_uniontype_t *u = (jl_uniontype_t*)b;
-        if (type_morespecific_(a, u->a, invariant, env) || type_morespecific_(a, u->b, invariant, env))
-            return !type_morespecific_(b, a, invariant, env);
+        if (type_morespecific_(a, u->a, a0, b0, invariant, env) || type_morespecific_(a, u->b, a0, b0, invariant, env))
+            return !type_morespecific_(b, a, b0, a0, invariant, env);
         return 0;
     }
 
@@ -4172,7 +4623,7 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
                     if (tta->name != jl_type_typename) return 1;
                     jl_value_t *tp0 = jl_tparam0(b);
                     if (jl_is_typevar(tp0)) {
-                        if (sub_msp((jl_value_t*)jl_any_type, ((jl_tvar_t*)tp0)->ub, env))
+                        if (sub_msp((jl_value_t*)jl_any_type, ((jl_tvar_t*)tp0)->ub, b0, env))
                             return 1;
                     }
                 }
@@ -4185,11 +4636,11 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
                     int bfree = jl_has_free_typevars(bpara);
                     if (!afree && !bfree && !jl_types_equal(apara, bpara))
                         return 0;
-                    if (type_morespecific_(apara, bpara, 1, env) && (jl_is_typevar(apara) || !afree || bfree))
+                    if (type_morespecific_(apara, bpara, a0, b0, 1, env) && (jl_is_typevar(apara) || !afree || bfree))
                         ascore += 1;
-                    else if (type_morespecific_(bpara, apara, 1, env) && (jl_is_typevar(bpara) || !bfree || afree))
+                    else if (type_morespecific_(bpara, apara, b0, a0, 1, env) && (jl_is_typevar(bpara) || !bfree || afree))
                         bscore += 1;
-                    else if (eq_msp(apara, bpara, env)) {
+                    else if (eq_msp(apara, bpara, a0, b0, env)) {
                         if (!afree && bfree)
                             ascore += 1;
                         else if (afree && !bfree)
@@ -4228,13 +4679,13 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
     if (jl_is_typevar(a)) {
         if (jl_is_typevar(b)) {
             return (( type_morespecific_((jl_value_t*)((jl_tvar_t*)a)->ub,
-                                         (jl_value_t*)((jl_tvar_t*)b)->ub, 0, env) &&
+                                         (jl_value_t*)((jl_tvar_t*)b)->ub, a0, b0, 0, env) &&
                      !type_morespecific_((jl_value_t*)((jl_tvar_t*)a)->lb,
-                                         (jl_value_t*)((jl_tvar_t*)b)->lb, 0, env)) ||
+                                         (jl_value_t*)((jl_tvar_t*)b)->lb, a0, b0, 0, env)) ||
                     ( type_morespecific_((jl_value_t*)((jl_tvar_t*)b)->lb,
-                                         (jl_value_t*)((jl_tvar_t*)a)->lb, 0, env) &&
+                                         (jl_value_t*)((jl_tvar_t*)a)->lb, b0, a0, 0, env) &&
                      !type_morespecific_((jl_value_t*)((jl_tvar_t*)b)->ub,
-                                         (jl_value_t*)((jl_tvar_t*)a)->ub, 0, env)));
+                                         (jl_value_t*)((jl_tvar_t*)a)->ub, b0, a0, 0, env)));
         }
         if (!jl_is_type(b))
             return 0;
@@ -4243,7 +4694,7 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
                 return 1;
             if (!jl_has_free_typevars(b))
                 return 0;
-            if (eq_msp(((jl_tvar_t*)a)->ub, b, env))
+            if (eq_msp(((jl_tvar_t*)a)->ub, b, a0, b0, env))
                 return num_occurs((jl_tvar_t*)a, env) >= 2;
         }
         else {
@@ -4252,7 +4703,7 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
                 num_occurs((jl_tvar_t*)a, env) >= 2)
                 return 1;
         }
-        return type_morespecific_(((jl_tvar_t*)a)->ub, b, 0, env);
+        return type_morespecific_(((jl_tvar_t*)a)->ub, b, a0, b0, 0, env);
     }
     if (jl_is_typevar(b)) {
         if (!jl_is_type(a))
@@ -4261,21 +4712,21 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty
             if (((jl_tvar_t*)b)->ub == jl_bottom_type)
                 return 0;
             if (jl_has_free_typevars(a)) {
-                if (type_morespecific_(a, ((jl_tvar_t*)b)->ub, 0, env))
+                if (type_morespecific_(a, ((jl_tvar_t*)b)->ub, a0, b0, 0, env))
                     return 1;
-                if (eq_msp(a, ((jl_tvar_t*)b)->ub, env))
+                if (eq_msp(a, ((jl_tvar_t*)b)->ub, a0, b0, env))
                     return num_occurs((jl_tvar_t*)b, env) < 2;
                 return 0;
             }
             else {
                 if (obviously_disjoint(a, ((jl_tvar_t*)b)->ub, 1))
                     return 0;
-                if (type_morespecific_(((jl_tvar_t*)b)->ub, a, 0, env))
+                if (type_morespecific_(((jl_tvar_t*)b)->ub, a, b0, a0, 0, env))
                     return 0;
                 return 1;
             }
         }
-        return type_morespecific_(a, ((jl_tvar_t*)b)->ub, 0, env);
+        return type_morespecific_(a, ((jl_tvar_t*)b)->ub, a0, b0, 0, env);
     }
 
     if (jl_is_unionall(a)) {
@@ -4298,12 +4749,12 @@ JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b)
         return 0;
     if (jl_subtype(a, b))
         return 1;
-    return type_morespecific_(a, b, 0, NULL);
+    return type_morespecific_(a, b, a, b, 0, NULL);
 }
 
 JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b)
 {
-    return type_morespecific_(a, b, 0, NULL);
+    return type_morespecific_(a, b, a, b, 0, NULL);
 }
 
 #ifdef __cplusplus
diff --git a/src/support/Makefile b/src/support/Makefile
index a884aa5fd47e0..1ee98a4eabdee 100644
--- a/src/support/Makefile
+++ b/src/support/Makefile
@@ -24,7 +24,7 @@ HEADERS := $(wildcard *.h) $(LIBUV_INC)/uv.h
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
-FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS
+FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS
 FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
 JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
 
diff --git a/src/support/MurmurHash3.c b/src/support/MurmurHash3.c
index a26f58ef40cfa..7eaded17c379f 100644
--- a/src/support/MurmurHash3.c
+++ b/src/support/MurmurHash3.c
@@ -57,11 +57,11 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
 
 //-----------------------------------------------------------------------------
 
-void MurmurHash3_x86_32 ( const void * key, int len,
+void MurmurHash3_x86_32 ( const void * key, size_t len,
                           uint32_t seed, void * out )
 {
   const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 4;
+  const size_t nblocks = len / 4;
 
   uint32_t h1 = seed;
 
@@ -73,7 +73,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
 
   const uint8_t * tail = data + nblocks*4;
 
-  for(int i = -nblocks; i; i++)
+  for(size_t i = -nblocks; i; i++)
   {
     uint32_t k1 = jl_load_unaligned_i32(tail + sizeof(uint32_t)*i);
 
@@ -111,11 +111,11 @@ void MurmurHash3_x86_32 ( const void * key, int len,
 
 //-----------------------------------------------------------------------------
 
-void MurmurHash3_x86_128 ( const void * key, const int len,
+void MurmurHash3_x86_128 ( const void * key, const size_t len,
                            uint32_t seed, void * out )
 {
   const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 16;
+  const size_t nblocks = len / 16;
 
   uint32_t h1 = seed;
   uint32_t h2 = seed;
@@ -132,7 +132,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
 
   const uint8_t *tail = data + nblocks*16;
 
-  for(int i = -nblocks; i; i++)
+  for(size_t i = -nblocks; i; i++)
   {
     uint32_t k1 = jl_load_unaligned_i32(tail + sizeof(uint32_t)*(i*4 + 0));
     uint32_t k2 = jl_load_unaligned_i32(tail + sizeof(uint32_t)*(i*4 + 1));
@@ -217,11 +217,11 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
 
 //-----------------------------------------------------------------------------
 
-void MurmurHash3_x64_128 ( const void * key, const int len,
+void MurmurHash3_x64_128 ( const void * key, const size_t len,
                            const uint32_t seed, void * out )
 {
   const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 16;
+  const size_t nblocks = len / 16;
 
   uint64_t h1 = seed;
   uint64_t h2 = seed;
@@ -232,7 +232,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   //----------
   // body
 
-  for(int i = 0; i < nblocks; i++)
+  for(size_t i = 0; i < nblocks; i++)
   {
     uint64_t k1 = jl_load_unaligned_i64(data + sizeof(uint64_t)*(i*2 + 0));
     uint64_t k2 = jl_load_unaligned_i64(data + sizeof(uint64_t)*(i*2 + 1));
diff --git a/src/support/MurmurHash3.h b/src/support/MurmurHash3.h
index e3e7da9df62fa..6137098d6828c 100644
--- a/src/support/MurmurHash3.h
+++ b/src/support/MurmurHash3.h
@@ -8,14 +8,14 @@
 //-----------------------------------------------------------------------------
 // Platform-specific functions and macros
 #include <stdint.h>
-
+#include <stddef.h>
 //-----------------------------------------------------------------------------
 
-void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x86_32  ( const void * key, size_t len, uint32_t seed, void * out );
 
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out );
 
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out );
 
 //-----------------------------------------------------------------------------
 
diff --git a/src/support/analyzer_annotations.h b/src/support/analyzer_annotations.h
index 3e577e6b45483..69827e4d77f37 100644
--- a/src/support/analyzer_annotations.h
+++ b/src/support/analyzer_annotations.h
@@ -22,7 +22,6 @@
 #define JL_ALWAYS_LEAFTYPE JL_GLOBALLY_ROOTED
 #define JL_ROOTS_TEMPORARILY __attribute__((annotate("julia_temporarily_roots")))
 #define JL_REQUIRE_ROOTED_SLOT __attribute__((annotate("julia_require_rooted_slot")))
-#define JL_ROOTED_VALUE_COLLECTION __attribute__((annotate("julia_rooted_value_collection")))
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -46,7 +45,6 @@ extern "C" {
 #define JL_ALWAYS_LEAFTYPE
 #define JL_ROOTS_TEMPORARILY
 #define JL_REQUIRE_ROOTED_SLOT
-#define JL_ROOTED_VALUE_COLLECTION
 #define JL_GC_PROMISE_ROOTED(x) (void)(x)
 #define jl_may_leak(x) (void)(x)
 
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index 891c091413084..a30fe85ccc0d0 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -72,16 +72,24 @@ typedef intptr_t ssize_t;
 
 #ifdef _OS_WINDOWS_
 #define STDCALL  __stdcall
-# ifdef LIBRARY_EXPORTS
+# ifdef JL_LIBRARY_EXPORTS_INTERNAL
 #  define JL_DLLEXPORT __declspec(dllexport)
-# else
-#  define JL_DLLEXPORT __declspec(dllimport)
 # endif
+# ifdef JL_LIBRARY_EXPORTS_CODEGEN
+#  define JL_DLLEXPORT_CODEGEN __declspec(dllexport)
+# endif
+#define JL_HIDDEN
 #define JL_DLLIMPORT   __declspec(dllimport)
 #else
 #define STDCALL
-# define JL_DLLEXPORT __attribute__ ((visibility("default")))
-#define JL_DLLIMPORT
+#define JL_DLLIMPORT __attribute__ ((visibility("default")))
+#define JL_HIDDEN __attribute__ ((visibility("hidden")))
+#endif
+#ifndef JL_DLLEXPORT
+# define JL_DLLEXPORT JL_DLLIMPORT
+#endif
+#ifndef JL_DLLEXPORT_CODEGEN
+# define JL_DLLEXPORT_CODEGEN JL_DLLIMPORT
 #endif
 
 #ifdef _OS_LINUX_
diff --git a/src/support/ios.c b/src/support/ios.c
index 4a6aeb54a4d32..b5a168f705603 100644
--- a/src/support/ios.c
+++ b/src/support/ios.c
@@ -196,6 +196,9 @@ static char *_buf_realloc(ios_t *s, size_t sz)
 
     if (sz <= s->maxsize) return s->buf;
 
+    if (!s->growable)
+        return NULL;
+
     if (s->ownbuf && s->buf != &s->local[0]) {
         // if we own the buffer we're free to resize it
         temp = (char*)LLT_REALLOC(s->buf, sz);
@@ -892,6 +895,7 @@ static void _ios_init(ios_t *s)
     s->readable = 1;
     s->writable = 1;
     s->rereadable = 0;
+    s->growable = 1;
 }
 
 /* stream object initializers. we do no allocation. */
@@ -935,9 +939,11 @@ ios_t *ios_file(ios_t *s, const char *fname, int rd, int wr, int create, int tru
 {
     int flags;
     int fd;
-    if (!(rd || wr))
+    if (!(rd || wr)) {
         // must specify read and/or write
+        errno = EINVAL;
         goto open_file_err;
+    }
     flags = wr ? (rd ? O_RDWR : O_WRONLY) : O_RDONLY;
     if (create) flags |= O_CREAT;
     if (trunc)  flags |= O_TRUNC;
diff --git a/src/support/ios.h b/src/support/ios.h
index 046edfae0556f..2547555b5585d 100644
--- a/src/support/ios.h
+++ b/src/support/ios.h
@@ -19,13 +19,13 @@ extern "C" {
 typedef enum { bm_none=1000, bm_line, bm_block, bm_mem } bufmode_t;
 typedef enum { bst_none, bst_rd, bst_wr } bufstate_t;
 
-#define IOS_INLSIZE 54
+#define IOS_INLSIZE 83
 #define IOS_BUFSIZE 32768
 
 #ifdef _P64
-#define ON_P64(x) x
+#define IF_P64(x,y) x
 #else
-#define ON_P64(x)
+#define IF_P64(x,y) y
 #endif
 
 // We allow ios_t as a cvalue in flisp, which only guarantees pointer
@@ -36,10 +36,8 @@ JL_ATTRIBUTE_ALIGN_PTRSIZE(typedef struct {
     // in general, you can do any operation in any state.
     char *buf;        // start of buffer
 
-    int errcode;
-
-    ON_P64(int _pad_bm;)      // put bm at same offset as type field of uv_stream_s
-    bufmode_t bm;     //
+    IF_P64(int64_t userdata;, int errcode;)
+    bufmode_t bm;     // bm must be at same offset as type field of uv_stream_s
     bufstate_t state;
 
     int64_t maxsize;    // space allocated to buffer
@@ -51,6 +49,8 @@ JL_ATTRIBUTE_ALIGN_PTRSIZE(typedef struct {
     size_t lineno;    // current line number
     size_t u_colno;     // current column number (in Unicode charwidths)
 
+    IF_P64(int errcode;, int64_t userdata;)
+
     // pointer-size integer to support platforms where it might have
     // to be a pointer
     long fd;
@@ -74,11 +74,14 @@ JL_ATTRIBUTE_ALIGN_PTRSIZE(typedef struct {
     // request durable writes (fsync)
     // unsigned char durable:1;
 
-    int64_t userdata;
+    // this declares that the buffer should not be (re-)alloc'd when
+    // attempting to write beyond its current maxsize.
+    unsigned char growable:1;
+
     char local[IOS_INLSIZE];
 } ios_t);
 
-#undef ON_P64
+#undef IF_P64
 
 extern void (*ios_set_io_wait_func)(int);
 /* low-level interface functions */
diff --git a/src/symbol.c b/src/symbol.c
index 0f8b41787ad13..dcfa0b6086846 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -35,15 +35,13 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 {
     jl_sym_t *sym;
     size_t nb = symbol_nbytes(len);
-    assert(jl_symbol_type && "not initialized");
-
     jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(nb, 0, sizeof(void*), 0);
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
-    tag->header = ((uintptr_t)jl_symbol_type) | GC_OLD_MARKED;
+    jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED);
 #ifdef MMTK_GC
     jl_ptls_t ptls = jl_current_task->ptls;
-    post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1);
+    mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1);
 #endif
     jl_atomic_store_relaxed(&sym->left, NULL);
     jl_atomic_store_relaxed(&sym->right, NULL);
diff --git a/src/task.c b/src/task.c
index 7373de937b9ae..477ae481071a0 100644
--- a/src/task.c
+++ b/src/task.c
@@ -646,13 +646,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
     int finalizers_inhibited = ptls->finalizers_inhibited;
     ptls->finalizers_inhibited = 0;
 
-#ifdef ENABLE_TIMINGS
-    jl_timing_block_t *blk = ptls->timing_stack;
-    if (blk)
-        jl_timing_block_stop(blk);
-    ptls->timing_stack = NULL;
-#endif
-
+    jl_timing_block_t *blk = jl_timing_block_exit_task(ct, ptls);
     ctx_switch(ct);
 
 #ifdef MIGRATE_TASKS
@@ -672,15 +666,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
            0 != ct->ptls &&
            0 == ptls->finalizers_inhibited);
     ptls->finalizers_inhibited = finalizers_inhibited;
-
-#ifdef ENABLE_TIMINGS
-    assert(ptls->timing_stack == NULL);
-    ptls->timing_stack = blk;
-    if (blk)
-        jl_timing_block_start(blk);
-#else
-    (void)ct;
-#endif
+    jl_timing_block_enter_task(ct, ptls, blk); (void)blk;
 
     sig_atomic_t other_defer_signal = ptls->defer_signal;
     ptls->defer_signal = defer_signal;
@@ -726,11 +712,10 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
 #define pop_timings_stack() /* Nothing */
 #endif
 
-#define throw_internal_body()                                                  \
+#define throw_internal_body(altstack)                                          \
     assert(!jl_get_safe_restore());                                            \
     jl_ptls_t ptls = ct->ptls;                                                 \
     ptls->io_wait = 0;                                                         \
-    JL_GC_PUSH1(&exception);                                                   \
     jl_gc_unsafe_enter(ptls);                                                  \
     if (exception) {                                                           \
         /* The temporary ptls->bt_data is rooted by special purpose code in the\
@@ -743,6 +728,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
     assert(ct->excstack && ct->excstack->top);                                 \
     jl_handler_t *eh = ct->eh;                                                 \
     if (eh != NULL) {                                                          \
+        if (altstack) ptls->sig_exception = NULL;                              \
         pop_timings_stack()                                                    \
         asan_unpoison_task_stack(ct, &eh->eh_ctx);                             \
         jl_longjmp(eh->eh_ctx, 1);                                             \
@@ -755,23 +741,21 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
 static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
 {
 CFI_NORETURN
-    throw_internal_body()
+    JL_GC_PUSH1(&exception);
+    throw_internal_body(0);
     jl_unreachable();
 }
 
-#ifdef _COMPILER_ASAN_ENABLED_
 /* On the signal stack, we don't want to create any asan frames, but we do on the
    normal, stack, so we split this function in two, depending on which context
-   we're calling it in */
-JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
+   we're calling it in. This also lets us avoid making a GC frame on the altstack,
+   which might end up getting corrupted if we recur here through another signal. */
+JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception)
 {
 CFI_NORETURN
-    throw_internal_body()
+    throw_internal_body(1);
     jl_unreachable();
 }
-#else
-#define throw_internal_altstack throw_internal
-#endif
 
 // record backtrace and raise an error
 JL_DLLEXPORT void jl_throw(jl_value_t *e JL_MAYBE_UNROOTED)
@@ -813,7 +797,7 @@ CFI_NORETURN
     }
     jl_ptls_t ptls = ct->ptls;
     jl_value_t *e = ptls->sig_exception;
-    ptls->sig_exception = NULL;
+    JL_GC_PROMISE_ROOTED(e);
     throw_internal_altstack(ct, e);
 }
 
@@ -866,34 +850,194 @@ uint64_t jl_genrandom(uint64_t rngState[4]) JL_NOTSAFEPOINT
     return res;
 }
 
-void jl_rng_split(uint64_t to[4], uint64_t from[4]) JL_NOTSAFEPOINT
+/*
+The jl_rng_split function forks a task's RNG state in a way that is essentially
+guaranteed to avoid collisions between the RNG streams of all tasks. The main
+RNG is the xoshiro256++ RNG whose state is stored in rngState[0..3]. There is
+also a small internal RNG used for task forking stored in rngState[4]. This
+state is used to iterate a LCG (linear congruential generator), which is then
+put through four different variations of the strongest PCG output function,
+referred to as PCG-RXS-M-XS-64 [1]. This output function is invertible: it maps
+a 64-bit state to 64-bit output; which is one of the reasons it's not
+recommended for general purpose RNGs unless space is at a premium, but in our
+usage invertibility is actually a benefit, as is explained below.
+
+The goal of jl_rng_split is to perturb the state of each child task's RNG in
+such a way each that for an entire tree of tasks spawned starting with a given
+state in a root task, no two tasks have the same RNG state. Moreover, we want to
+do this in a way that is deterministic and repeatable based on (1) the root
+task's seed, (2) how many random numbers are generated, and (3) the task tree
+structure. The RNG state of a parent task is allowed to affect the initial RNG
+state of a child task, but the mere fact that a child was spawned should not
+alter the RNG output of the parent. This second requirement rules out using the
+main RNG to seed children -- some separate state must be maintained and changed
+upon forking a child task while leaving the main RNG state unchanged.
+
+The basic approach is that used by the DotMix [2] and SplitMix [3] RNG systems:
+each task is uniquely identified by a sequence of "pedigree" numbers, indicating
+where in the task tree it was spawned. This vector of pedigree coordinates is
+then reduced to a single value by computing a dot product with a common vector
+of random weights. The DotMix paper provides a proof that this dot product hash
+value (referred to as a "compression function") is collision resistant in the
+sense the the pairwise collision probability of two distinct tasks is 1/N where
+N is the number of possible weight values. Both DotMix and SplitMix use a prime
+value of N because the proof requires that the difference between two distinct
+pedigree coordinates must be invertible, which is guaranteed by N being prime.
+We take a different approach: we instead limit pedigree coordinates to being
+binary instead -- when a task spawns a child, both tasks share the same pedigree
+prefix, with the parent appending a zero and the child appending a one. This way
+a binary pedigree vector uniquely identifies each task. Moreover, since the
+coordinates are binary, the difference between coordinates is always one which
+is its own inverse regardless of whether N is prime or not. This allows us to
+compute the dot product modulo 2^64 using native machine arithmetic, which is
+considerably more efficient and simpler to implement than arithmetic in a prime
+modulus. It also means that when accumulating the dot product incrementally, as
+described in SplitMix, we don't need to multiply weights by anything, we simply
+add the random weight for the current task tree depth to the parent's dot
+product to derive the child's dot product.
+
+We use the LCG in rngState[4] to derive generate pseudorandom weights for the
+dot product. Each time a child is forked, we update the LCG in both parent and
+child tasks. In the parent, that's all we have to do -- the main RNG state
+remains unchanged (recall that spawning a child should *not* affect subsequence
+RNG draws in the parent). The next time the parent forks a child, the dot
+product weight used will be different, corresponding to being a level deeper in
+the binary task tree. In the child, we use the LCG state to generate four
+pseudorandom 64-bit weights (more below) and add each weight to one of the
+xoshiro256 state registers, rngState[0..3]. If we assume the main RNG remains
+unused in all tasks, then each register rngState[0..3] accumulates a different
+Dot/SplitMix dot product hash as additional child tasks are spawned. Each one is
+collision resistant with a pairwise collision chance of only 1/2^64. Assuming
+that the four pseudorandom 64-bit weight streams are sufficiently independent,
+the pairwise collision probability for distinct tasks is 1/2^256. If we somehow
+managed to spawn a trillion tasks, the probability of a collision would be on
+the order of 1/10^54. Practically impossible. Put another way, this is the same
+as the probability of two SHA256 hash values accidentally colliding, which we
+generally consider so unlikely as not to be worth worrying about.
+
+What about the random "junk" that's in the xoshiro256 state registers from
+normal use of the RNG? For a tree of tasks spawned with no intervening samples
+taken from the main RNG, all tasks start with the same junk which doesn't affect
+the chance of collision. The Dot/SplitMix papers even suggest adding a random
+base value to the dot product, so we can consider whatever happens to be in the
+xoshiro256 registers to be that. What if the main RNG gets used between task
+forks? In that case, the initial state registers will be different. The DotMix
+collision resistance proof doesn't apply without modification, but we can
+generalize the setup by adding a different base constant to each compression
+function and observe that we still have a 1/N chance of the weight value
+matching that exact difference. This proves collision resistance even between
+tasks whose dot product hashes are computed with arbitrary offsets. We can
+conclude that this scheme provides collision resistance even in the face of
+different starting states of the main RNG. Does this seem too good to be true?
+Perhaps another way of thinking about it will help. Suppose we seeded each task
+completely randomly. Then there would also be a 1/2^256 chance of collision,
+just as the DotMix proof gives. Essentially what the proof is telling us is that
+if the weights are chosen uniformly and uncorrelated with the rest of the
+compression function, then the dot product construction is a good enough way to
+pseudorandomly seed each task. From that perspective, it's easier to believe
+that adding an arbitrary constant to each seed doesn't worsen its randomness.
+
+This leaves us with the question of how to generate four pseudorandom weights to
+add to the rngState[0..3] registers at each depth of the task tree. The scheme
+used here is that a single 64-bit LCG state is iterated in both parent and child
+at each task fork, and four different variations of the PCG-RXS-M-XS-64 output
+function are applied to that state to generate four different pseudorandom
+weights. Another obvious way to generate four weights would be to iterate the
+LCG four times per task split. There are two main reasons we've chosen to use
+four output variants instead:
+
+1. Advancing four times per fork reduces the set of possible weights that each
+   register can be perturbed by from 2^64 to 2^60. Since collision resistance is
+   proportional to the number of possible weight values, that would reduce
+   collision resistance.
+
+2. It's easier to compute four PCG output variants in parallel. Iterating the
+   LCG is inherently sequential. Each PCG variant can be computed independently
+   from the LCG state. All four can even be computed at once with SIMD vector
+   instructions, but the compiler doesn't currently choose to do that.
+
+A key question is whether the approach of using four variations of PCG-RXS-M-XS
+is sufficiently random both within and between streams to provide the collision
+resistance we expect. We obviously can't test that with 256 bits, but we have
+tested it with a reduced state analogue using four PCG-RXS-M-XS-8 output
+variations applied to a common 8-bit LCG. Test results do indicate sufficient
+independence: a single register has collisions at 2^5 while four registers only
+start having collisions at 2^20, which is actually better scaling of collision
+resistance than we expect in theory. In theory, with one byte of resistance we
+have a 50% chance of some collision at 20, which matches, but four bytes gives a
+50% chance of collision at 2^17 and our (reduced size analogue) construction is
+still collision free at 2^19. This may be due to the next observation, which guarantees collision avoidance for certain shapes of task trees as a result of using an
+invertible RNG to generate weights.
+
+In the specific case where a parent task spawns a sequence of child tasks with
+no intervening usage of its main RNG, the parent and child tasks are actually
+_guaranteed_ to have different RNG states. This is true because the four PCG
+streams each produce every possible 2^64 bit output exactly once in the full
+2^64 period of the LCG generator. This is considered a weakness of PCG-RXS-M-XS
+when used as a general purpose RNG, but is quite beneficial in this application.
+Since each of up to 2^64 children will be perturbed by different weights, they
+cannot have hash collisions. What about parent colliding with child? That can
+only happen if all four main RNG registers are perturbed by exactly zero. This
+seems unlikely, but could it occur? Consider this part of each output function:
+
+    p ^= p >> ((p >> 59) + 5);
+    p *= m[i];
+    p ^= p >> 43
+
+It's easy to check that this maps zero to zero. An unchanged parent RNG can only
+happen if all four `p` values are zero at the end of this, which implies that
+they were all zero at the beginning. However, that is impossible since the four
+`p` values differ from `x` by different additive constants, so they cannot all
+be zero. Stated more generally, this non-collision property: assuming the main
+RNG isn't used between task forks, sibling and parent tasks cannot have RNG
+collisions. If the task tree structure is more deeply nested or if there are
+intervening uses of the main RNG, we're back to relying on "merely" 256 bits of
+collision resistance, but it's nice to know that in what is likely the most
+common case, RNG collisions are actually impossible. This fact may also explain
+better-than-theoretical collision resistance observed in our experiment with a
+reduced size analogue of our hashing system.
+
+[1]: https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf
+
+[2]: http://supertech.csail.mit.edu/papers/dprng.pdf
+
+[3]: https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf
+*/
+void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT
 {
-    /* TODO: consider a less ad-hoc construction
-       Ideally we could just use the output of the random stream to seed the initial
-       state of the child. Out of an overabundance of caution we multiply with
-       effectively random coefficients, to break possible self-interactions.
-
-       It is not the goal to mix bits -- we work under the assumption that the
-       source is well-seeded, and its output looks effectively random.
-       However, xoshiro has never been studied in the mode where we seed the
-       initial state with the output of another xoshiro instance.
-
-       Constants have nothing up their sleeve:
-       0x02011ce34bce797f == hash(UInt(1))|0x01
-       0x5a94851fb48a6e05 == hash(UInt(2))|0x01
-       0x3688cf5d48899fa7 == hash(UInt(3))|0x01
-       0x867b4bb4c42e5661 == hash(UInt(4))|0x01
-    */
-    to[0] = 0x02011ce34bce797f * jl_genrandom(from);
-    to[1] = 0x5a94851fb48a6e05 * jl_genrandom(from);
-    to[2] = 0x3688cf5d48899fa7 * jl_genrandom(from);
-    to[3] = 0x867b4bb4c42e5661 * jl_genrandom(from);
+    // load and advance the internal LCG state
+    uint64_t x = src[4];
+    src[4] = dst[4] = x * 0xd1342543de82ef95 + 1;
+    // high spectrum multiplier from https://arxiv.org/abs/2001.05304
+
+    static const uint64_t a[4] = {
+        0xe5f8fa077b92a8a8, // random additive offsets...
+        0x7a0cd918958c124d,
+        0x86222f7d388588d4,
+        0xd30cbd35f2b64f52
+    };
+    static const uint64_t m[4] = {
+        0xaef17502108ef2d9, // standard PCG multiplier
+        0xf34026eeb86766af, // random odd multipliers...
+        0x38fd70ad58dd9fbb,
+        0x6677f9b93ab0c04d
+    };
+
+    // PCG-RXS-M-XS output with four variants
+    for (int i = 0; i < 4; i++) {
+        uint64_t p = x + a[i];
+        p ^= p >> ((p >> 59) + 5);
+        p *= m[i];
+        p ^= p >> 43;
+        dst[i] = src[i] + p; // SplitMix dot product
+    }
 }
 
 JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize)
 {
     jl_task_t *ct = jl_current_task;
     jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type);
+    jl_set_typetagof(t, jl_task_tag, 0);
     JL_PROBE_RT_NEW_TASK(ct, t);
     t->copy_stack = 0;
     if (ssize == 0) {
@@ -940,8 +1084,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->ptls = NULL;
     t->world_age = ct->world_age;
     t->reentrant_timing = 0;
-    t->reentrant_inference = 0;
-    t->inference_start_time = 0;
+    jl_timing_init_task(t);
 
 #ifdef COPY_STACKS
     if (!t->copy_stack) {
@@ -1054,6 +1197,7 @@ CFI_NORETURN
     sanitizer_finish_switch_fiber(ptls->previous_task, ct);
     _start_task();
 }
+
 STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void)
 {
 CFI_NORETURN
@@ -1077,6 +1221,7 @@ CFI_NORETURN
 
     ct->started = 1;
     JL_PROBE_RT_START_TASK(ct);
+    jl_timing_block_enter_task(ct, ptls, NULL);
     if (jl_atomic_load_relaxed(&ct->_isexception)) {
         record_backtrace(ptls, 0);
         jl_push_excstack(&ct->excstack, ct->result,
@@ -1089,7 +1234,7 @@ CFI_NORETURN
                 ptls->defer_signal = 0;
                 jl_sigint_safepoint(ptls);
             }
-            JL_TIMING(ROOT);
+            JL_TIMING(ROOT, ROOT);
             res = jl_apply(&ct->start, 1);
         }
         JL_CATCH {
@@ -1490,6 +1635,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     if (jl_nothing == NULL) // make a placeholder
         jl_nothing = jl_gc_permobj(0, jl_nothing_type);
     jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
+    jl_set_typetagof(ct, jl_task_tag, 0);
     memset(ct, 0, sizeof(jl_task_t));
     void *stack = stack_lo;
     size_t ssize = (char*)stack_hi - (char*)stack_lo;
@@ -1509,6 +1655,12 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
         ct->stkbuf = stack;
         ct->bufsz = ssize;
     }
+
+#ifdef USE_TRACY
+    char *unique_string = (char *)malloc(strlen("Root") + 1);
+    strcpy(unique_string, "Root");
+    ct->name = unique_string;
+#endif
     ct->started = 1;
     ct->next = jl_nothing;
     ct->queue = jl_nothing;
@@ -1528,8 +1680,6 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     ct->ptls = ptls;
     ct->world_age = 1; // OK to run Julia code on this task
     ct->reentrant_timing = 0;
-    ct->reentrant_inference = 0;
-    ct->inference_start_time = 0;
     ptls->root_task = ct;
     jl_atomic_store_relaxed(&ptls->current_task, ct);
     JL_GC_PROMISE_ROOTED(ct);
@@ -1543,6 +1693,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     ct->ctx.asan_fake_stack = NULL;
 #endif
 
+    jl_timing_block_enter_task(ct, ptls, NULL);
+
 #ifdef COPY_STACKS
     // initialize the base_ctx from which all future copy_stacks will be copies
     if (always_copy_stacks) {
@@ -1556,12 +1708,15 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 #endif
         if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0))
             start_task(); // sanitizer_finish_switch_fiber is part of start_task
-        return ct;
     }
-    ssize = JL_STACK_SIZE;
-    char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
-    ptls->stackbase = stkbuf + ssize;
-    ptls->stacksize = ssize;
+    else {
+        ssize = JL_STACK_SIZE;
+        char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
+        if (stkbuf != NULL) {
+            ptls->stackbase = stkbuf + ssize;
+            ptls->stacksize = ssize;
+        }
+    }
 #endif
 
     if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
diff --git a/src/threading.c b/src/threading.c
index bc31eb1e46bb6..51bdd6e8107da 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -10,6 +10,10 @@
 #include "julia_internal.h"
 #include "julia_assert.h"
 
+#ifdef USE_ITTAPI
+#include "ittapi/ittnotify.h"
+#endif
+
 // Ref https://www.uclibc.org/docs/tls.pdf
 // For variant 1 JL_ELF_TLS_INIT_SIZE is the size of the thread control block (TCB)
 // For variant 2 JL_ELF_TLS_INIT_SIZE is 0
@@ -589,6 +593,8 @@ static void jl_check_tls(void)
 JL_DLLEXPORT const int jl_tls_elf_support = 0;
 #endif
 
+extern int gc_first_tid;
+
 // interface to Julia; sets up to make the runtime thread-safe
 void jl_init_threading(void)
 {
@@ -604,17 +610,16 @@ void jl_init_threading(void)
     // specified on the command line (and so are in `jl_options`) or by the
     // environment variable. Set the globals `jl_n_threadpools`, `jl_n_threads`
     // and `jl_n_threads_per_pool`.
-    jl_n_threadpools = 1;
+    jl_n_threadpools = 2;
     int16_t nthreads = JULIA_NUM_THREADS;
     int16_t nthreadsi = 0;
     char *endptr, *endptri;
 
     if (jl_options.nthreads != 0) { // --threads specified
-        jl_n_threadpools = jl_options.nthreadpools;
         nthreads = jl_options.nthreads_per_pool[0];
         if (nthreads < 0)
             nthreads = jl_effective_threads();
-        if (jl_n_threadpools == 2)
+        if (jl_options.nthreadpools == 2)
             nthreadsi = jl_options.nthreads_per_pool[1];
     }
     else if ((cp = getenv(NUM_THREADS_NAME))) { // ENV[NUM_THREADS_NAME] specified
@@ -639,18 +644,35 @@ void jl_init_threading(void)
                 if (errno != 0 || endptri == cp || nthreadsi < 0)
                     nthreadsi = 0;
             }
-            if (nthreadsi > 0)
-                jl_n_threadpools++;
         }
     }
 
-    jl_all_tls_states_size = nthreads + nthreadsi;
+    int16_t ngcthreads = jl_options.ngcthreads - 1;
+    if (ngcthreads == -1 &&
+        (cp = getenv(NUM_GC_THREADS_NAME))) { // ENV[NUM_GC_THREADS_NAME] specified
+
+        ngcthreads = (uint64_t)strtol(cp, NULL, 10) - 1;
+    }
+    if (ngcthreads == -1) {
+        // if `--gcthreads` was not specified, set the number of GC threads
+        // to half of compute threads
+        if (nthreads <= 1) {
+            ngcthreads = 0;
+        }
+        else {
+            ngcthreads = (nthreads / 2) - 1;
+        }
+    }
+
+    jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads;
     jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int));
-    jl_n_threads_per_pool[0] = nthreads;
-    jl_n_threads_per_pool[1] = nthreadsi;
+    jl_n_threads_per_pool[0] = nthreadsi;
+    jl_n_threads_per_pool[1] = nthreads;
 
     jl_atomic_store_release(&jl_all_tls_states, (jl_ptls_t*)calloc(jl_all_tls_states_size, sizeof(jl_ptls_t)));
     jl_atomic_store_release(&jl_n_threads, jl_all_tls_states_size);
+    jl_n_gcthreads = ngcthreads;
+    gc_first_tid = nthreads;
 }
 
 static uv_barrier_t thread_init_done;
@@ -658,6 +680,12 @@ static uv_barrier_t thread_init_done;
 void jl_start_threads(void)
 {
     int nthreads = jl_atomic_load_relaxed(&jl_n_threads);
+    int ngcthreads = jl_n_gcthreads;
+#ifdef MMTK_GC
+    // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads.
+    // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia.
+    ngcthreads = 0;
+#endif
     int cpumasksize = uv_cpumask_size();
     char *cp;
     int i, exclusive;
@@ -690,15 +718,23 @@ void jl_start_threads(void)
     // create threads
     uv_barrier_init(&thread_init_done, nthreads);
 
+    // GC/System threads need to be after the worker threads.
+    int nworker_threads = nthreads - ngcthreads;
+
     for (i = 1; i < nthreads; ++i) {
         jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
         t->tid = i;
         t->barrier = &thread_init_done;
-        uv_thread_create(&uvtid, jl_threadfun, t);
-        if (exclusive) {
-            mask[i] = 1;
-            uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
-            mask[i] = 0;
+        if (i < nworker_threads) {
+            uv_thread_create(&uvtid, jl_threadfun, t);
+            if (exclusive) {
+                mask[i] = 1;
+                uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+                mask[i] = 0;
+            }
+        }
+        else {
+            uv_thread_create(&uvtid, jl_gc_threadfun, t);
         }
         uv_thread_detach(&uvtid);
     }
@@ -731,6 +767,15 @@ JL_DLLEXPORT void jl_exit_threaded_region(void)
     }
 }
 
+// Profiling stubs
+
+void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT
+{
+    jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL);
+    lock->count = 0;
+    jl_profile_lock_init(lock, name);
+}
+
 void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
 {
     jl_task_t *owner = jl_atomic_load_relaxed(&lock->owner);
@@ -738,9 +783,18 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
         lock->count++;
         return;
     }
+    // Don't use JL_TIMING for instant acquires, results in large blowup of events
+    jl_profile_lock_start_wait(lock);
+    if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
+        lock->count = 1;
+        jl_profile_lock_acquired(lock);
+        return;
+    }
+    JL_TIMING(LOCK_SPIN, LOCK_SPIN);
     while (1) {
         if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
             lock->count = 1;
+            jl_profile_lock_acquired(lock);
             return;
         }
         if (safepoint) {
@@ -753,7 +807,7 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
                 uv_cond_wait(&cond, &tls_lock);
             uv_mutex_unlock(&tls_lock);
         }
-        jl_cpu_pause();
+        jl_cpu_suspend();
         owner = jl_atomic_load_relaxed(&lock->owner);
     }
 }
@@ -816,6 +870,7 @@ void _jl_mutex_unlock_nogc(jl_mutex_t *lock)
     assert(jl_atomic_load_relaxed(&lock->owner) == jl_current_task &&
            "Unlocking a lock in a different thread.");
     if (--lock->count == 0) {
+        jl_profile_lock_release_start(lock);
         jl_atomic_store_release(&lock->owner, (jl_task_t*)NULL);
         jl_cpu_wake();
         if (jl_running_under_rr(0)) {
@@ -824,6 +879,7 @@ void _jl_mutex_unlock_nogc(jl_mutex_t *lock)
             uv_cond_broadcast(&cond);
             uv_mutex_unlock(&tls_lock);
         }
+        jl_profile_lock_release_end(lock);
     }
 #endif
 }
diff --git a/src/threading.h b/src/threading.h
index 4df6815124eb9..40792a2889e44 100644
--- a/src/threading.h
+++ b/src/threading.h
@@ -25,6 +25,7 @@ jl_ptls_t jl_init_threadtls(int16_t tid) JL_NOTSAFEPOINT;
 
 // provided by a threading infrastructure
 void jl_init_threadinginfra(void);
+void jl_gc_threadfun(void *arg);
 void jl_threadfun(void *arg);
 
 #ifdef __cplusplus
diff --git a/src/timing.c b/src/timing.c
index 929a09305f993..3290e68ee9169 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -6,44 +6,113 @@
 #include "options.h"
 #include "stdio.h"
 
+jl_module_t *jl_module_root(jl_module_t *m);
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #ifdef ENABLE_TIMINGS
-#include "timing.h"
 
 #ifndef HAVE_TIMING_SUPPORT
 #error Timings are not supported on your compiler
 #endif
 
 static uint64_t t0;
-JL_DLLEXPORT uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0};
+#if defined(USE_TRACY) || defined(USE_ITTAPI)
+/**
+ * These sources often generate millions of events / minute. Although Tracy
+ * can generally keep up with that, those events also bloat the saved ".tracy"
+ * files, so we disable them by default.
+ **/
+JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) |
+                                              (1ull << JL_TIMING_TYPE_CACHE_LOOKUP) |
+                                              (1ull << JL_TIMING_METHOD_MATCH) |
+                                              (1ull << JL_TIMING_METHOD_LOOKUP_FAST) |
+                                              (1ull << JL_TIMING_AST_COMPRESS) |
+                                              (1ull << JL_TIMING_AST_UNCOMPRESS));
+#else
+JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~0ull;
+#endif
+
+JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST] = {0};
+
+// Used to as an item limit when several strings of metadata can
+// potentially be associated with a single timing zone.
+JL_DLLEXPORT uint32_t jl_timing_print_limit = 10;
+
 const char *jl_timing_names[(int)JL_TIMING_LAST] =
     {
-#define X(name) #name
+#define X(name) #name,
         JL_TIMING_OWNERS
 #undef X
     };
 
+JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];
+
+#ifdef USE_ITTAPI
+JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
+#endif
+
 void jl_print_timings(void)
 {
+#ifdef USE_TIMING_COUNTS
     uint64_t total_time = cycleclock() - t0;
     uint64_t root_time = total_time;
     for (int i = 0; i < JL_TIMING_LAST; i++) {
-        root_time -= jl_timing_data[i];
+        root_time -= jl_timing_counts[i];
     }
-    jl_timing_data[0] = root_time;
+    jl_timing_counts[0] = root_time;
+    fprintf(stderr, "\nJULIA TIMINGS\n");
     for (int i = 0; i < JL_TIMING_LAST; i++) {
-        if (jl_timing_data[i] != 0)
+        if (jl_timing_counts[i] != 0)
             fprintf(stderr, "%-25s : %5.2f %%   %" PRIu64 "\n", jl_timing_names[i],
-                    100 * (((double)jl_timing_data[i]) / total_time), jl_timing_data[i]);
+                    100 * (((double)jl_timing_counts[i]) / total_time), jl_timing_counts[i]);
     }
+
+    fprintf(stderr, "\nJULIA COUNTERS\n");
+#define X(name) do { \
+        int64_t val = (int64_t) jl_atomic_load_relaxed(&jl_timing_counters[(int)JL_TIMING_COUNTER_##name].basic_counter); \
+        if (val != 0) \
+            fprintf(stderr, "%-25s : %" PRIi64 "\n", #name, val); \
+    } while (0);
+
+    JL_TIMING_COUNTERS
+#undef X
+#endif
 }
 
 void jl_init_timing(void)
 {
     t0 = cycleclock();
+
+    _Static_assert(JL_TIMING_EVENT_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing events!");
+    _Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!");
+
+    int i __attribute__((unused)) = 0;
+#ifdef USE_ITTAPI
+#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name));
+    JL_TIMING_EVENTS
+#undef X
+    i = 0;
+#define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime");
+    JL_TIMING_COUNTERS
+#undef X
+#endif
+#ifdef USE_TRACY
+    i = 0;
+#define X(counter_name) jl_timing_counters[i].tracy_counter = (jl_tracy_counter_t){0, #counter_name}; \
+        TracyCPlotConfig(jl_timing_counters[i++].tracy_counter.name, TracyPlotFormatNumber, /* rectilinear */ 1, /* fill */ 1, /* color */ 0);
+    JL_TIMING_COUNTERS
+#undef X
+    // We reference these by enum indexing and then asking for the name, since that allows the compiler
+    // to catch name mismatches.
+    TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_HeapSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
+    TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
+    TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITCodeSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
+    TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITDataSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
+    TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_ImageSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
+#endif
 }
 
 void jl_destroy_timing(void)
@@ -62,20 +131,265 @@ jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block)
     return cur_block->prev;
 }
 
-void jl_timing_block_start(jl_timing_block_t *cur_block)
+void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk)
+{
+    if (prev_blk != NULL) {
+        assert(ptls->timing_stack == NULL);
+
+        ptls->timing_stack = prev_blk;
+        if (prev_blk != NULL) {
+            _COUNTS_START(&prev_blk->counts_ctx, cycleclock());
+        }
+    }
+
+#ifdef USE_TRACY
+    TracyCFiberEnter(ct->name);
+#else
+    (void)ct;
+#endif
+}
+
+jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls)
+{
+#ifdef USE_TRACY
+    // Tracy is fairly strict about not leaving a fiber that hasn't
+    // been entered, which happens often when connecting to a running
+    // Julia session.
+    //
+    // Eventually, Tracy will support telling the server which fibers
+    // are active upon connection, but until then we work around the
+    // problem by not explicitly leaving the fiber at all.
+    //
+    // Later when we enter the new fiber directly, that will cause the
+    // the active fiber to be left implicitly.
+
+    //TracyCFiberLeave;
+#endif
+    (void)ct;
+
+    jl_timing_block_t *blk = ptls->timing_stack;
+    ptls->timing_stack = NULL;
+
+    if (blk != NULL) {
+        _COUNTS_STOP(&blk->counts_ctx, cycleclock());
+    }
+    return blk;
+}
+
+JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block)
+{
+#ifdef USE_TRACY
+    ios_t buf;
+    ios_mem(&buf, IOS_INLSIZE);
+    buf.growable = 0; // Restrict to inline buffer to avoid allocation
+
+    jl_static_show((JL_STREAM*)&buf, v);
+    if (buf.size == buf.maxsize)
+        memset(&buf.buf[IOS_INLSIZE - 3], '.', 3);
+
+    TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size);
+#endif
+}
+
+JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_block)
+{
+#ifdef USE_TRACY
+    jl_module_t *root = jl_module_root(m);
+    if (root == m || root == jl_main_module) {
+        const char *module_name = jl_symbol_name(m->name);
+        TracyCZoneText(*(cur_block->tracy_ctx), module_name, strlen(module_name));
+    } else {
+        jl_timing_printf(cur_block, "%s.%s", jl_symbol_name(root->name), jl_symbol_name(m->name));
+    }
+#endif
+}
+
+JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *cur_block)
+{
+#ifdef USE_TRACY
+    const char *filename = gnu_basename(path);
+    TracyCZoneText(*(cur_block->tracy_ctx), filename, strlen(filename));
+#endif
+}
+
+JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block)
+{
+    jl_timing_show_func_sig(mi->specTypes, cur_block);
+    jl_method_t *def = mi->def.method;
+    jl_timing_printf(cur_block, "%s:%d in %s",
+                     gnu_basename(jl_symbol_name(def->file)),
+                     def->line,
+                     jl_symbol_name(def->module->name));
+}
+
+JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block)
+{
+    jl_timing_show((jl_value_t *)method, cur_block);
+    jl_timing_printf(cur_block, "%s:%d in %s",
+                    gnu_basename(jl_symbol_name(method->file)),
+                    method->line,
+                    jl_symbol_name(method->module->name));
+}
+
+JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block)
+{
+#ifdef USE_TRACY
+    ios_t buf;
+    ios_mem(&buf, IOS_INLSIZE);
+    buf.growable = 0; // Restrict to inline buffer to avoid allocation
+
+    jl_static_show_config_t config = { /* quiet */ 1 };
+    jl_static_show_func_sig_((JL_STREAM*)&buf, v, config);
+    if (buf.size == buf.maxsize)
+        memset(&buf.buf[IOS_INLSIZE - 3], '.', 3);
+
+    TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size);
+#endif
+}
+
+JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...)
+{
+    va_list args;
+    va_start(args, format);
+
+#ifdef USE_TRACY
+    ios_t buf;
+    ios_mem(&buf, IOS_INLSIZE);
+    buf.growable = 0; // Restrict to inline buffer to avoid allocation
+
+    jl_vprintf((JL_STREAM*)&buf, format, args);
+    if (buf.size == buf.maxsize)
+        memset(&buf.buf[IOS_INLSIZE - 3], '.', 3);
+
+    TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size);
+#endif
+    va_end(args);
+}
+
+JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str)
+{
+#ifdef USE_TRACY
+    TracyCZoneText(*(cur_block->tracy_ctx), str, strlen(str));
+#endif
+}
+
+void jl_timing_init_task(jl_task_t *t)
+{
+#ifdef USE_TRACY
+    jl_value_t *start_type = jl_typeof(t->start);
+    const char *start_name = "";
+    if (jl_is_datatype(start_type))
+        start_name = jl_symbol_name(((jl_datatype_t *) start_type)->name->name);
+
+    static uint16_t task_id = 1;
+
+    // XXX: Tracy uses this as a handle internally and requires that this
+    // string live forever, so this allocation is intentionally leaked.
+    char *fiber_name;
+    if (start_name[0] == '#') {
+        jl_method_instance_t *mi = jl_method_lookup(&t->start, 1, jl_get_world_counter());
+        const char *filename = gnu_basename(jl_symbol_name(mi->def.method->file));
+        const char *module_name = jl_symbol_name(mi->def.method->module->name);
+
+        // 26 characters in "Task 65535 (:0000000 in )\0"
+        size_t fiber_name_len = strlen(filename) + strlen(module_name) + 26;
+        fiber_name = (char *)malloc(fiber_name_len);
+        snprintf(fiber_name, fiber_name_len,  "Task %d (%s:%d in %s)",
+                 task_id++, filename, mi->def.method->line, module_name);
+    } else {
+
+        // 16 characters in "Task 65535 (\"\")\0"
+        size_t fiber_name_len = strlen(start_name) + 16;
+        fiber_name = (char *)malloc(fiber_name_len);
+        snprintf(fiber_name, fiber_name_len,  "Task %d (\"%s\")",
+                 task_id++, start_name);
+    }
+
+    t->name = fiber_name;
+#endif
+}
+
+JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled)
 {
-    _jl_timing_block_start(cur_block, cycleclock());
+    for (int i = 0; i < JL_TIMING_LAST; i++) {
+        if (strcmp(subsystem, jl_timing_names[i]) == 0) {
+            uint64_t subsystem_bit = (1ul << i);
+            if (enabled) {
+                jl_timing_enable_mask |= subsystem_bit;
+            } else {
+                jl_timing_enable_mask &= ~subsystem_bit;
+            }
+            return 0;
+        }
+    }
+    return -1;
 }
 
-void jl_timing_block_stop(jl_timing_block_t *cur_block)
+static void jl_timing_set_enable_from_env(void)
 {
-    _jl_timing_block_stop(cur_block, cycleclock());
+    const char *env = getenv("JULIA_TIMING_SUBSYSTEMS");
+    if (!env)
+        return;
+
+    // Copy `env`, so that we can modify it
+    size_t sz = strlen(env) + 1;
+    char *env_copy = (char *)malloc(sz);
+    memcpy(env_copy, env, sz);
+
+    char *subsystem = env_copy;
+    char *ch = subsystem;
+    uint8_t enable = 1;
+    while (1) {
+        // +SUBSYSTEM means enable, -SUBSYSTEM means disable
+        if (*subsystem == '+' || *subsystem == '-')
+            enable = (*subsystem++ == '+');
+
+        if (*ch == ',') {
+            *ch++ = '\0';
+            if ((*subsystem != '\0') && jl_timing_set_enable(subsystem, enable))
+                fprintf(stderr, "warning: unable to configure timing for non-existent subsystem \"%s\"\n", subsystem);
+
+            subsystem = ch;
+            enable = 1;
+        }
+        else if (*ch == '\0') {
+            if ((*subsystem != '\0') && jl_timing_set_enable(subsystem, enable))
+                fprintf(stderr, "warning: unable to configure timing for non-existent subsystem \"%s\"\n", subsystem);
+
+            break;
+        }
+        else ch++;
+    }
+    free(env_copy);
+}
+
+static void jl_timing_set_print_limit_from_env(void)
+{
+    const char *const env = getenv("JULIA_TIMING_METADATA_PRINT_LIMIT");
+    if (!env)
+        return;
+
+    char *endp;
+    long value = strtol(env, &endp, 10);
+    if (*endp == '\0' && value >= 0 && value <= UINT32_MAX)
+        jl_timing_print_limit = (uint32_t)value;
+}
+
+void jl_timing_apply_env(void)
+{
+    // JULIA_TIMING_SUBSYSTEMS
+    jl_timing_set_enable_from_env();
+
+    // JULIA_TIMING_METADATA_PRINT_LIMIT
+    jl_timing_set_print_limit_from_env();
 }
 
 #else
 
 void jl_init_timing(void) { }
 void jl_destroy_timing(void) { }
+JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { return -1; }
+JL_DLLEXPORT uint32_t jl_timing_print_limit = 0;
 
 #endif
 
diff --git a/src/timing.h b/src/timing.h
index 70f34fa89f543..73614864ea733 100644
--- a/src/timing.h
+++ b/src/timing.h
@@ -3,91 +3,289 @@
 #ifndef JL_TIMING_H
 #define JL_TIMING_H
 
+#include "julia.h"
+
+static inline const char *gnu_basename(const char *path)
+{
+    const char *base = strrchr(path, '/');
+#ifdef _WIN32
+    const char *backslash = strrchr(path, '\\');
+    if (backslash > base)
+        base = backslash;
+#endif
+    return base ? base+1 : path;
+}
+
+#ifdef USE_TRACY
+typedef struct {
+    _Atomic(int64_t) val;
+    char* name;
+} jl_tracy_counter_t;
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
+
 void jl_init_timing(void);
 void jl_destroy_timing(void) JL_NOTSAFEPOINT;
+
+// Update the enable bit-mask to enable/disable tracing events for
+// the subsystem in `jl_timing_names` matching the provided string.
+//
+// Returns -1 if no matching sub-system was found.
+JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled);
+
+// Check for environment vars "JULIA_TIMING_METADATA_PRINT_LIMIT" and
+// "JULIA_TIMING_SUBSYSTEMS" and if present apply these to the metadata
+// print limit and the timings enable mask, respectively.
+//
+// For example, to enable INFERENCE and METHOD_MATCH and disable GC:
+//     JULIA_TIMING_SUBSYSTEMS="+INFERENCE,-GC,+METHOD_MATCH"
+//
+// For example, to increase the metadata item print limit from 10 to 20:
+//     JULIA_TIMING_METADATA_PRINT_LIMIT=20
+void jl_timing_apply_env(void);
+
+// Configurable item limit, runtime code should use this to limit printing
+// when adding potentially many items of metadata to a single timing zone.
+extern JL_DLLEXPORT uint32_t jl_timing_print_limit;
+
 #ifdef __cplusplus
 }
 #endif
 
-#ifndef ENABLE_TIMINGS
-#define JL_TIMING(owner)
+#ifdef __cplusplus
+#define HAVE_TIMING_SUPPORT
+#elif defined(_COMPILER_CLANG_)
+#define HAVE_TIMING_SUPPORT
+#elif defined(_COMPILER_GCC_)
+#define HAVE_TIMING_SUPPORT
+#endif
+
+#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_TIMING_COUNTS )
+#define ENABLE_TIMINGS
+#endif
+
+#if !defined( ENABLE_TIMINGS ) || !defined( HAVE_TIMING_SUPPORT )
+
+#define JL_TIMING(subsystem, event)
+#define JL_TIMING_SUSPEND(subsystem, ct)
+
+#define jl_timing_show(v, b)
+#define jl_timing_show_module(m, b)
+#define jl_timing_show_filename(f, b)
+#define jl_timing_show_method_instance(mi, b)
+#define jl_timing_show_method(mi, b)
+#define jl_timing_show_func_sig(tt, b)
+#define jl_timing_printf(b, f, ...)
+#define jl_timing_puts(b, s)
+#define jl_timing_init_task(t)
+#define jl_timing_block_enter_task(ct, ptls, blk)
+#define jl_timing_block_exit_task(ct, ptls) ((jl_timing_block_t *)NULL)
+#define jl_pop_timing_block(blk)
+
+#define jl_timing_counter_inc(counter, value)
+#define jl_timing_counter_dec(counter, value)
+
+#define jl_profile_lock_init(lock, name)
+#define jl_profile_lock_start_wait(lock)
+#define jl_profile_lock_acquired(lock)
+#define jl_profile_lock_release_start(lock)
+#define jl_profile_lock_release_end(lock)
+
 #else
 
 #include "julia_assert.h"
+#ifdef USE_TRACY
+#include "tracy/TracyC.h"
+#endif
+
+#ifdef USE_ITTAPI
+#include <ittapi/ittnotify.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 void jl_print_timings(void);
 jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block);
-void jl_timing_block_start(jl_timing_block_t *cur_block);
-void jl_timing_block_stop(jl_timing_block_t *cur_block);
+
+void jl_timing_init_task(jl_task_t *t);
+void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk);
+jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls);
+
+// Add the output of `jl_static_show(x)` as a text annotation to the
+// profiling region corresponding to `cur_block`.
+//
+// If larger than IOS_INLSIZE (~80 characters), text is truncated.
+JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block);
+JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_block);
+JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *cur_block);
+JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block);
+JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block);
+JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block);
+JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...);
+JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str);
 #ifdef __cplusplus
 }
 #endif
 
 #ifdef __cplusplus
-#define HAVE_TIMING_SUPPORT
-#elif defined(_COMPILER_CLANG_)
-#define HAVE_TIMING_SUPPORT
-#elif defined(_COMPILER_GCC_)
-#define HAVE_TIMING_SUPPORT
+#define JL_TIMING_CURRENT_BLOCK (&__timing_block.block)
+#else
+#define JL_TIMING_CURRENT_BLOCK (&__timing_block)
 #endif
 
-#ifndef HAVE_TIMING_SUPPORT
-#define JL_TIMING(owner)
-#else
+#define JL_TIMING_OWNERS         \
+        X(ROOT)                  \
+        X(GC)                    \
+        X(LOWERING)              \
+        X(PARSING)               \
+        X(INFERENCE)             \
+        X(CODEGEN)               \
+        X(METHOD_LOOKUP_SLOW)    \
+        X(METHOD_LOOKUP_FAST)    \
+        X(CODEINST_COMPILE)      \
+        X(LLVM_OPT)              \
+        X(LLVM_ORC)              \
+        X(METHOD_MATCH)          \
+        X(TYPE_CACHE_LOOKUP)     \
+        X(TYPE_CACHE_INSERT)     \
+        X(STAGED_FUNCTION)       \
+        X(MACRO_INVOCATION)      \
+        X(AST_COMPRESS)          \
+        X(AST_UNCOMPRESS)        \
+        X(SYSIMG_DUMP)           \
+        X(NATIVE_AOT)            \
+        X(ADD_METHOD)            \
+        X(LOAD_MODULE)           \
+        X(LOAD_IMAGE)            \
+        X(VERIFY_IMAGE)          \
+        X(SAVE_MODULE)           \
+        X(INIT_MODULE)           \
+        X(LOCK_SPIN)             \
+        X(STACKWALK)             \
+        X(DL_OPEN)               \
+        X(JULIA_INIT)            \
+
+
+#define JL_TIMING_EVENTS \
+        JL_TIMING_OWNERS \
+        X(GC_Stop) \
+        X(GC_Mark) \
+        X(GC_Sweep) \
+        X(GC_Finalizers) \
+        X(CODEGEN_LLVM) \
+        X(CODEGEN_Codeinst) \
+        X(CODEGEN_Workqueue) \
+        X(LOAD_Sysimg) \
+        X(LOAD_Pkgimg) \
+        X(LOAD_Processor) \
+        X(VERIFY_Edges) \
+        X(VERIFY_Methods) \
+        X(VERIFY_Graph) \
+        X(STACKWALK_Backtrace) \
+        X(STACKWALK_Excstack) \
+        X(NATIVE_Dump) \
+        X(NATIVE_Create) \
+
+
+#define JL_TIMING_COUNTERS \
+        X(Invalidations) \
+        X(HeapSize) \
+        X(JITSize) \
+        X(JITCodeSize) \
+        X(JITDataSize) \
+        X(ImageSize) \
 
-#define JL_TIMING_OWNERS          \
-        X(ROOT),                  \
-        X(GC),                    \
-        X(LOWERING),              \
-        X(PARSING),               \
-        X(INFERENCE),             \
-        X(CODEGEN),               \
-        X(METHOD_LOOKUP_SLOW),    \
-        X(METHOD_LOOKUP_FAST),    \
-        X(LLVM_OPT),              \
-        X(LLVM_MODULE_FINISH),    \
-        X(METHOD_MATCH),          \
-        X(TYPE_CACHE_LOOKUP),     \
-        X(TYPE_CACHE_INSERT),     \
-        X(STAGED_FUNCTION),       \
-        X(MACRO_INVOCATION),      \
-        X(AST_COMPRESS),          \
-        X(AST_UNCOMPRESS),        \
-        X(SYSIMG_LOAD),           \
-        X(SYSIMG_DUMP),           \
-        X(NATIVE_DUMP),           \
-        X(ADD_METHOD),            \
-        X(LOAD_MODULE),           \
-        X(SAVE_MODULE),           \
-        X(INIT_MODULE),
 
 enum jl_timing_owners {
-#define X(name) JL_TIMING_ ## name
+#define X(name) JL_TIMING_ ## name,
     JL_TIMING_OWNERS
 #undef X
     JL_TIMING_LAST
 };
 
-extern uint64_t jl_timing_data[(int)JL_TIMING_LAST];
-extern const char *jl_timing_names[(int)JL_TIMING_LAST];
+enum jl_timing_events {
+#define X(name) JL_TIMING_EVENT_ ## name,
+    JL_TIMING_EVENTS
+#undef X
+    JL_TIMING_EVENT_LAST
+};
 
-struct _jl_timing_block_t { // typedef in julia.h
-    jl_timing_block_t *prev;
+enum jl_timing_counter_types {
+#define X(name) JL_TIMING_COUNTER_ ## name,
+    JL_TIMING_COUNTERS
+#undef X
+    JL_TIMING_COUNTER_LAST
+};
+
+/**
+ * Timing back-ends differ in terms of whether they support nested
+ * and asynchronous events.
+ **/
+
+/**
+ * Timing Backend: Aggregated timing counts (implemented in timing.c)
+ **/
+
+#ifdef USE_TIMING_COUNTS
+#define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx;
+#define _COUNTS_CTOR(block, owner) _jl_timing_counts_ctor(block, owner)
+#define _COUNTS_DESTROY(block) _jl_timing_counts_destroy(block)
+#define _COUNTS_START(block, t) _jl_timing_counts_start(block, t)
+#define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t)
+#else
+#define _COUNTS_CTX_MEMBER
+#define _COUNTS_CTOR(block, owner)
+#define _COUNTS_DESTROY(block)
+#define _COUNTS_START(block, t)
+#define _COUNTS_STOP(block, t)
+#endif
+
+/**
+ * Timing Backend: Tracy
+ **/
+
+#ifdef USE_TRACY
+#define _TRACY_CTX_MEMBER TracyCZoneCtx *tracy_ctx;
+#define _TRACY_CTOR(context, name, enable) TracyCZoneN(__tracy_ctx, name, (enable)); \
+                                           (context) = &__tracy_ctx
+#define _TRACY_DESTROY(ctx) TracyCZoneEnd(*ctx)
+#else
+#define _TRACY_CTX_MEMBER
+#define _TRACY_CTOR(context, name, enable)
+#define _TRACY_DESTROY(block)
+#endif
+
+#ifdef USE_ITTAPI
+#define _ITTAPI_CTX_MEMBER int owner; int event;
+#define _ITTAPI_CTOR(block, owner, event) block->owner = owner; block->event = event
+#define _ITTAPI_START(block) if (_jl_timing_enabled(block->owner)) __itt_event_start(jl_timing_ittapi_events[block->event])
+#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->owner)) __itt_event_end(jl_timing_ittapi_events[block->event])
+#else
+#define _ITTAPI_CTX_MEMBER
+#define _ITTAPI_CTOR(block, owner, event)
+#define _ITTAPI_START(block)
+#define _ITTAPI_STOP(block)
+#endif
+
+/**
+ * Implementation: Aggregated counts back-end
+ **/
+
+extern JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST];
+typedef struct _jl_timing_counts_t {
     uint64_t total;
     uint64_t t0;
     int owner;
 #ifdef JL_DEBUG_BUILD
     uint8_t running;
 #endif
-};
+} jl_timing_counts_t;
 
-STATIC_INLINE void _jl_timing_block_stop(jl_timing_block_t *block, uint64_t t) JL_NOTSAFEPOINT {
+STATIC_INLINE void _jl_timing_counts_stop(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT {
 #ifdef JL_DEBUG_BUILD
     assert(block->running);
     block->running = 0;
@@ -95,7 +293,7 @@ STATIC_INLINE void _jl_timing_block_stop(jl_timing_block_t *block, uint64_t t) J
     block->total += t - block->t0;
 }
 
-STATIC_INLINE void _jl_timing_block_start(jl_timing_block_t *block, uint64_t t) JL_NOTSAFEPOINT {
+STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT {
 #ifdef JL_DEBUG_BUILD
     assert(!block->running);
     block->running = 1;
@@ -103,44 +301,94 @@ STATIC_INLINE void _jl_timing_block_start(jl_timing_block_t *block, uint64_t t)
     block->t0 = t;
 }
 
-STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner) JL_NOTSAFEPOINT {
-    uint64_t t = cycleclock();
+STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block, int owner) JL_NOTSAFEPOINT {
     block->owner = owner;
     block->total = 0;
 #ifdef JL_DEBUG_BUILD
     block->running = 0;
 #endif
-    _jl_timing_block_start(block, t);
-    return t;
 }
 
-STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) JL_NOTSAFEPOINT {
-    uint64_t t = _jl_timing_block_init(block, owner);
+STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block) JL_NOTSAFEPOINT {
+    jl_timing_counts[block->owner] += block->total;
+}
+
+/**
+ * Top-level jl_timing implementation
+ **/
+
+extern JL_DLLEXPORT uint64_t jl_timing_enable_mask;
+extern const char *jl_timing_names[(int)JL_TIMING_LAST];
+#ifdef USE_ITTAPI
+extern JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST];
+#endif
+
+struct _jl_timing_block_t { // typedef in julia.h
+    struct _jl_timing_block_t *prev;
+    _TRACY_CTX_MEMBER
+    _ITTAPI_CTX_MEMBER
+    _COUNTS_CTX_MEMBER
+};
+
+STATIC_INLINE int _jl_timing_enabled(int event) JL_NOTSAFEPOINT {
+    return !!(jl_timing_enable_mask & (1 << event));
+}
+
+STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, int event) JL_NOTSAFEPOINT {
+    uint64_t t = cycleclock(); (void)t;
+    _COUNTS_CTOR(&block->counts_ctx, owner);
+    _COUNTS_START(&block->counts_ctx, t);
+    _ITTAPI_CTOR(block, owner, event);
+    _ITTAPI_START(block);
+
     jl_task_t *ct = jl_current_task;
     jl_timing_block_t **prevp = &ct->ptls->timing_stack;
     block->prev = *prevp;
-    if (block->prev)
-        _jl_timing_block_stop(block->prev, t);
+    if (block->prev) {
+        _COUNTS_STOP(&block->prev->counts_ctx, t);
+    }
     *prevp = block;
 }
 
 STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT {
-    uint64_t t = cycleclock();
+    uint64_t t = cycleclock(); (void)t;
+
+    _ITTAPI_STOP(block);
+    _COUNTS_STOP(&block->counts_ctx, t);
+    _COUNTS_DESTROY(&block->counts_ctx);
+    _TRACY_DESTROY(block->tracy_ctx);
+
     jl_task_t *ct = jl_current_task;
-    _jl_timing_block_stop(block, t);
-    jl_timing_data[block->owner] += block->total;
     jl_timing_block_t **pcur = &ct->ptls->timing_stack;
     assert(*pcur == block);
     *pcur = block->prev;
-    if (block->prev)
-        _jl_timing_block_start(block->prev, t);
+    if (block->prev) {
+        _COUNTS_START(&block->prev->counts_ctx, t);
+    }
+}
+
+typedef struct _jl_timing_suspend_t {
+    jl_task_t *ct;
+} jl_timing_suspend_t;
+
+STATIC_INLINE void _jl_timing_suspend_ctor(jl_timing_suspend_t *suspend, const char *subsystem, jl_task_t *ct) JL_NOTSAFEPOINT {
+    suspend->ct = ct;
+#ifdef USE_TRACY
+    TracyCFiberEnter(subsystem);
+#endif
+}
+
+STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_NOTSAFEPOINT {
+#ifdef USE_TRACY
+    TracyCFiberEnter(suspend->ct->name);
+#endif
 }
 
 #ifdef __cplusplus
 struct jl_timing_block_cpp_t {
     jl_timing_block_t block;
-    jl_timing_block_cpp_t(int owner) JL_NOTSAFEPOINT {
-        _jl_timing_block_ctor(&block, owner);
+    jl_timing_block_cpp_t(int owner, int event) JL_NOTSAFEPOINT {
+        _jl_timing_block_ctor(&block, owner, event);
     }
     ~jl_timing_block_cpp_t() JL_NOTSAFEPOINT {
         _jl_timing_block_destroy(&block);
@@ -150,15 +398,116 @@ struct jl_timing_block_cpp_t {
     jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &) = delete;
     jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &&) = delete;
 };
-#define JL_TIMING(owner) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## owner)
+#define JL_TIMING(subsystem, event) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \
+    _TRACY_CTOR(__timing_block.block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1)
 #else
-#define JL_TIMING(owner) \
+#define JL_TIMING(subsystem, event) \
     __attribute__((cleanup(_jl_timing_block_destroy))) \
     jl_timing_block_t __timing_block; \
-    _jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## owner)
+    _jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \
+    _TRACY_CTOR(__timing_block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1)
+#endif
+
+#ifdef __cplusplus
+struct jl_timing_suspend_cpp_t {
+    jl_timing_suspend_t suspend;
+    jl_timing_suspend_cpp_t(const char *subsystem, jl_task_t *ct) JL_NOTSAFEPOINT {
+        _jl_timing_suspend_ctor(&suspend, subsystem, ct);
+    }
+    ~jl_timing_suspend_cpp_t() JL_NOTSAFEPOINT {
+        _jl_timing_suspend_destroy(&suspend);
+    }
+    jl_timing_suspend_cpp_t(const jl_timing_suspend_cpp_t &) = delete;
+    jl_timing_suspend_cpp_t(jl_timing_suspend_cpp_t &&) = delete;
+    jl_timing_suspend_cpp_t& operator=(const jl_timing_suspend_cpp_t &) = delete;
+    jl_timing_suspend_cpp_t& operator=(jl_timing_suspend_cpp_t &&) = delete;
+};
+#define JL_TIMING_SUSPEND(subsystem, ct) jl_timing_suspend_cpp_t __suspend_block(#subsystem, ct)
+#else
+#define JL_TIMING_SUSPEND(subsystem, ct) \
+    __attribute__((cleanup(_jl_timing_suspend_destroy))) \
+    jl_timing_suspend_t __timing_suspend; \
+    _jl_timing_suspend_ctor(&__timing_suspend, #subsystem, ct)
+#endif
+
+// Counting
+#ifdef USE_ITTAPI
+#define _ITTAPI_COUNTER_MEMBER __itt_counter ittapi_counter;
+#else
+#define _ITTAPI_COUNTER_MEMBER
+#endif
+
+#ifdef USE_TRACY
+# define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter;
+# else
+# define _TRACY_COUNTER_MEMBER
+#endif
+
+#ifdef USE_TIMING_COUNTS
+#define _COUNTS_MEMBER _Atomic(uint64_t) basic_counter;
+#else
+#define _COUNTS_MEMBER
+#endif
+
+typedef struct {
+    _ITTAPI_COUNTER_MEMBER
+    _TRACY_COUNTER_MEMBER
+    _COUNTS_MEMBER
+} jl_timing_counter_t;
+
+JL_DLLEXPORT extern jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST];
+
+static inline void jl_timing_counter_inc(int counter, uint64_t val) JL_NOTSAFEPOINT {
+#ifdef USE_ITTAPI
+    __itt_counter_inc_delta(jl_timing_counters[counter].ittapi_counter, val);
+#endif
+#ifdef USE_TRACY
+    jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter;
+    uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, val);
+    TracyCPlotI(tracy_counter->name, oldval + val);
 #endif
+#ifdef USE_TIMING_COUNTS
+    jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, val);
+#endif
+}
+
+static inline void jl_timing_counter_dec(int counter, uint64_t val) JL_NOTSAFEPOINT {
+#ifdef USE_ITTAPI
+    __itt_counter_dec_delta(jl_timing_counters[counter].ittapi_counter, val);
+#endif
+#ifdef USE_TRACY
+    jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter;
+    uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, -val);
+    TracyCPlotI(tracy_counter->name, oldval - val);
+#endif
+#ifdef USE_TIMING_COUNTS
+    jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, -(int64_t)val);
+#endif
+}
 
+// Locking profiling
+static inline void jl_profile_lock_init(jl_mutex_t *lock, const char *name) {
+#ifdef USE_ITTAPI
+    __itt_sync_create(lock, "jl_mutex_t", name, __itt_attr_mutex);
 #endif
+}
+static inline void jl_profile_lock_start_wait(jl_mutex_t *lock) {
+#ifdef USE_ITTAPI
+    __itt_sync_prepare(lock);
+#endif
+}
+static inline void jl_profile_lock_acquired(jl_mutex_t *lock) {
+#ifdef USE_ITTAPI
+    __itt_sync_acquired(lock);
+#endif
+}
+static inline void jl_profile_lock_release_start(jl_mutex_t *lock) {
+#ifdef USE_ITTAPI
+    __itt_sync_releasing(lock);
+#endif
+}
+static inline void jl_profile_lock_release_end(jl_mutex_t *lock) {}
+
 #endif
 
 #endif
diff --git a/src/toplevel.c b/src/toplevel.c
index 6f29c0a82d617..200d0ad220231 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -64,7 +64,8 @@ static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROO
 
 void jl_module_run_initializer(jl_module_t *m)
 {
-    JL_TIMING(INIT_MODULE);
+    JL_TIMING(INIT_MODULE, INIT_MODULE);
+    jl_timing_show_module(m, JL_TIMING_CURRENT_BLOCK);
     jl_function_t *f = jl_module_get_initializer(m);
     if (f == NULL)
         return;
@@ -412,7 +413,7 @@ static void expr_attributes(jl_value_t *v, int *has_ccall, int *has_defs, int *h
 int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile)
 {
     jl_array_t *body = src->code;
-    assert(jl_typeis(body, jl_array_any_type));
+    assert(jl_typetagis(body, jl_array_any_type));
     size_t i;
     int has_ccall = 0, has_defs = 0, has_opaque = 0;
     if (include_force_compile && jl_has_meta(body, jl_force_compile_sym))
@@ -874,7 +875,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
     int has_ccall = 0, has_defs = 0, has_loops = 0, has_opaque = 0, forced_compile = 0;
     assert(head == jl_thunk_sym);
     thk = (jl_code_info_t*)jl_exprarg(ex, 0);
-    if (!jl_is_code_info(thk) || !jl_typeis(thk->code, jl_array_any_type)) {
+    if (!jl_is_code_info(thk) || !jl_typetagis(thk->code, jl_array_any_type)) {
         jl_eval_errorf(m, "malformed \"thunk\" statement");
     }
     body_attributes((jl_array_t*)thk->code, &has_ccall, &has_defs, &has_loops, &has_opaque, &forced_compile);
diff --git a/src/typemap.c b/src/typemap.c
index 3afa1ffc1e212..1bdbe52a974dd 100644
--- a/src/typemap.c
+++ b/src/typemap.c
@@ -9,7 +9,7 @@
 #endif
 #include "julia_assert.h"
 
-#define MAX_METHLIST_COUNT 12 // this can strongly affect the sysimg size and speed!
+#define MAX_METHLIST_COUNT 6 // this helps configure the sysimg size and speed.
 
 #ifdef __cplusplus
 extern "C" {
@@ -23,7 +23,7 @@ static int jl_is_any(jl_value_t *t1)
     return t1 == (jl_value_t*)jl_any_type;
 }
 
-static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT)
+static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
     if (jl_is_unionall(t1))
         t1 = jl_unwrap_unionall(t1);
@@ -33,6 +33,9 @@ static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT)
     else if (jl_is_typevar(t1)) {
         return jl_type_extract_name(((jl_tvar_t*)t1)->ub);
     }
+    else if (t1 == jl_bottom_type || t1 == (jl_value_t*)jl_typeofbottom_type || t1 == (jl_value_t*)jl_typeofbottom_type->super) {
+        return (jl_value_t*)jl_typeofbottom_type->name; // put Union{} and typeof(Union{}) and Type{Union{}} together for convenience
+    }
     else if (jl_is_datatype(t1)) {
         jl_datatype_t *dt = (jl_datatype_t*)t1;
         if (!jl_is_kind(t1))
@@ -63,6 +66,9 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant)
     else if (jl_is_typevar(t1)) {
         return jl_type_extract_name_precise(((jl_tvar_t*)t1)->ub, 0);
     }
+    else if (t1 == jl_bottom_type || t1 == (jl_value_t*)jl_typeofbottom_type || t1 == (jl_value_t*)jl_typeofbottom_type->super) {
+        return 1;
+    }
     else if (jl_is_datatype(t1)) {
         jl_datatype_t *dt = (jl_datatype_t*)t1;
         if ((invariant || !dt->name->abstract) && !jl_is_kind(t1))
@@ -84,6 +90,18 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant)
     return 1;
 }
 
+// return whether Type{Union{}} is a subtype of Type{t1} (which may have free typevars)
+static int jl_parameter_includes_bottom(jl_value_t *t1)
+{
+    if (jl_is_typevar(t1) || t1 == jl_bottom_type)
+        return 1;
+    else if (jl_is_uniontype(t1)) {
+        jl_uniontype_t *u1 = (jl_uniontype_t*)t1;
+        return jl_parameter_includes_bottom(u1->a) && jl_parameter_includes_bottom(u1->b);
+    }
+    return 0;
+}
+
 
 // ----- Type Signature Subtype Testing ----- //
 
@@ -249,7 +267,7 @@ static inline int sig_match_simple(jl_value_t *arg1, jl_value_t **args, size_t n
 
 // predicate to fast-test if this type is a leaf type that can exist in the cache
 // and does not need a more expensive linear scan to find all intersections
-// be careful not to put non-leaf types or DataType/UnionAll/Union in the
+// we try not to put non-leaf types or DataType/UnionAll/Union in the
 // argument cache, since they should have a lower priority and so will go in some
 // later list
 static int is_cache_leaf(jl_value_t *ty, int tparam)
@@ -259,11 +277,11 @@ static int is_cache_leaf(jl_value_t *ty, int tparam)
     return (jl_is_concrete_type(ty) && (tparam || !jl_is_kind(ty)));
 }
 
-static _Atomic(jl_typemap_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
+static _Atomic(jl_value_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
 {
     if (cache == (jl_array_t*)jl_an_empty_vec_any)
         return NULL;
-    _Atomic(jl_typemap_t*) *pml = jl_table_peek_bp(cache, ty);
+    _Atomic(jl_value_t*) *pml = jl_table_peek_bp(cache, ty);
     JL_GC_PROMISE_ROOTED(pml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim
     return pml;
 }
@@ -275,13 +293,15 @@ static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent,
     if (a == (jl_array_t*)jl_an_empty_vec_any) {
         a = jl_alloc_vec_any(16);
         jl_atomic_store_release(cache, a);
-        jl_gc_wb(parent, a);
+        if (parent)
+            jl_gc_wb(parent, a);
     }
     a = jl_eqtable_put(a, key, val, &inserted);
     assert(inserted);
     if (a != jl_atomic_load_relaxed(cache)) {
         jl_atomic_store_release(cache, a);
-        jl_gc_wb(parent, a);
+        if (parent)
+            jl_gc_wb(parent, a);
     }
 }
 
@@ -302,8 +322,16 @@ static int jl_typemap_array_visitor(jl_array_t *a, jl_typemap_visitor_fptr fptr,
     for (i = 1; i < l; i += 2) {
         jl_value_t *d = jl_atomic_load_relaxed(&data[i]);
         JL_GC_PROMISE_ROOTED(d);
-        if (d && !jl_typemap_visitor(d, fptr, closure))
-            return 0;
+        if (d == NULL)
+            continue;
+        if (jl_is_array(d)) {
+            if (!jl_typemap_array_visitor((jl_array_t*)d, fptr, closure))
+                return 0;
+        }
+        else {
+            if (!jl_typemap_visitor(d, fptr, closure))
+                return 0;
+        }
     }
     return 1;
 }
@@ -357,7 +385,7 @@ int jl_typemap_visitor(jl_typemap_t *cache, jl_typemap_visitor_fptr fptr, void *
     }
 }
 
-static unsigned jl_supertype_height(jl_datatype_t *dt)
+static unsigned jl_supertype_height(jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
     unsigned height = 1;
     while (dt != jl_any_type) {
@@ -368,8 +396,10 @@ static unsigned jl_supertype_height(jl_datatype_t *dt)
 }
 
 // return true if a and b might intersect in the type domain (over just their type-names)
-static int tname_intersection(jl_datatype_t *a, jl_typename_t *bname, unsigned ha)
+static int tname_intersection_dt(jl_datatype_t *a, jl_typename_t *bname, unsigned ha) JL_NOTSAFEPOINT
 {
+    if (a == jl_any_type)
+        return 1;
     jl_datatype_t *b = (jl_datatype_t*)jl_unwrap_unionall(bname->wrapper);
     unsigned hb = 1;
     while (b != jl_any_type) {
@@ -385,15 +415,70 @@ static int tname_intersection(jl_datatype_t *a, jl_typename_t *bname, unsigned h
     return a->name == bname;
 }
 
-// tparam bit 1 is ::Type{T} (vs. T)
-// tparam bit 2 is typename(T) (vs. T)
-static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, int tparam,
-                                                 int offs, struct typemap_intersection_env *closure)
+static int tname_intersection(jl_value_t *a, jl_typename_t *bname, int8_t tparam) JL_NOTSAFEPOINT
+{
+    if (a == (jl_value_t*)jl_any_type)
+        return 1;
+    a = jl_unwrap_unionall(a);
+    assert(!jl_is_vararg(a));
+    if (jl_is_uniontype(a))
+        return tname_intersection(((jl_uniontype_t*)a)->a, bname, tparam) ||
+               tname_intersection(((jl_uniontype_t*)a)->b, bname, tparam);
+    if (jl_is_typevar(a))
+        return tname_intersection(((jl_tvar_t*)a)->ub, bname, tparam);
+    if (jl_is_datatype(a)) {
+        if (tparam) {
+            if (!jl_is_type_type(a))
+                return 0;
+            a = jl_unwrap_unionall(jl_tparam0(a));
+            if (!jl_is_datatype(a))
+                return tname_intersection(a, bname, 0);
+        }
+        return tname_intersection_dt((jl_datatype_t*)a, bname, jl_supertype_height((jl_datatype_t*)a));
+    }
+    return 0;
+}
+
+static int concrete_intersects(jl_value_t *t, jl_value_t *ty, int8_t tparam)
+{
+    if (ty == (jl_value_t*)jl_any_type) // easy case: Any always matches
+        return 1;
+    if (tparam & 1)
+        return jl_isa(t, ty); // (Type{t} <: ty), where is_leaf_type(t) => isa(t, ty)
+    else
+        return t == ty || jl_subtype(t, ty);
+}
+
+// tparam bit 0 is ::Type{T} (vs. T)
+// tparam bit 1 is typename(T) (vs. T)
+static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, int8_t tparam,
+                                                 int8_t offs, struct typemap_intersection_env *closure)
 {
     JL_GC_PUSH1(&a);
     size_t i, l = jl_array_len(a);
     _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
-    unsigned height = tparam & 2 ? jl_supertype_height((jl_datatype_t*)ty) : 0;
+    unsigned height = 0;
+    jl_datatype_t *tydt = jl_any_type;
+    if (tparam & 2) {
+        // try to extract a description of ty for intersections, but since we
+        jl_value_t *ttype = jl_unwrap_unionall(ty);
+        if (tparam & 1)
+            // extract T from Type{T} (if possible)
+            ttype = jl_is_type_type(ttype) ? jl_tparam0(ttype) : NULL;
+        if (ttype && jl_is_datatype(ttype)) {
+            tydt = (jl_datatype_t*)ttype;
+        }
+        else if (ttype) {
+            ttype = jl_type_extract_name(ttype);
+            tydt = ttype ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)ttype)->wrapper) : NULL;
+        }
+        if (tydt == jl_any_type)
+            ty = (jl_value_t*)jl_any_type;
+        else if (tydt == NULL)
+            tydt = jl_any_type;
+        else
+            height = jl_supertype_height(tydt);
+    }
     for (i = 0; i < l; i += 2) {
         jl_value_t *t = jl_atomic_load_relaxed(&data[i]);
         JL_GC_PROMISE_ROOTED(t);
@@ -402,18 +487,24 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
         if (tparam & 2) {
             jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i + 1]);
             JL_GC_PROMISE_ROOTED(ml);
-            if (ty == (jl_value_t*)jl_any_type || // easy case: Any always matches
-                tname_intersection((jl_datatype_t*)ty, (jl_typename_t*)t, height)) {
-                if (!jl_typemap_intersection_visitor(ml, offs + 1, closure))
-                    goto exit;
+            if (tydt == jl_any_type ?
+                    tname_intersection(ty, (jl_typename_t*)t, tparam & 1) :
+                    tname_intersection_dt(tydt, (jl_typename_t*)t, height)) {
+                if ((tparam & 1) && t == (jl_value_t*)jl_typeofbottom_type->name) // skip Type{Union{}} and Type{typeof(Union{})}, since the caller should have already handled those
+                    continue;
+                if (jl_is_array(ml)) {
+                    if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, tparam & ~2, offs, closure))
+                        goto exit;
+                }
+                else {
+                    if (!jl_typemap_intersection_visitor(ml, offs + 1, closure))
+                        goto exit;
+                }
             }
         }
         else {
-            // `t` is a leaftype, so intersection test becomes subtype
-            if (ty == (jl_value_t*)jl_any_type || // easy case: Any always matches
-                (tparam & 1
-                 ? (jl_typeof(t) == ty || jl_isa(t, ty)) // (Type{t} <: ty), where is_leaf_type(t) => isa(t, ty)
-                 : (t == ty || jl_subtype(t, ty)))) {
+            // `t` is a leaftype, so intersection test becomes subtype (after excluding kinds)
+            if (concrete_intersects(t, ty, tparam)) {
                 jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i + 1]);
                 JL_GC_PROMISE_ROOTED(ml);
                 // NOTE: ml might be NULL if we're racing with the thread that's inserting the item
@@ -430,6 +521,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
     return 0;
 }
 
+
 // calls fptr on each jl_typemap_entry_t in cache in sort order
 // for which type ∩ ml->type != Union{}, until fptr return false
 static int jl_typemap_intersection_node_visitor(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure)
@@ -438,38 +530,64 @@ static int jl_typemap_intersection_node_visitor(jl_typemap_entry_t *ml, struct t
     // mark this `register` because (for branch prediction)
     // that can be absolutely critical for speed
     register jl_typemap_intersection_visitor_fptr fptr = closure->fptr;
-    while (ml != (void*)jl_nothing) {
-        if (closure->type == (jl_value_t*)ml->sig) {
-            // fast-path for the intersection of a type with itself
-            if (closure->env)
-                closure->env = jl_outer_unionall_vars((jl_value_t*)ml->sig);
-            closure->ti = closure->type;
-            closure->issubty = 1;
-            if (!fptr(ml, closure))
-                return 0;
+    for (;  ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
+        if (closure->max_valid < ml->min_world)
+            continue;
+        if (closure->min_valid > ml->max_world)
+            continue;
+        jl_svec_t **penv = NULL;
+        if (closure->env) {
+            closure->env = jl_emptysvec;
+            penv = &closure->env;
         }
-        else {
-            jl_svec_t **penv = NULL;
-            if (closure->env) {
-                closure->env = jl_emptysvec;
-                penv = &closure->env;
-            }
-            closure->ti = jl_type_intersection_env_s(closure->type, (jl_value_t*)ml->sig, penv, &closure->issubty);
-            if (closure->ti != (jl_value_t*)jl_bottom_type) {
-                // In some corner cases type intersection is conservative and returns something
-                // for intersect(A, B) even though A is a dispatch tuple and !(A <: B).
-                // For dispatch purposes in such a case we know there's no match. This check
-                // fixes issue #30394.
-                if (closure->issubty || !jl_is_dispatch_tupletype(closure->type))
-                    if (!fptr(ml, closure))
-                        return 0;
-            }
+        closure->ti = jl_type_intersection_env_s(closure->type, (jl_value_t*)ml->sig, penv, &closure->issubty);
+        if (closure->ti != (jl_value_t*)jl_bottom_type) {
+            // In some corner cases type intersection is conservative and returns something
+            // for intersect(A, B) even though A is a dispatch tuple and !(A <: B).
+            // For dispatch purposes in such a case we know there's no match. This check
+            // fixes issue #30394.
+            if (closure->issubty || !jl_is_dispatch_tupletype(closure->type))
+                if (!fptr(ml, closure))
+                    return 0;
         }
-        ml = jl_atomic_load_relaxed(&ml->next);
     }
     return 1;
 }
 
+int jl_has_intersect_type_not_kind(jl_value_t *t);
+int jl_has_intersect_kind_not_type(jl_value_t *t);
+
+// if TypeVar tv is used covariantly, it cannot be Union{}
+int has_covariant_var(jl_datatype_t *ttypes, jl_tvar_t *tv)
+{
+    size_t i, l = jl_nparams(ttypes);
+    for (i = 0; i < l; i++)
+        if (jl_tparam(ttypes, i) == (jl_value_t*)tv)
+            return 1;
+    return 0;
+}
+
+void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure)
+{
+    // n.b. we could consider mt->max_args here too, so this optimization
+    //      usually works even if the user forgets the `slurp...` argument, but
+    //      there is discussion that parameter may be going away? (and it is
+    //      already not accurately up-to-date for all tables currently anyways)
+    if (closure->search_slurp && ml->va) {
+        jl_value_t *sig = jl_unwrap_unionall((jl_value_t*)ml->sig);
+        size_t nargs = jl_nparams(sig);
+        if (nargs > 1 && nargs - 1 == closure->search_slurp) {
+            jl_vararg_t *va = (jl_vararg_t*)jl_tparam(sig, nargs - 1);
+            assert(jl_is_vararg((jl_value_t*)va));
+            if (va->T == (jl_value_t*)jl_any_type && va->N == NULL) {
+                // instruct typemap it can set exclude_typeofbottom on parameter nargs
+                // since we found the necessary slurp argument
+                closure->search_slurp = 0;
+            }
+        }
+    }
+}
+
 int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
                                     struct typemap_intersection_env *closure)
 {
@@ -478,13 +596,12 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
     //TODO: fast-path for leaf-type tuples?
     //if (ttypes->isdispatchtuple) {
     //    register jl_typemap_intersection_visitor_fptr fptr = closure->fptr;
-    //        struct jl_typemap_assoc search = {(jl_value_t*)closure->type, world, closure->env, 0, ~(size_t)0};
-    //        jl_typemap_entry_t *ml = jl_typemap_assoc_by_type(map, search, offs, /*subtype*/1);
-    //        if (ml) {
-    //            closure->env = search->env;
-    //            if (!fptr(ml, closure))
-    //                return 0;
-    //        }
+    //    struct jl_typemap_assoc search = {(jl_value_t*)closure->type, world, closure->env, 0, ~(size_t)0};
+    //    jl_typemap_entry_t *ml = jl_typemap_assoc_by_type(map, search, offs, /*subtype*/1);
+    //    if (ml) {
+    //        closure->env = search->env;
+    //        if (!fptr(ml, closure))
+    //            return 0;
     //    }
     //    return 1;
     //}
@@ -506,115 +623,186 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs,
         if (ty) {
             while (jl_is_typevar(ty))
                 ty = ((jl_tvar_t*)ty)->ub;
-            jl_value_t *typetype = jl_unwrap_unionall(ty);
-            typetype = jl_is_type_type(typetype) ? jl_tparam0(typetype) : NULL;
             // approxify the tparam until we have a valid type
-            if (jl_has_free_typevars(ty)) {
-                ty = jl_unwrap_unionall(ty);
-                if (jl_is_datatype(ty))
-                    ty = ((jl_datatype_t*)ty)->name->wrapper;
-                else
-                    ty = (jl_value_t*)jl_any_type;
-            }
+            if (jl_has_free_typevars(ty))
+                ty = jl_rewrap_unionall(ty, closure->type);
+            JL_GC_PUSH1(&ty);
             jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ);
-            if (targ != (jl_array_t*)jl_an_empty_vec_any) {
-                if (typetype && !jl_has_free_typevars(typetype)) {
-                    if (is_cache_leaf(typetype, 1)) {
-                        // direct lookup of leaf types
-                        jl_typemap_t *ml = mtcache_hash_lookup(targ, typetype);
-                        if (ml != jl_nothing) {
-                            if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0;
+            jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname);
+            int maybe_type = 0;
+            int maybe_kind = 0;
+            int exclude_typeofbottom = 0;
+            jl_value_t *typetype = NULL;
+            jl_value_t *name = NULL;
+            // pre-check: optimized pre-intersection test to see if `ty` could intersect with any Type or Kind
+            if (targ != (jl_array_t*)jl_an_empty_vec_any || tname != (jl_array_t*)jl_an_empty_vec_any) {
+                maybe_kind = jl_has_intersect_kind_not_type(ty);
+                maybe_type = maybe_kind || jl_has_intersect_type_not_kind(ty);
+                if (maybe_type && !maybe_kind) {
+                    typetype = jl_unwrap_unionall(ty);
+                    typetype = jl_is_type_type(typetype) ? jl_tparam0(typetype) : NULL;
+                    name = typetype ? jl_type_extract_name(typetype) : NULL;
+                    if (!typetype)
+                        exclude_typeofbottom = !jl_subtype((jl_value_t*)jl_typeofbottom_type, ty);
+                    else if (jl_is_typevar(typetype))
+                        exclude_typeofbottom = has_covariant_var((jl_datatype_t*)ttypes, (jl_tvar_t*)typetype);
+                    else
+                        exclude_typeofbottom = !jl_parameter_includes_bottom(typetype);
+                }
+            }
+            // First check for intersections with methods defined on Type{T}, where T was a concrete type
+            if (targ != (jl_array_t*)jl_an_empty_vec_any && maybe_type &&
+                    (!typetype || jl_has_free_typevars(typetype) || is_cache_leaf(typetype, 1))) { // otherwise cannot contain this particular kind, so don't bother with checking
+                if (!exclude_typeofbottom) {
+                    // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here
+                    // otherwise the possibility of encountering `Type{Union{}}` in this intersection may
+                    // be forcing us to do some extra work here whenever we see a typevar, even though
+                    // the likelihood of that value actually occurring is frequently likely to be
+                    // zero (or result in an ambiguous match)
+                    targ = jl_atomic_load_relaxed(&cache->targ); // may be GC'd during type-intersection
+                    jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)jl_typeofbottom_type->name);
+                    if (ml != jl_nothing) {
+                        size_t search_slurp = closure->search_slurp;
+                        closure->search_slurp = offs + 1;
+                        if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) {
+                            closure->search_slurp = search_slurp;
+                            JL_GC_POP();
+                            return 0;
                         }
+                        if (closure->search_slurp == 0)
+                            exclude_typeofbottom = 1;
+                        closure->search_slurp = search_slurp;
                     }
                 }
-                else {
-                    // else an array scan is required to check subtypes
-                    // first, fast-path: optimized pre-intersection test to see if `ty` could intersect with any Type
-                    if (typetype || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) {
-                        targ = jl_atomic_load_relaxed(&cache->targ); // may be GC'd during type-intersection
-                        if (!jl_typemap_intersection_array_visitor(targ, ty, 1, offs, closure)) return 0;
+                if (name != (jl_value_t*)jl_typeofbottom_type->name) {
+                    targ = jl_atomic_load_relaxed(&cache->targ); // may be GC'd earlier
+                    if (exclude_typeofbottom && name && jl_type_extract_name_precise(typetype, 1)) {
+                        // attempt semi-direct lookup of types via their names
+                        // consider the type name first
+                        jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name);
+                        if (jl_is_array(ml)) {
+                            if (typetype && !jl_has_free_typevars(typetype)) {
+                                // direct lookup of leaf types
+                                if (is_cache_leaf(typetype, 1)) {
+                                    ml = mtcache_hash_lookup((jl_array_t*)ml, typetype);
+                                    if (ml != jl_nothing) {
+                                        if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; }
+                                    }
+                                }
+                            }
+                            else {
+                                if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, 1, offs, closure)) { JL_GC_POP(); return 0; }
+                            }
+                        }
+                        else if (ml != jl_nothing) {
+                            if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; }
+                        }
+                    }
+                    else {
+                        // else an array scan is required to consider all the possible subtypes
+                        if (!jl_typemap_intersection_array_visitor(targ, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; }
                     }
                 }
             }
             jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
             if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) {
                 if (is_cache_leaf(ty, 0)) {
+                    jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name;
                     // direct lookup of leaf types
-                    jl_typemap_t *ml = mtcache_hash_lookup(cachearg1, ty);
+                    jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name);
+                    if (jl_is_array(ml))
+                        ml = mtcache_hash_lookup((jl_array_t*)ml, ty);
                     if (ml != jl_nothing) {
-                        if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0;
+                        if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; }
                     }
                 }
                 else {
-                    // else an array scan is required to check subtypes
-                    if (!jl_typemap_intersection_array_visitor(cachearg1, ty, 0, offs, closure)) return 0;
+                    jl_value_t *name = jl_type_extract_name(ty);
+                    if (name && jl_type_extract_name_precise(ty, 0)) {
+                        // direct lookup of leaf types
+                        jl_value_t *ml = mtcache_hash_lookup(cachearg1, name);
+                        if (jl_is_array(ml)) {
+                            if (!jl_typemap_intersection_array_visitor((jl_array_t*)ml, ty, 0, offs, closure)) { JL_GC_POP(); return 0; }
+                        }
+                        else {
+                            if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; }
+                        }
+                    }
+                    else {
+                        // else an array scan is required to check subtypes
+                        if (!jl_typemap_intersection_array_visitor(cachearg1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; }
+                    }
                 }
             }
-            jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname);
-            if (tname != (jl_array_t*)jl_an_empty_vec_any) {
-                jl_value_t *name = typetype ? jl_type_extract_name(typetype) : NULL;
-                if (name && !jl_is_typevar(typetype)) {
-                    // semi-direct lookup of types
-                    // TODO: the possibility of encountering `Type{Union{}}` in this intersection may
+            // Next check for intersections with methods defined on Type{T}, where T was not concrete (it might even have been a TypeVar), but had an extractable TypeName
+            if (tname != (jl_array_t*)jl_an_empty_vec_any && maybe_type) {
+                if (!exclude_typeofbottom || (!typetype && jl_isa((jl_value_t*)jl_typeofbottom_type, ty))) {
+                    // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here
+                    // otherwise the possibility of encountering `Type{Union{}}` in this intersection may
                     // be forcing us to do some extra work here whenever we see a typevar, even though
                     // the likelihood of that value actually occurring is frequently likely to be
                     // zero (or result in an ambiguous match)
-                    jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper);
-                    if (jl_type_extract_name_precise(typetype, 1)) {
-                        // just consider the type and its direct super types
-                        while (1) {
-                            tname = jl_atomic_load_relaxed(&cache->tname); // reload after callback
-                            jl_typemap_t *ml = mtcache_hash_lookup(tname, (jl_value_t*)super->name);
-                            if (ml != jl_nothing) {
-                                if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0;
-                            }
-                            if (super == jl_any_type)
-                                break;
-                            super = super->super;
+                    tname = jl_atomic_load_relaxed(&cache->tname);  // may be GC'd earlier
+                    jl_value_t *ml = mtcache_hash_lookup(tname, (jl_value_t*)jl_typeofbottom_type->name);
+                    if (ml != jl_nothing) {
+                        size_t search_slurp = closure->search_slurp;
+                        closure->search_slurp = offs + 1;
+                        if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) {
+                            closure->search_slurp = search_slurp;
+                            JL_GC_POP();
+                            return 0;
                         }
+                        if (closure->search_slurp == 0)
+                            exclude_typeofbottom = 1;
+                        closure->search_slurp = search_slurp;
                     }
-                    else {
-                        // consider all of the possible subtypes
-                        if (!jl_typemap_intersection_array_visitor(tname, (jl_value_t*)super, 3, offs, closure)) return 0;
+                }
+                if (exclude_typeofbottom && name && jl_type_extract_name_precise(typetype, 1)) {
+                    // semi-direct lookup of types
+                    // just consider the type and its direct super types
+                    jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper);
+                    if (super->name == jl_typeofbottom_type->name)
+                        super = super->super; // this was handled above
+                    while (1) {
+                        tname = jl_atomic_load_relaxed(&cache->tname); // reload after callback
+                        jl_typemap_t *ml = mtcache_hash_lookup(tname, (jl_value_t*)super->name);
+                        if (ml != jl_nothing) {
+                            if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; }
+                        }
+                        if (super == jl_any_type)
+                            break;
+                        super = super->super;
                     }
                 }
                 else {
-                    // else an array scan is required to check subtypes
-                    // first, fast-path: optimized pre-intersection test to see if `ty` could intersect with any Type
-                    if (name || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) {
-                        tname = jl_atomic_load_relaxed(&cache->tname);  // may be GC'd during type-intersection
-                        if (!jl_typemap_intersection_array_visitor(tname, (jl_value_t*)jl_any_type, 3, offs, closure)) return 0;
-                    }
+                    // else an array scan is required to check subtypes of typetype too
+                    tname = jl_atomic_load_relaxed(&cache->tname);  // may be GC'd earlier
+                    if (!jl_typemap_intersection_array_visitor(tname, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; }
                 }
             }
             jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1);
             if (name1 != (jl_array_t*)jl_an_empty_vec_any) {
                 jl_value_t *name = jl_type_extract_name(ty);
-                if (name) {
+                if (name && jl_type_extract_name_precise(ty, 0)) {
                     jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper);
-                    if (jl_type_extract_name_precise(ty, 0)) {
-                        // direct lookup of concrete types
-                        while (1) {
-                            name1 = jl_atomic_load_relaxed(&cache->name1); // reload after callback
-                            jl_typemap_t *ml = mtcache_hash_lookup(name1, (jl_value_t*)super->name);
-                            if (ml != jl_nothing) {
-                                if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0;
-                            }
-                            if (super == jl_any_type)
-                                break;
-                            super = super->super;
+                    // direct lookup of concrete types
+                    while (1) {
+                        name1 = jl_atomic_load_relaxed(&cache->name1); // reload after callback
+                        jl_typemap_t *ml = mtcache_hash_lookup(name1, (jl_value_t*)super->name);
+                        if (ml != jl_nothing) {
+                            if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; }
                         }
-                    }
-                    else {
-                        // consider all of the possible subtypes too
-                        if (!jl_typemap_intersection_array_visitor(name1, (jl_value_t*)super, 2, offs, closure)) return 0;
+                        if (super == jl_any_type)
+                            break;
+                        super = super->super;
                     }
                 }
                 else {
                     // else an array scan is required to check subtypes
-                    if (!jl_typemap_intersection_array_visitor(name1, (jl_value_t*)jl_any_type, 2, offs, closure)) return 0;
+                    if (!jl_typemap_intersection_array_visitor(name1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; }
                 }
             }
+            JL_GC_POP();
         }
         if (!jl_typemap_intersection_node_visitor(jl_atomic_load_relaxed(&cache->linear), closure))
             return 0;
@@ -648,6 +836,10 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type(
     size_t n = jl_nparams(unw);
     int typesisva = n == 0 ? 0 : jl_is_vararg(jl_tparam(unw, n-1));
     for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
+        if (search->max_valid < ml->min_world)
+            continue;
+        if (search->min_valid > ml->max_world)
+            continue;
         size_t lensig = jl_nparams(jl_unwrap_unionall((jl_value_t*)ml->sig));
         if (lensig == n || (ml->va && lensig <= n+1)) {
             int resetenv = 0, ismatch = 1;
@@ -799,9 +991,12 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 if (is_cache_leaf(a0, 1)) {
                     jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ);
                     if (targ != (jl_array_t*)jl_an_empty_vec_any) {
-                        jl_typemap_t *ml = mtcache_hash_lookup(targ, a0);
+                        jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name;
+                        jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name);
+                        if (jl_is_array(ml))
+                            ml = mtcache_hash_lookup((jl_array_t*)ml, a0);
                         if (ml != jl_nothing) {
-                            jl_typemap_entry_t *li = jl_typemap_assoc_by_type(ml, search, offs + 1, subtype);
+                            jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype);
                             if (li) return li;
                         }
                     }
@@ -811,9 +1006,12 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
             if (is_cache_leaf(ty, 0)) {
                 jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
                 if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) {
-                    jl_typemap_t *ml = mtcache_hash_lookup(cachearg1, ty);
+                    jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name;
+                    jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name);
+                    if (jl_is_array(ml))
+                        ml = mtcache_hash_lookup((jl_array_t*)ml, ty);
                     if (ml != jl_nothing) {
-                        jl_typemap_entry_t *li = jl_typemap_assoc_by_type(ml, search, offs + 1, subtype);
+                        jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype);
                         if (li) return li;
                     }
                 }
@@ -1001,15 +1199,21 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
         jl_value_t *ty = jl_typeof(a1);
         assert(jl_is_datatype(ty));
         jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ);
-        if (ty == (jl_value_t*)jl_datatype_type && targ != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(a1, 1)) {
-            jl_typemap_t *ml_or_cache = mtcache_hash_lookup(targ, a1);
+        if (targ != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(a1, 1)) {
+            jl_typename_t *name = a1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a1)->name;
+            jl_value_t *ml_or_cache = mtcache_hash_lookup(targ, (jl_value_t*)name);
+            if (jl_is_array(ml_or_cache))
+                ml_or_cache = mtcache_hash_lookup((jl_array_t*)ml_or_cache, a1);
             jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs+1, world);
             if (ml) return ml;
         }
         jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1);
         if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(ty, 0)) {
-            jl_typemap_t *ml_or_cache = mtcache_hash_lookup(cachearg1, ty);
-            jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs+1, world);
+            jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name;
+            jl_value_t *ml_or_cache = mtcache_hash_lookup(cachearg1, (jl_value_t*)name);
+            if (jl_is_array(ml_or_cache))
+                ml_or_cache = mtcache_hash_lookup((jl_array_t*)ml_or_cache, ty);
+            jl_typemap_entry_t *ml = jl_typemap_assoc_exact((jl_typemap_t*)ml_or_cache, arg1, args, n, offs+1, world);
             if (ml) return ml;
         }
         jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname);
@@ -1102,10 +1306,14 @@ static jl_typemap_level_t *jl_new_typemap_level(void)
     return cache;
 }
 
-static jl_typemap_level_t *jl_method_convert_list_to_cache(
-        jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t offs)
+static void jl_typemap_array_insert_(
+        jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
+        jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit);
+
+static jl_value_t *jl_method_convert_list_to_cache(
+        jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t tparam, int8_t offs, int8_t doublesplit)
 {
-    jl_typemap_level_t *cache = jl_new_typemap_level();
+    jl_value_t *cache = doublesplit ? jl_an_empty_vec_any : (jl_value_t*)jl_new_typemap_level();
     jl_typemap_entry_t *next = NULL;
     JL_GC_PUSH3(&cache, &next, &ml);
     while (ml != (void*)jl_nothing) {
@@ -1113,7 +1321,25 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache(
         jl_atomic_store_relaxed(&ml->next, (jl_typemap_entry_t*)jl_nothing);
         // n.b. this is being done concurrently with lookups!
         // TODO: is it safe to be doing this concurrently with lookups?
-        jl_typemap_level_insert_(map, cache, ml, offs);
+        if (doublesplit) {
+            jl_value_t *key = jl_unwrap_unionall((jl_value_t*)ml->sig);
+            size_t len = jl_nparams(key);
+            if (offs < len-1)
+                key = jl_tparam(key, offs);
+            else
+                key = jl_tparam(key, len-1);
+            if (jl_is_vararg(key))
+                key = jl_unwrap_vararg(key);
+            if (key == (jl_value_t*)jl_typeofbottom_type)
+                key = (jl_value_t*)jl_assume(jl_typeofbottom_type)->super;
+            if (tparam) {
+                assert(jl_is_type_type(key));
+                key = jl_tparam0(key);
+            }
+            jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)&cache, key, ml, NULL, 0, offs, NULL);
+        }
+        else
+            jl_typemap_level_insert_(map, (jl_typemap_level_t*)cache, ml, offs);
         ml = next;
     }
     JL_GC_POP();
@@ -1139,23 +1365,33 @@ static void jl_typemap_list_insert_(
     jl_gc_wb(parent, newrec);
 }
 
+// n.b. tparam value only needed if doublesplit is set (for jl_method_convert_list_to_cache)
 static void jl_typemap_insert_generic(
-        jl_typemap_t *map, _Atomic(jl_typemap_t*) *pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs)
+        jl_typemap_t *map, _Atomic(jl_value_t*) *pml, jl_value_t *parent,
+        jl_typemap_entry_t *newrec, int8_t tparam, int8_t offs, jl_value_t *doublesplit)
 {
-    jl_typemap_t *ml = jl_atomic_load_relaxed(pml);
+    jl_value_t *ml = jl_atomic_load_relaxed(pml);
+    if (jl_is_array(ml)) {
+        assert(doublesplit);
+        jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL);
+        return;
+    }
     if (jl_typeof(ml) == (jl_value_t*)jl_typemap_level_type) {
+        assert(!doublesplit);
         jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
     }
 
     unsigned count = jl_typemap_list_count_locked((jl_typemap_entry_t*)ml);
     if (count > MAX_METHLIST_COUNT) {
-        ml = (jl_typemap_t*)jl_method_convert_list_to_cache(
-            map, (jl_typemap_entry_t*)ml, offs);
+        ml = jl_method_convert_list_to_cache(
+            map, (jl_typemap_entry_t*)ml, tparam, offs, doublesplit != NULL);
         jl_atomic_store_release(pml, ml);
         jl_gc_wb(parent, ml);
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
+        if (doublesplit)
+            jl_typemap_array_insert_(map, (_Atomic(jl_array_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL);
+        else
+            jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
     }
 
@@ -1165,14 +1401,14 @@ static void jl_typemap_insert_generic(
 
 static void jl_typemap_array_insert_(
         jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
-        jl_value_t *parent, int8_t offs)
+        jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit)
 {
     jl_array_t *cache = jl_atomic_load_relaxed(pcache);
-    _Atomic(jl_typemap_t*) *pml = mtcache_hash_lookup_bp(cache, key);
-    if (pml != NULL)
-        jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, offs+1);
-    else
+    _Atomic(jl_value_t*) *pml = mtcache_hash_lookup_bp(cache, key);
+    if (pml == NULL)
         mtcache_hash_insert(pcache, parent, key, (jl_typemap_t*)newrec);
+    else
+        jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, tparam, offs + (doublesplit ? 0 : 1), doublesplit);
 }
 
 static void jl_typemap_level_insert_(
@@ -1203,7 +1439,7 @@ static void jl_typemap_level_insert_(
         t1 = (jl_value_t*)jl_assume(jl_typeofbottom_type)->super;
     // If the type at `offs` is Any, put it in the Any list
     if (t1 && jl_is_any(t1)) {
-        jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, offs+1);
+        jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, 0, offs+1, NULL);
         return;
     }
     // Don't put Varargs in the optimized caches (too hard to handle in lookup and bp)
@@ -1214,12 +1450,14 @@ static void jl_typemap_level_insert_(
             // and we use the table indexed for that purpose.
             jl_value_t *a0 = jl_tparam0(t1);
             if (is_cache_leaf(a0, 1)) {
-                jl_typemap_array_insert_(map, &cache->targ, a0, newrec, (jl_value_t*)cache, offs);
+                jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name;
+                jl_typemap_array_insert_(map, &cache->targ, (jl_value_t*)name, newrec, (jl_value_t*)cache, 1, offs, jl_is_datatype(name->wrapper) ? NULL : a0);
                 return;
             }
         }
         if (is_cache_leaf(t1, 0)) {
-            jl_typemap_array_insert_(map, &cache->arg1, t1, newrec, (jl_value_t*)cache, offs);
+            jl_typename_t *name = t1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)t1)->name;
+            jl_typemap_array_insert_(map, &cache->arg1, (jl_value_t*)name, newrec, (jl_value_t*)cache, 0, offs, jl_is_datatype(name->wrapper) ? NULL : t1);
             return;
         }
 
@@ -1229,12 +1467,12 @@ static void jl_typemap_level_insert_(
         if (jl_is_type_type(t1)) {
             a0 = jl_type_extract_name(jl_tparam0(t1));
             jl_datatype_t *super = a0 ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper) : jl_any_type;
-            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, offs);
+            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, 1, offs, NULL);
             return;
         }
         a0 = jl_type_extract_name(t1);
         if (a0 && a0 != (jl_value_t*)jl_any_type->name) {
-            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, offs);
+            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, 0, offs, NULL);
             return;
         }
     }
@@ -1290,7 +1528,7 @@ void jl_typemap_insert(_Atomic(jl_typemap_t *) *pcache, jl_value_t *parent,
         jl_typemap_entry_t *newrec, int8_t offs)
 {
     jl_typemap_t *cache = jl_atomic_load_relaxed(pcache);
-    jl_typemap_insert_generic(cache, pcache, parent, newrec, offs);
+    jl_typemap_insert_generic(cache, pcache, parent, newrec, 0, offs, NULL);
 }
 
 #ifdef __cplusplus
diff --git a/src/work-stealing-queue.h b/src/work-stealing-queue.h
new file mode 100644
index 0000000000000..38429e02886e9
--- /dev/null
+++ b/src/work-stealing-queue.h
@@ -0,0 +1,102 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef WORK_STEALING_QUEUE_H
+#define WORK_STEALING_QUEUE_H
+
+#include "julia_atomics.h"
+#include "assert.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =======
+// Chase and Lev's work-stealing queue, optimized for
+// weak memory models by Le et al.
+//
+// * Chase D., Lev Y. Dynamic Circular Work-Stealing queue
+// * Le N. M. et al. Correct and Efficient Work-Stealing for
+//   Weak Memory Models
+// =======
+
+typedef struct {
+    char *buffer;
+    int32_t capacity;
+    int32_t mask;
+} ws_array_t;
+
+static inline ws_array_t *create_ws_array(size_t capacity, int32_t eltsz) JL_NOTSAFEPOINT
+{
+    ws_array_t *a = (ws_array_t *)malloc_s(sizeof(ws_array_t));
+    a->buffer = (char *)malloc_s(capacity * eltsz);
+    a->capacity = capacity;
+    a->mask = capacity - 1;
+    return a;
+}
+
+typedef struct {
+    _Atomic(int64_t) top;
+    _Atomic(int64_t) bottom;
+    _Atomic(ws_array_t *) array;
+} ws_queue_t;
+
+static inline ws_array_t *ws_queue_push(ws_queue_t *q, void *elt, int32_t eltsz) JL_NOTSAFEPOINT
+{
+    int64_t b = jl_atomic_load_relaxed(&q->bottom);
+    int64_t t = jl_atomic_load_acquire(&q->top);
+    ws_array_t *ary = jl_atomic_load_relaxed(&q->array);
+    ws_array_t *old_ary = NULL;
+    if (__unlikely(b - t > ary->capacity - 1)) {
+        ws_array_t *new_ary = create_ws_array(2 * ary->capacity, eltsz);
+        for (int i = 0; i < ary->capacity; i++) {
+            memcpy(new_ary->buffer + ((t + i) & new_ary->mask) * eltsz, ary->buffer + ((t + i) & ary->mask) * eltsz, eltsz);
+        }
+        jl_atomic_store_release(&q->array, new_ary);
+        old_ary = ary;
+        ary = new_ary;
+    }
+    memcpy(ary->buffer + (b & ary->mask) * eltsz, elt, eltsz);
+    jl_fence_release();
+    jl_atomic_store_relaxed(&q->bottom, b + 1);
+    return old_ary;
+}
+
+static inline void ws_queue_pop(ws_queue_t *q, void *dest, int32_t eltsz) JL_NOTSAFEPOINT
+{
+    int64_t b = jl_atomic_load_relaxed(&q->bottom) - 1;
+    ws_array_t *ary = jl_atomic_load_relaxed(&q->array);
+    jl_atomic_store_relaxed(&q->bottom, b);
+    jl_fence();
+    int64_t t = jl_atomic_load_relaxed(&q->top);
+    if (__likely(t <= b)) {
+        memcpy(dest, ary->buffer + (b & ary->mask) * eltsz, eltsz);
+        if (t == b) {
+            if (!jl_atomic_cmpswap(&q->top, &t, t + 1))
+                memset(dest, 0, eltsz);
+            jl_atomic_store_relaxed(&q->bottom, b + 1);
+        }
+    }
+    else {
+        memset(dest, 0, eltsz);
+        jl_atomic_store_relaxed(&q->bottom, b + 1);
+    }
+}
+
+static inline void ws_queue_steal_from(ws_queue_t *q, void *dest, int32_t eltsz) JL_NOTSAFEPOINT
+{
+    int64_t t = jl_atomic_load_acquire(&q->top);
+    jl_fence();
+    int64_t b = jl_atomic_load_acquire(&q->bottom);
+    if (t < b) {
+        ws_array_t *ary = jl_atomic_load_relaxed(&q->array);
+        memcpy(dest, ary->buffer + (t & ary->mask) * eltsz, eltsz);
+        if (!jl_atomic_cmpswap(&q->top, &t, t + 1))
+            memset(dest, 0, eltsz);
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stdlib/.gitignore b/stdlib/.gitignore
index ffbc2f12f52da..dec1745520d4c 100644
--- a/stdlib/.gitignore
+++ b/stdlib/.gitignore
@@ -1,4 +1,6 @@
 /srccache
+/DelimitedFiles-*
+/DelimitedFiles
 /Pkg-*
 /Pkg
 /Statistics-*
@@ -21,3 +23,4 @@
 /SHA
 /*_jll/StdlibArtifacts.toml
 /*/Manifest.toml
+/*.image
diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl
index a9554c95f3151..47812fb993428 100644
--- a/stdlib/Artifacts/src/Artifacts.jl
+++ b/stdlib/Artifacts/src/Artifacts.jl
@@ -56,6 +56,23 @@ function artifacts_dirs(args...)
     end
 end
 
+# Recursive function, let's not make this a closure because it then has to
+# be boxed.
+function parse_mapping(mapping::String, name::String, override_file::String)
+    if !isabspath(mapping) && !isempty(mapping)
+        mapping = tryparse(Base.SHA1, mapping)
+        if mapping === nothing
+            @error("Invalid override in '$(override_file)': entry '$(name)' must map to an absolute path or SHA1 hash!")
+        end
+    end
+    return mapping
+end
+function parse_mapping(mapping::Dict, name::String, override_file::String)
+    return Dict(k => parse_mapping(v, name, override_file) for (k, v) in mapping)
+end
+# Fallthrough for invalid Overrides.toml files
+parse_mapping(mapping, name::String, override_file::String) = nothing
+
 """
     ARTIFACT_OVERRIDES
 
@@ -103,24 +120,9 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
         # Load the toml file
         depot_override_dict = parse_toml(override_file)
 
-        function parse_mapping(mapping::String, name::String)
-            if !isabspath(mapping) && !isempty(mapping)
-                mapping = tryparse(Base.SHA1, mapping)
-                if mapping === nothing
-                    @error("Invalid override in '$(override_file)': entry '$(name)' must map to an absolute path or SHA1 hash!")
-                end
-            end
-            return mapping
-        end
-        function parse_mapping(mapping::Dict, name::String)
-            return Dict(k => parse_mapping(v, name) for (k, v) in mapping)
-        end
-        # Fallthrough for invalid Overrides.toml files
-        parse_mapping(mapping, name::String) = nothing
-
         for (k, mapping) in depot_override_dict
             # First, parse the mapping. Is it an absolute path, a valid SHA1-hash, or neither?
-            mapping = parse_mapping(mapping, k)
+            mapping = parse_mapping(mapping, k, override_file)
             if mapping === nothing
                 @error("Invalid override in '$(override_file)': failed to parse entry `$(k)`")
                 continue
@@ -743,5 +745,8 @@ artifact_slash_lookup(name::AbstractString, artifact_dict::Dict, artifacts_toml:
 precompile(load_artifacts_toml, (String,))
 precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},))
 precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String))
+precompile(parse_mapping, (String, String, String))
+precompile(parse_mapping, (Dict{String, Any}, String, String))
+
 
 end # module Artifacts
diff --git a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
index 097659e01b396..bd7a0571f9d5a 100644
--- a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
+++ b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
@@ -14,13 +14,13 @@ export libgfortran, libstdcxx, libgomp
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libgfortran_handle = C_NULL
-libgfortran_path = ""
-libstdcxx_handle = C_NULL
-libstdcxx_path = ""
-libgomp_handle = C_NULL
-libgomp_path = ""
+artifact_dir::String = ""
+libgfortran_handle::Ptr{Cvoid} = C_NULL
+libgfortran_path::String = ""
+libstdcxx_handle::Ptr{Cvoid} = C_NULL
+libstdcxx_path::String = ""
+libgomp_handle::Ptr{Cvoid} = C_NULL
+libgomp_path::String = ""
 
 if Sys.iswindows()
     if arch(HostPlatform()) == "x86_64"
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index e0e09a919a085..aa46f7b827f10 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -1,3 +1,7 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/docs/src/index.md"
+```
+
 # Dates
 
 ```@meta
diff --git a/stdlib/Dates/src/arithmetic.jl b/stdlib/Dates/src/arithmetic.jl
index 6537f4e1caa82..a847f749d0154 100644
--- a/stdlib/Dates/src/arithmetic.jl
+++ b/stdlib/Dates/src/arithmetic.jl
@@ -7,6 +7,7 @@
 # TimeType arithmetic
 (+)(x::TimeType) = x
 (-)(x::T, y::T) where {T<:TimeType} = x.instant - y.instant
+(-)(x::TimeType, y::TimeType) = -(promote(x, y)...)
 
 # Date-Time arithmetic
 """
diff --git a/stdlib/Dates/src/conversions.jl b/stdlib/Dates/src/conversions.jl
index 8493218cc4086..30f1f2581d1fa 100644
--- a/stdlib/Dates/src/conversions.jl
+++ b/stdlib/Dates/src/conversions.jl
@@ -46,9 +46,11 @@ Take the number of seconds since unix epoch `1970-01-01T00:00:00` and convert to
 corresponding `DateTime`.
 """
 function unix2datetime(x)
-    rata = UNIXEPOCH + round(Int64, Int64(1000) * x)
+    # Rounding should match `now` below
+    rata = UNIXEPOCH + trunc(Int64, Int64(1000) * x)
     return DateTime(UTM(rata))
 end
+
 """
     datetime2unix(dt::DateTime) -> Float64
 
diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl
index 485fea5624066..2e684815a3c86 100644
--- a/stdlib/Dates/test/arithmetic.jl
+++ b/stdlib/Dates/test/arithmetic.jl
@@ -10,6 +10,13 @@ using Dates
     b = Dates.Time(11, 59, 59)
     @test Dates.CompoundPeriod(a - b) == Dates.Hour(12)
 end
+
+@testset "TimeType arithmetic" begin
+    a = Date(2023, 5, 1)
+    b = DateTime(2023, 5, 2)
+    @test b - a == Day(1)
+end
+
 @testset "Wrapping arithmetic for Months" begin
     # This ends up being trickier than expected because
     # the user might do 2014-01-01 + Month(-14)
diff --git a/stdlib/Dates/test/conversions.jl b/stdlib/Dates/test/conversions.jl
index 488af4110e884..99572b41b4f90 100644
--- a/stdlib/Dates/test/conversions.jl
+++ b/stdlib/Dates/test/conversions.jl
@@ -60,10 +60,16 @@ end
 
     if Sys.isapple()
         withenv("TZ" => "UTC") do
-            @test abs(Dates.now() - now(Dates.UTC)) < Dates.Second(1)
+            a = Dates.now()
+            b = Dates.now(Dates.UTC)
+            c = Dates.now()
+            @test a <= b <= c
         end
     end
-    @test abs(Dates.now() - now(Dates.UTC)) < Dates.Hour(16)
+    a = Dates.now()
+    b = now(Dates.UTC)
+    c = Dates.now()
+    @test abs(a - b) < Dates.Hour(16) + abs(c - a)
 end
 @testset "Issue #9171, #9169" begin
     let t = Dates.Period[Dates.Week(2), Dates.Day(14), Dates.Hour(14 * 24), Dates.Minute(14 * 24 * 60), Dates.Second(14 * 24 * 60 * 60), Dates.Millisecond(14 * 24 * 60 * 60 * 1000)]
diff --git a/stdlib/DelimitedFiles.version b/stdlib/DelimitedFiles.version
new file mode 100644
index 0000000000000..d741690a96838
--- /dev/null
+++ b/stdlib/DelimitedFiles.version
@@ -0,0 +1,4 @@
+DELIMITEDFILES_BRANCH = main
+DELIMITEDFILES_SHA1 = db79c842f95f55b1f8d8037c0d3363ab21cd3b90
+DELIMITEDFILES_GIT_URL := https://github.com/JuliaData/DelimitedFiles.jl.git
+DELIMITEDFILES_TAR_URL = https://api.github.com/repos/JuliaData/DelimitedFiles.jl/tarball/$1
diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl
index 3bcbc7b67f60d..a7c5b1778b144 100644
--- a/stdlib/Distributed/src/Distributed.jl
+++ b/stdlib/Distributed/src/Distributed.jl
@@ -7,7 +7,7 @@ module Distributed
 
 # imports for extension
 import Base: getindex, wait, put!, take!, fetch, isready, push!, length,
-             hash, ==, kill, close, isopen, showerror
+             hash, ==, kill, close, isopen, showerror, iterate, IteratorSize
 
 # imports for use
 using Base: Process, Semaphore, JLOptions, buffer_writes, @async_unwrap,
@@ -76,6 +76,9 @@ function _require_callback(mod::Base.PkgId)
         # broadcast top-level (e.g. from Main) import/using from node 1 (only)
         @sync for p in procs()
             p == 1 && continue
+            # Extensions are already loaded on workers by their triggers being loaded
+            # so no need to fire the callback upon extension being loaded on master.
+            Base.loading_extension && continue
             @async_unwrap remotecall_wait(p) do
                 Base.require(mod)
                 nothing
@@ -107,6 +110,7 @@ include("macros.jl")      # @spawn and friends
 include("workerpool.jl")
 include("pmap.jl")
 include("managers.jl")    # LocalManager and SSHManager
+include("precompile.jl")
 
 function __init__()
     init_parallel()
diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl
index d2cbe55e63270..3fd3d63108297 100644
--- a/stdlib/Distributed/src/cluster.jl
+++ b/stdlib/Distributed/src/cluster.jl
@@ -1331,7 +1331,10 @@ function process_opts(opts)
     end
 
     # Propagate --threads to workers
-    exeflags = opts.nthreads > 0 ? `--threads=$(opts.nthreads)` : ``
+    threads = opts.nthreads > 0 ? `--threads=$(opts.nthreads)` : ``
+    gcthreads = opts.ngcthreads > 0 ? `--gcthreads=$(opts.ngcthreads)` : ``
+
+    exeflags = `$threads $gcthreads`
 
     # add processors
     if opts.nprocs > 0
diff --git a/stdlib/Distributed/src/pmap.jl b/stdlib/Distributed/src/pmap.jl
index 603dfa7e031ce..f884d47fff98e 100644
--- a/stdlib/Distributed/src/pmap.jl
+++ b/stdlib/Distributed/src/pmap.jl
@@ -6,7 +6,7 @@ struct BatchProcessingError <: Exception
 end
 
 """
-    pgenerate([::WorkerPool], f, c...) -> iterator
+    pgenerate([::AbstractWorkerPool], f, c...) -> iterator
 
 Apply `f` to each element of `c` in parallel using available workers and tasks.
 
@@ -18,14 +18,14 @@ Note that `f` must be made available to all worker processes; see
 [Code Availability and Loading Packages](@ref code-availability)
 for details.
 """
-function pgenerate(p::WorkerPool, f, c)
+function pgenerate(p::AbstractWorkerPool, f, c)
     if length(p) == 0
         return AsyncGenerator(f, c; ntasks=()->nworkers(p))
     end
     batches = batchsplit(c, min_batch_count = length(p) * 3)
     return Iterators.flatten(AsyncGenerator(remote(p, b -> asyncmap(f, b)), batches))
 end
-pgenerate(p::WorkerPool, f, c1, c...) = pgenerate(p, a->f(a...), zip(c1, c...))
+pgenerate(p::AbstractWorkerPool, f, c1, c...) = pgenerate(p, a->f(a...), zip(c1, c...))
 pgenerate(f, c) = pgenerate(default_worker_pool(), f, c)
 pgenerate(f, c1, c...) = pgenerate(a->f(a...), zip(c1, c...))
 
diff --git a/stdlib/Distributed/src/precompile.jl b/stdlib/Distributed/src/precompile.jl
new file mode 100644
index 0000000000000..87380f627db7a
--- /dev/null
+++ b/stdlib/Distributed/src/precompile.jl
@@ -0,0 +1,14 @@
+precompile(Tuple{typeof(Distributed.remotecall),Function,Int,Module,Vararg{Any, 100}})
+precompile(Tuple{typeof(Distributed.procs)})
+precompile(Tuple{typeof(Distributed.finalize_ref), Distributed.Future})
+# This is disabled because it doesn't give much benefit
+# and the code in Distributed is poorly typed causing many invalidations
+# TODO: Maybe reenable now that Distributed is not in sysimage.
+#=
+    precompile_script *= """
+    using Distributed
+    addprocs(2)
+    pmap(x->iseven(x) ? 1 : 0, 1:4)
+    @distributed (+) for i = 1:100 Int(rand(Bool)) end
+    """
+=#
diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl
index 0554f47670eb3..0b1143d855510 100644
--- a/stdlib/Distributed/src/remotecall.jl
+++ b/stdlib/Distributed/src/remotecall.jl
@@ -485,7 +485,7 @@ julia> remotecall_fetch(sqrt, 2, 4)
 julia> remotecall_fetch(sqrt, 2, -4)
 ERROR: On worker 2:
 DomainError with -4.0:
-sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)).
 ...
 ```
 """
@@ -778,3 +778,23 @@ function getindex(r::RemoteChannel, args...)
     end
     return remotecall_fetch(getindex, r.where, r, args...)
 end
+
+function iterate(c::RemoteChannel, state=nothing)
+    if isopen(c) || isready(c)
+        try
+            return (take!(c), nothing)
+        catch e
+            if isa(e, InvalidStateException) ||
+                (isa(e, RemoteException) &&
+                isa(e.captured.ex, InvalidStateException) &&
+                e.captured.ex.state === :closed)
+                return nothing
+            end
+            rethrow()
+        end
+    else
+        return nothing
+    end
+end
+
+IteratorSize(::Type{<:RemoteChannel}) = SizeUnknown()
diff --git a/stdlib/Distributed/src/workerpool.jl b/stdlib/Distributed/src/workerpool.jl
index 89e52667c82c9..5dd1c07044e09 100644
--- a/stdlib/Distributed/src/workerpool.jl
+++ b/stdlib/Distributed/src/workerpool.jl
@@ -239,12 +239,14 @@ perform a `remote_do` on it.
 """
 remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remote_do, f, pool, args...; kwargs...)
 
-const _default_worker_pool = Ref{Union{WorkerPool, Nothing}}(nothing)
+const _default_worker_pool = Ref{Union{AbstractWorkerPool, Nothing}}(nothing)
 
 """
     default_worker_pool()
 
-[`WorkerPool`](@ref) containing idle [`workers`](@ref) - used by `remote(f)` and [`pmap`](@ref) (by default).
+[`AbstractWorkerPool`](@ref) containing idle [`workers`](@ref) - used by `remote(f)` and [`pmap`](@ref)
+(by default). Unless one is explicitly set via `default_worker_pool!(pool)`, the default worker pool is
+initialized to a [`WorkerPool`](@ref).
 
 # Examples
 ```julia-repl
@@ -267,6 +269,15 @@ function default_worker_pool()
     return _default_worker_pool[]
 end
 
+"""
+    default_worker_pool!(pool::AbstractWorkerPool)
+
+Set a [`AbstractWorkerPool`](@ref) to be used by `remote(f)` and [`pmap`](@ref) (by default).
+"""
+function default_worker_pool!(pool::AbstractWorkerPool)
+    _default_worker_pool[] = pool
+end
+
 """
     remote([p::AbstractWorkerPool], f) -> Function
 
diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl
index 8471acade993b..43e02c92b5a81 100644
--- a/stdlib/Distributed/test/distributed_exec.jl
+++ b/stdlib/Distributed/test/distributed_exec.jl
@@ -456,6 +456,32 @@ function test_iteration(in_c, out_c)
 end
 
 test_iteration(Channel(10), Channel(10))
+test_iteration(RemoteChannel(() -> Channel(10)), RemoteChannel(() -> Channel(10)))
+
+@everywhere function test_iteration_take(ch)
+    count = 0
+    for x in ch
+        count += 1
+    end
+    return count
+end
+
+@everywhere function test_iteration_put(ch, total)
+    for i in 1:total
+        put!(ch, i)
+    end
+    close(ch)
+end
+
+let ch = RemoteChannel(() -> Channel(1))
+    @async test_iteration_put(ch, 10)
+    @test 10 == @fetchfrom id_other test_iteration_take(ch)
+    # now reverse
+    ch = RemoteChannel(() -> Channel(1))
+    @spawnat id_other test_iteration_put(ch, 10)
+    @test 10 == test_iteration_take(ch)
+end
+
 # make sure exceptions propagate when waiting on Tasks
 @test_throws CompositeException (@sync (@async error("oops")))
 try
@@ -675,6 +701,19 @@ wp = CachingPool(workers())
 clear!(wp)
 @test length(wp.map_obj2ref) == 0
 
+# default_worker_pool! tests
+wp_default = Distributed.default_worker_pool()
+try
+    local wp = CachingPool(workers())
+    Distributed.default_worker_pool!(wp)
+    @test [1:100...] == pmap(x->x, wp, 1:100)
+    @test !isempty(wp.map_obj2ref)
+    clear!(wp)
+    @test isempty(wp.map_obj2ref)
+finally
+    Distributed.default_worker_pool!(wp_default)
+end
+
 # The below block of tests are usually run only on local development systems, since:
 # - tests which print errors
 # - addprocs tests are memory intensive
@@ -1427,7 +1466,7 @@ let thrown = false
         thrown = true
         local b = IOBuffer()
         showerror(b, e)
-        @test occursin("sqrt will only return", String(take!(b)))
+        @test occursin("sqrt was called with a negative real argument", String(take!(b)))
     end
     @test thrown
 end
@@ -1835,7 +1874,7 @@ let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp
     using Distributed
     project = mktempdir()
     env = Dict(
-        "JULIA_LOAD_PATH" => LOAD_PATH[1],
+        "JULIA_LOAD_PATH" => string(LOAD_PATH[1], $(repr(pathsep)), "@stdlib"),
         "JULIA_DEPOT_PATH" => DEPOT_PATH[1],
         "TMPDIR" => ENV["TMPDIR"],
     )
@@ -1845,7 +1884,7 @@ let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp
     """ * setupcode * """
     for w in workers()
         @test remotecall_fetch(depot_path, w)          == [DEPOT_PATH[1]]
-        @test remotecall_fetch(load_path, w)           == [LOAD_PATH[1]]
+        @test remotecall_fetch(load_path, w)           == [LOAD_PATH[1], "@stdlib"]
         @test remotecall_fetch(active_project, w)      == project
         @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml")
     end
diff --git a/stdlib/Distributed/test/splitrange.jl b/stdlib/Distributed/test/splitrange.jl
index 9f3c9c92a3ffa..1cb12e1952b7d 100644
--- a/stdlib/Distributed/test/splitrange.jl
+++ b/stdlib/Distributed/test/splitrange.jl
@@ -28,6 +28,8 @@ isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE
 using .Main.OffsetArrays
 
 oa = OffsetArray([123, -345], (-2,))
+
+@everywhere using Test
 @sync @distributed for i in eachindex(oa)
     @test i ∈ (-1, 0)
 end
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index 16fdbd047c4de..c6db08779e947 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,4 +1,4 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = 030cfb3fefd29e87405cb689fb8178613131f55c
+DOWNLOADS_SHA1 = f97c72fbd726e208a04c53791b35cc34c747569f
 DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git
 DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
diff --git a/stdlib/GMP_jll/src/GMP_jll.jl b/stdlib/GMP_jll/src/GMP_jll.jl
index 90daa24b150ed..fde2fc15acf90 100644
--- a/stdlib/GMP_jll/src/GMP_jll.jl
+++ b/stdlib/GMP_jll/src/GMP_jll.jl
@@ -13,11 +13,11 @@ export libgmp, libgmpxx
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libgmp_handle = C_NULL
-libgmp_path = ""
-libgmpxx_handle = C_NULL
-libgmpxx_path = ""
+artifact_dir::String = ""
+libgmp_handle::Ptr{Cvoid} = C_NULL
+libgmp_path::String = ""
+libgmpxx_handle::Ptr{Cvoid} = C_NULL
+libgmpxx_path::String = ""
 
 if Sys.iswindows()
     const libgmp = "libgmp-10.dll"
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 1f0b05c29c3b5..b0bf24e0d1379 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -21,7 +21,7 @@ include("macros.jl")
 include("clipboard.jl")
 
 """
-    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, minsize::Int = 0)
+    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, recursive::Bool = false, sortby::Symbol = :name, minsize::Int = 0)
 
 Return a markdown table giving information about exported global variables in a module, optionally restricted
 to those matching `pattern`.
@@ -33,6 +33,9 @@ The memory consumption estimate is an approximate lower bound on the size of the
 - `recursive` : recursively include objects in sub-modules, observing the same settings in each.
 - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`.
 - `minsize` : only includes objects with size at least `minsize` bytes. Defaults to `0`.
+
+The output of `varinfo` is intended for display purposes only.  See also [`names`](@ref) to get an array of symbols defined in
+a module, which is suitable for more general manipulations.
 """
 function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0)
     sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"))
diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl
index adf676cb8c55a..a4a5118acf8d7 100644
--- a/stdlib/InteractiveUtils/src/clipboard.jl
+++ b/stdlib/InteractiveUtils/src/clipboard.jl
@@ -154,7 +154,7 @@ Send a printed form of `x` to the operating system clipboard ("copy").
 clipboard(x)
 
 """
-    clipboard() -> AbstractString
+    clipboard() -> String
 
 Return a string with the contents of the operating system clipboard ("paste").
 """
diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl
index 8c0658142c019..29a64343b8370 100644
--- a/stdlib/InteractiveUtils/src/codeview.jl
+++ b/stdlib/InteractiveUtils/src/codeview.jl
@@ -189,7 +189,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
             # OC was constructed from inferred source. There's only one
             # specialization and we can't infer anything more precise either.
             world = f.source.primary_world
-            linfo = f.source.specializations[1]
+            linfo = f.source.specializations::Core.MethodInstance
             Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING)
         else
             linfo = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec())
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index 135c207654ca0..53242a422140b 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -2,7 +2,7 @@
 
 # macro wrappers for various reflection functions
 
-import Base: typesof, insert!, replace_ref_begin_end!
+import Base: typesof, insert!, replace_ref_begin_end!, infer_effects
 
 separate_kwargs(args...; kwargs...) = (args, values(kwargs))
 
@@ -212,7 +212,7 @@ macro which(ex0::Symbol)
     return :(which($__module__, $ex0))
 end
 
-for fname in [:code_warntype, :code_llvm, :code_native]
+for fname in [:code_warntype, :code_llvm, :code_native, :infer_effects]
     @eval begin
         macro ($fname)(ex0...)
             gen_call_with_extracted_types_and_kwargs(__module__, $(Expr(:quote, fname)), ex0)
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index 02fb4b25ec43f..5f90491fd8151 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -699,7 +699,7 @@ end
 
 @testset "code_llvm on opaque_closure" begin
     let ci = code_typed(+, (Int, Int))[1][1]
-        ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Int])
+        ir = Core.Compiler.inflate_ir(ci)
         oc = Core.OpaqueClosure(ir)
         @test (code_llvm(devnull, oc, Tuple{Int, Int}); true)
         let io = IOBuffer()
@@ -719,3 +719,5 @@ end
         end
     end
 end
+
+@test Base.infer_effects(sin, (Int,)) == InteractiveUtils.@infer_effects sin(42)
diff --git a/stdlib/LLD_jll/Project.toml b/stdlib/LLD_jll/Project.toml
index 7fbb85963f798..90d867ca0f7da 100644
--- a/stdlib/LLD_jll/Project.toml
+++ b/stdlib/LLD_jll/Project.toml
@@ -1,18 +1,16 @@
 name = "LLD_jll"
 uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
-version = "14.0.5+3"
+version = "15.0.7+5"
 
 [deps]
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 libLLVM_jll = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.9"
-libLLVM_jll = "14.0.5"
+libLLVM_jll = "15.0.7"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl
index 80653353a7c17..55ccec9cc4005 100644
--- a/stdlib/LLD_jll/src/LLD_jll.jl
+++ b/stdlib/LLD_jll/src/LLD_jll.jl
@@ -14,8 +14,8 @@ export lld
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-lld_path = ""
+artifact_dir::String = ""
+lld_path::String = ""
 if Sys.iswindows()
     const lld_exe = "lld.exe"
 else
@@ -70,8 +70,8 @@ end
 
 function init_lld_path()
     # Prefer our own bundled lld, but if we don't have one, pick it up off of the PATH
-    # If this is an in-tree build, `lld` will live in `tools`.  Otherwise, it'll be in `libexec`
-    for bundled_lld_path in (joinpath(Sys.BINDIR, Base.LIBEXECDIR, lld_exe),
+    # If this is an in-tree build, `lld` will live in `tools`.  Otherwise, it'll be in `private_libexecdir`
+    for bundled_lld_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, lld_exe),
                              joinpath(Sys.BINDIR, "..", "tools", lld_exe),
                              joinpath(Sys.BINDIR, lld_exe))
         if isfile(bundled_lld_path)
diff --git a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
index 2196323ad35aa..5c4026291a673 100644
--- a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
+++ b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
@@ -14,9 +14,9 @@ export llvmlibunwind
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-llvmlibunwind_handle = C_NULL
-llvmlibunwind_path = ""
+artifact_dir::String = ""
+llvmlibunwind_handle::Ptr{Cvoid} = C_NULL
+llvmlibunwind_path::String = ""
 
 const llvmlibunwind = "libunwind"
 
diff --git a/stdlib/LibCURL.version b/stdlib/LibCURL.version
index 715ca76a40cdf..216ab4e7aca22 100644
--- a/stdlib/LibCURL.version
+++ b/stdlib/LibCURL.version
@@ -1,4 +1,4 @@
 LIBCURL_BRANCH = master
-LIBCURL_SHA1 = fd8af649b38ae20c3ff7f5dca53753512ca00376
+LIBCURL_SHA1 = a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0
 LIBCURL_GIT_URL := https://github.com/JuliaWeb/LibCURL.jl.git
 LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1
diff --git a/stdlib/LibCURL_jll/Project.toml b/stdlib/LibCURL_jll/Project.toml
index 45dbb45830837..0ef46598b3118 100644
--- a/stdlib/LibCURL_jll/Project.toml
+++ b/stdlib/LibCURL_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibCURL_jll"
 uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-version = "7.84.0+0"
+version = "8.0.1+0"
 
 [deps]
 LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
diff --git a/stdlib/LibCURL_jll/src/LibCURL_jll.jl b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
index 0911e68678657..cd67bfac0006a 100644
--- a/stdlib/LibCURL_jll/src/LibCURL_jll.jl
+++ b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
@@ -14,16 +14,16 @@ export libcurl
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libcurl_handle = C_NULL
-libcurl_path = ""
+artifact_dir::String = ""
+libcurl_handle::Ptr{Cvoid} = C_NULL
+libcurl_path::String = ""
 
 if Sys.iswindows()
     const libcurl = "libcurl-4.dll"
 elseif Sys.isapple()
     const libcurl = "@rpath/libcurl.4.dylib"
 else
-    const libcurl = "libcurl.so"
+    const libcurl = "libcurl.so.4"
 end
 
 function __init__()
diff --git a/stdlib/LibGit2/test/libgit2-tests.jl b/stdlib/LibGit2/test/libgit2-tests.jl
index 4ace98a0b1ac8..7dbbd10af6f67 100644
--- a/stdlib/LibGit2/test/libgit2-tests.jl
+++ b/stdlib/LibGit2/test/libgit2-tests.jl
@@ -3151,63 +3151,76 @@ mktempdir() do dir
                 run(pipeline(`openssl req -new -x509 -newkey rsa:2048 -sha256 -nodes -keyout $key -out $cert -days 1 -subj "/CN=$common_name"`, stderr=devnull))
                 run(`openssl x509 -in $cert -out $pem -outform PEM`)
 
-                # Find an available port by listening
-                port, server = listenany(49152)
-                close(server)
-
-                # Make a fake Julia package and minimal HTTPS server with our generated
-                # certificate. The minimal server can't actually serve a Git repository.
-                mkdir(joinpath(root, "Example.jl"))
-                pobj = cd(root) do
-                    run(pipeline(`openssl s_server -key $key -cert $cert -WWW -accept $port`, stderr=RawFD(2)), wait=false)
-                end
-
-                errfile = joinpath(root, "error")
-                repo_url = "https://$common_name:$port/Example.jl"
-                repo_dir = joinpath(root, "dest")
-                code = """
-                    using Serialization
-                    import LibGit2
-                    dest_dir = "$repo_dir"
-                    open("$errfile", "w+") do f
-                        try
-                            repo = LibGit2.clone("$repo_url", dest_dir)
-                        catch err
-                            serialize(f, err)
-                        finally
-                            isdir(dest_dir) && rm(dest_dir, recursive=true)
-                        end
+                local pobj, port
+                for attempt in 1:10
+                    # Find an available port by listening, but there's a race condition where
+                    # another process could grab this port, so retry on failure
+                    port, server = listenany(49152)
+                    close(server)
+
+                    # Make a fake Julia package and minimal HTTPS server with our generated
+                    # certificate. The minimal server can't actually serve a Git repository.
+                    mkdir(joinpath(root, "Example.jl"))
+                    pobj = cd(root) do
+                        run(pipeline(`openssl s_server -key $key -cert $cert -WWW -accept $port`, stderr=RawFD(2)), wait=false)
                     end
-                """
-                cmd = `$(Base.julia_cmd()) --startup-file=no -e $code`
+                    @test readuntil(pobj, "ACCEPT") == ""
 
-                try
-                    # The generated certificate is normally invalid
-                    run(cmd)
-                    err = open(errfile, "r") do f
-                        deserialize(f)
-                    end
-                    @test err.code == LibGit2.Error.ERROR
-                    @test startswith(lowercase(err.msg),
-                                     lowercase("user rejected certificate for localhost"))
+                    # Two options: Either we reached "ACCEPT" and the process is running and ready
+                    # or it failed to listen and exited, in which case we try again.
+                    process_running(pobj) && break
+                end
 
-                    rm(errfile)
+                @test process_running(pobj)
+
+                if process_running(pobj)
+                    errfile = joinpath(root, "error")
+                    repo_url = "https://$common_name:$port/Example.jl"
+                    repo_dir = joinpath(root, "dest")
+                    code = """
+                        using Serialization
+                        import LibGit2
+                        dest_dir = "$repo_dir"
+                        open("$errfile", "w+") do f
+                            try
+                                repo = LibGit2.clone("$repo_url", dest_dir)
+                            catch err
+                                serialize(f, err)
+                            finally
+                                isdir(dest_dir) && rm(dest_dir, recursive=true)
+                            end
+                        end
+                    """
+                    cmd = `$(Base.julia_cmd()) --startup-file=no -e $code`
 
-                    # Specify that Julia use only the custom certificate. Note: we need to
-                    # spawn a new Julia process in order for this ENV variable to take effect.
-                    withenv("SSL_CERT_FILE" => pem) do
+                    try
+                        # The generated certificate is normally invalid
                         run(cmd)
                         err = open(errfile, "r") do f
                             deserialize(f)
                         end
                         @test err.code == LibGit2.Error.ERROR
-                        @test occursin(r"invalid content-type: '?text/plain'?"i, err.msg)
-                    end
+                        @test startswith(lowercase(err.msg),
+                                        lowercase("user rejected certificate for localhost"))
+
+                        rm(errfile)
+
+                        # Specify that Julia use only the custom certificate. Note: we need to
+                        # spawn a new Julia process in order for this ENV variable to take effect.
+                        withenv("SSL_CERT_FILE" => pem) do
+                            run(cmd)
+                            err = open(errfile, "r") do f
+                                deserialize(f)
+                            end
+                            @test err.code == LibGit2.Error.ERROR
+                            @test occursin(r"invalid content-type: '?text/plain'?"i, err.msg)
+                        end
 
-                    # OpenSSL s_server should still be running
-                    @test process_running(pobj)
-                finally
-                    kill(pobj)
+                        # OpenSSL s_server should still be running
+                        @test process_running(pobj)
+                    finally
+                        kill(pobj)
+                    end
                 end
             end
         end
diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml
index 676653de04a62..4c16c1fb72e42 100644
--- a/stdlib/LibGit2_jll/Project.toml
+++ b/stdlib/LibGit2_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibGit2_jll"
 uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
-version = "1.5.0+1"
+version = "1.6.1+0"
 
 [deps]
 MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
diff --git a/stdlib/LibGit2_jll/src/LibGit2_jll.jl b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
index d672996f96ad9..f8e814f1f7c30 100644
--- a/stdlib/LibGit2_jll/src/LibGit2_jll.jl
+++ b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
@@ -14,16 +14,16 @@ export libgit2
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libgit2_handle = C_NULL
-libgit2_path = ""
+artifact_dir::String = ""
+libgit2_handle::Ptr{Cvoid} = C_NULL
+libgit2_path::String = ""
 
 if Sys.iswindows()
     const libgit2 = "libgit2.dll"
 elseif Sys.isapple()
-    const libgit2 = "@rpath/libgit2.1.5.dylib"
+    const libgit2 = "@rpath/libgit2.1.6.dylib"
 else
-    const libgit2 = "libgit2.so.1.5"
+    const libgit2 = "libgit2.so.1.6"
 end
 
 function __init__()
diff --git a/stdlib/LibGit2_jll/test/runtests.jl b/stdlib/LibGit2_jll/test/runtests.jl
index 402b61a4581ab..32ada173f01a0 100644
--- a/stdlib/LibGit2_jll/test/runtests.jl
+++ b/stdlib/LibGit2_jll/test/runtests.jl
@@ -7,5 +7,5 @@ using Test, Libdl, LibGit2_jll
     minor = Ref{Cint}(0)
     patch = Ref{Cint}(0)
     @test ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) == 0
-    @test VersionNumber(major[], minor[], patch[]) == v"1.5.0"
+    @test VersionNumber(major[], minor[], patch[]) == v"1.6.1"
 end
diff --git a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
index 66987b30d090c..a809f7a912d6b 100644
--- a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
+++ b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
@@ -14,9 +14,9 @@ export libssh2
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libssh2_handle = C_NULL
-libssh2_path = ""
+artifact_dir::String = ""
+libssh2_handle::Ptr{Cvoid} = C_NULL
+libssh2_path::String = ""
 
 if Sys.iswindows()
     const libssh2 = "libssh2.dll"
diff --git a/stdlib/LibUV_jll/src/LibUV_jll.jl b/stdlib/LibUV_jll/src/LibUV_jll.jl
index e4897138cc6cc..f6714fae536e9 100644
--- a/stdlib/LibUV_jll/src/LibUV_jll.jl
+++ b/stdlib/LibUV_jll/src/LibUV_jll.jl
@@ -14,9 +14,9 @@ export libuv
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libuv_handle = C_NULL
-libuv_path = ""
+artifact_dir::String = ""
+libuv_handle::Ptr{Cvoid} = C_NULL
+libuv_path::String = ""
 
 if Sys.iswindows()
     const libuv = "libuv-2.dll"
diff --git a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
index ae79e790a999b..12abeaf598151 100644
--- a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
+++ b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
@@ -14,9 +14,9 @@ export libunwind
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libunwind_handle = C_NULL
-libunwind_path = ""
+artifact_dir::String = ""
+libunwind_handle::Ptr{Cvoid} = C_NULL
+libunwind_path::String = ""
 
 const libunwind = "libunwind.so.8"
 
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
index 9f12af174a4ff..00ce21ed6fcae 100644
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ b/stdlib/LinearAlgebra/docs/src/index.md
@@ -150,7 +150,10 @@ julia> sB\x
  -1.1086956521739126
  -1.4565217391304346
 ```
-The `\` operation here performs the linear solution. The left-division operator is pretty powerful and it's easy to write compact, readable code that is flexible enough to solve all sorts of systems of linear equations.
+
+The `\` operation here performs the linear solution. The left-division operator is pretty
+powerful and it's easy to write compact, readable code that is flexible enough to solve all
+sorts of systems of linear equations.
 
 ## Special matrices
 
@@ -276,12 +279,11 @@ to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](
 Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I)x \ b`) and related
 operations like determinants.
 
-
 ## [Matrix factorizations](@id man-linalg-factorizations)
 
 [Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition)
 compute the factorization of a matrix into a product of matrices, and are one of the central concepts
-in linear algebra.
+in (numerical) linear algebra.
 
 The following table summarizes the types of matrix factorizations that have been implemented in
 Julia. Details of their associated methods can be found in the [Standard functions](@ref) section
@@ -306,6 +308,98 @@ of the Linear Algebra documentation.
 | `Schur`            | [Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition)                                       |
 | `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) |
 
+Adjoints and transposes of [`Factorization`](@ref) objects are lazily wrapped in
+`AdjointFactorization` and `TransposeFactorization` objects, respectively. Generically,
+transpose of real `Factorization`s are wrapped as `AdjointFactorization`.
+
+## [Orthogonal matrices (`AbstractQ`)](@id man-linalg-abstractq)
+
+Some matrix factorizations generate orthogonal/unitary "matrix" factors. These
+factorizations include QR-related factorizations obtained from calls to [`qr`](@ref), i.e.,
+`QR`, `QRCompactWY` and `QRPivoted`, the Hessenberg factorization obtained from calls to
+[`hessenberg`](@ref), and the LQ factorization obtained from [`lq`](@ref). While these
+orthogonal/unitary factors admit a matrix representation, their internal representation
+is, for performance and memory reasons, different. Hence, they should be rather viewed as
+matrix-backed, function-based linear operators. In particular, reading, for instance, a
+column of its matrix representation requires running "matrix"-vector multiplication code,
+rather than simply reading out data from memory (possibly filling parts of the vector with
+structural zeros). Another clear distinction from other, non-triangular matrix types is
+that the underlying multiplication code allows for in-place modification during multiplication.
+Furthermore, objects of specific `AbstractQ` subtypes as those created via [`qr`](@ref),
+[`hessenberg`](@ref) and [`lq`](@ref) can behave like a square or a rectangular matrix
+depending on context:
+
+```julia
+julia> using LinearAlgebra
+
+julia> Q = qr(rand(3,2)).Q
+3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}
+
+julia> Matrix(Q)
+3×2 Matrix{Float64}:
+ -0.320597   0.865734
+ -0.765834  -0.475694
+ -0.557419   0.155628
+
+julia> Q*I
+3×3 Matrix{Float64}:
+ -0.320597   0.865734  -0.384346
+ -0.765834  -0.475694  -0.432683
+ -0.557419   0.155628   0.815514
+
+julia> Q*ones(2)
+3-element Vector{Float64}:
+  0.5451367118802273
+ -1.241527373086654
+ -0.40179067589600226
+
+julia> Q*ones(3)
+3-element Vector{Float64}:
+  0.16079054743832022
+ -1.674209978965636
+  0.41372375588835797
+
+julia> ones(1,2) * Q'
+1×3 Matrix{Float64}:
+ 0.545137  -1.24153  -0.401791
+
+julia> ones(1,3) * Q'
+1×3 Matrix{Float64}:
+ 0.160791  -1.67421  0.413724
+```
+
+Due to this distinction from dense or structured matrices, the abstract `AbstractQ` type
+does not subtype `AbstractMatrix`, but instead has its own type hierarchy. Custom types
+that subtype `AbstractQ` can rely on generic fallbacks if the following interface is satisfied.
+For example, for
+
+```julia
+struct MyQ{T} <: LinearAlgebra.AbstractQ{T}
+    # required fields
+end
+```
+
+provide overloads for
+
+```julia
+Base.size(Q::MyQ) # size of corresponding square matrix representation
+Base.convert(::Type{AbstractQ{T}}, Q::MyQ) # eltype promotion [optional]
+LinearAlgebra.lmul!(Q::MyQ, x::AbstractVecOrMat) # left-multiplication
+LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) # right-multiplication
+```
+
+If `eltype` promotion is not of interest, the `convert` method is unnecessary, since by
+default `convert(::Type{AbstractQ{T}}, Q::AbstractQ{T})` returns `Q` itself.
+Adjoints of `AbstractQ`-typed objects are lazily wrapped in an `AdjointQ` wrapper type,
+which requires its own `LinearAlgebra.lmul!` and `LinearAlgebra.rmul!` methods. Given this
+set of methods, any `Q::MyQ` can be used like a matrix, preferably in a multiplicative
+context: multiplication via `*` with scalars, vectors and matrices from left and right,
+obtaining a matrix representation of `Q` via `Matrix(Q)` (or `Q*I`) and indexing into the
+matrix representation all work. In contrast, addition and subtraction as well as more
+generally broadcasting over elements in the matrix representation fail because that would
+be highly inefficient. For such use cases, consider computing the matrix representation
+up front and cache it for future reuse.
+
 ## Standard functions
 
 Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/).
@@ -460,13 +554,17 @@ LinearAlgebra.ishermitian
 Base.transpose
 LinearAlgebra.transpose!
 LinearAlgebra.Transpose
+LinearAlgebra.TransposeFactorization
 Base.adjoint
 LinearAlgebra.adjoint!
 LinearAlgebra.Adjoint
+LinearAlgebra.AdjointFactorization
 Base.copy(::Union{Transpose,Adjoint})
 LinearAlgebra.stride1
 LinearAlgebra.checksquare
 LinearAlgebra.peakflops
+LinearAlgebra.hermitianpart
+LinearAlgebra.hermitianpart!
 ```
 
 ## Low-level matrix operations
@@ -498,6 +596,7 @@ four methods defined, for [`Float32`](@ref), [`Float64`](@ref), [`ComplexF32`](@
 and [`ComplexF64`](@ref Complex) arrays.
 
 ### [BLAS character arguments](@id stdlib-blas-chars)
+
 Many BLAS functions accept arguments that determine whether to transpose an argument (`trans`),
 which triangle of a matrix to reference (`uplo` or `ul`),
 whether the diagonal of a triangular matrix can be assumed to
@@ -505,18 +604,21 @@ be all ones (`dA`) or which side of a matrix multiplication
 the input argument belongs on (`side`). The possibilities are:
 
 #### [Multiplication order](@id stdlib-blas-side)
+
 | `side` | Meaning                                                             |
 |:-------|:--------------------------------------------------------------------|
 | `'L'`  | The argument goes on the *left* side of a matrix-matrix operation.  |
 | `'R'`  | The argument goes on the *right* side of a matrix-matrix operation. |
 
 #### [Triangle referencing](@id stdlib-blas-uplo)
+
 | `uplo`/`ul` | Meaning                                               |
 |:------------|:------------------------------------------------------|
 | `'U'`       | Only the *upper* triangle of the matrix will be used. |
 | `'L'`       | Only the *lower* triangle of the matrix will be used. |
 
 #### [Transposition operation](@id stdlib-blas-trans)
+
 | `trans`/`tX` | Meaning                                                 |
 |:-------------|:--------------------------------------------------------|
 | `'N'`        | The input matrix `X` is not transposed or conjugated.   |
@@ -524,12 +626,12 @@ the input argument belongs on (`side`). The possibilities are:
 | `'C'`        | The input matrix `X` will be conjugated and transposed. |
 
 #### [Unit diagonal](@id stdlib-blas-diag)
+
 | `diag`/`dX` | Meaning                                                   |
 |:------------|:----------------------------------------------------------|
 | `'N'`       | The diagonal values of the matrix `X` will be read.       |
 | `'U'`       | The diagonal of the matrix `X` is assumed to be all ones. |
 
-
 ```@docs
 LinearAlgebra.BLAS
 LinearAlgebra.BLAS.set_num_threads
@@ -575,6 +677,7 @@ and define matrix-vector operations.
 [Dongarra-1988]: https://dl.acm.org/doi/10.1145/42288.42291
 
 **return a vector**
+
 ```@docs
 LinearAlgebra.BLAS.gemv!
 LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any)
@@ -604,6 +707,7 @@ LinearAlgebra.BLAS.trsv
 ```
 
 **return a matrix**
+
 ```@docs
 LinearAlgebra.BLAS.ger!
 # xGERU
@@ -693,6 +797,7 @@ LinearAlgebra.LAPACK.ggsvd!
 LinearAlgebra.LAPACK.ggsvd3!
 LinearAlgebra.LAPACK.geevx!
 LinearAlgebra.LAPACK.ggev!
+LinearAlgebra.LAPACK.ggev3!
 LinearAlgebra.LAPACK.gtsv!
 LinearAlgebra.LAPACK.gttrf!
 LinearAlgebra.LAPACK.gttrs!
@@ -733,6 +838,7 @@ LinearAlgebra.LAPACK.hetri!
 LinearAlgebra.LAPACK.hetrs!
 LinearAlgebra.LAPACK.syev!
 LinearAlgebra.LAPACK.syevr!
+LinearAlgebra.LAPACK.syevd!
 LinearAlgebra.LAPACK.sygvd!
 LinearAlgebra.LAPACK.bdsqr!
 LinearAlgebra.LAPACK.bdsdc!
@@ -741,6 +847,7 @@ LinearAlgebra.LAPACK.gehrd!
 LinearAlgebra.LAPACK.orghr!
 LinearAlgebra.LAPACK.gees!
 LinearAlgebra.LAPACK.gges!
+LinearAlgebra.LAPACK.gges3!
 LinearAlgebra.LAPACK.trexc!
 LinearAlgebra.LAPACK.trsen!
 LinearAlgebra.LAPACK.tgsen!
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index 624cca69b84d9..a29c259dae607 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -15,7 +15,7 @@ import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, as
     length, log, map, ndims, one, oneunit, parent, permutedims, power_by_squaring,
     print_matrix, promote_rule, real, round, sec, sech, setindex!, show, similar, sin,
     sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat,
-    vec, zero
+    vec, view, zero
 using Base: IndexLinear, promote_eltype, promote_op, promote_typeof,
     @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
     splat
@@ -94,6 +94,8 @@ export
     eigvecs,
     factorize,
     givens,
+    hermitianpart,
+    hermitianpart!,
     hessenberg,
     hessenberg!,
     isdiag,
@@ -429,8 +431,6 @@ include("tridiag.jl")
 include("triangular.jl")
 
 include("factorization.jl")
-include("qr.jl")
-include("lq.jl")
 include("eigen.jl")
 include("svd.jl")
 include("symmetric.jl")
@@ -441,7 +441,10 @@ include("diagonal.jl")
 include("symmetriceigen.jl")
 include("bidiag.jl")
 include("uniformscaling.jl")
+include("qr.jl")
+include("lq.jl")
 include("hessenberg.jl")
+include("abstractq.jl")
 include("givens.jl")
 include("special.jl")
 include("bitarray.jl")
@@ -501,7 +504,7 @@ _initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} =
 # While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs
 # which is required by LAPACK but not SuiteSparse which allows real-complex solves in some cases. Hence,
 # we restrict this method to only the LAPACK factorizations in LinearAlgebra.
-# The definition is put here since it explicitly references all the Factorizion structs so it has
+# The definition is put here since it explicitly references all the Factorization structs so it has
 # to be located after all the files that define the structs.
 const LAPACKFactorizations{T,S} = Union{
     BunchKaufman{T,S},
@@ -512,7 +515,12 @@ const LAPACKFactorizations{T,S} = Union{
     QRCompactWY{T,S},
     QRPivoted{T,S},
     SVD{T,<:Real,S}}
-function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorizations}}, B::AbstractVecOrMat)
+
+(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B)
+(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B)
+(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B)
+
+function ldiv(F::Factorization, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     m, n = size(F)
     if m != size(B, 1)
@@ -542,7 +550,11 @@ function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorization
 end
 # disambiguate
 (\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
-    invoke(\, Tuple{Factorization{T}, VecOrMat{Complex{T}}}, F, B)
+    @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}})
+(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    ldiv(F, B)
+(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    ldiv(F, B)
 
 """
     LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false)
diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl
new file mode 100644
index 0000000000000..88610dac2e6f6
--- /dev/null
+++ b/stdlib/LinearAlgebra/src/abstractq.jl
@@ -0,0 +1,622 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+abstract type AbstractQ{T} end
+
+struct AdjointQ{T,S<:AbstractQ{T}} <: AbstractQ{T}
+    Q::S
+end
+
+parent(adjQ::AdjointQ) = adjQ.Q
+eltype(::Type{<:AbstractQ{T}}) where {T} = T
+ndims(::AbstractQ) = 2
+
+# inversion/adjoint/transpose
+inv(Q::AbstractQ) = Q'
+adjoint(Q::AbstractQ) = AdjointQ(Q)
+transpose(Q::AbstractQ{<:Real}) = AdjointQ(Q)
+transpose(Q::AbstractQ) = error("transpose not implemented for $(typeof(Q)). Consider using adjoint instead of transpose.")
+adjoint(adjQ::AdjointQ) = adjQ.Q
+
+# promotion with AbstractMatrix, at least for equal eltypes
+promote_rule(::Type{<:AbstractMatrix{T}}, ::Type{<:AbstractQ{T}}) where {T} =
+    (@inline; Union{AbstractMatrix{T},AbstractQ{T}})
+
+# conversion
+# the following eltype promotion should be defined for each subtype `QType`
+# convert(::Type{AbstractQ{T}}, Q::QType) where {T} = QType{T}(Q)
+# and then care has to be taken that
+# QType{T}(Q::QType{T}) where T = ...
+# is implemented as a no-op
+
+# the following conversion method ensures functionality when the above method is not defined
+# (as for HessenbergQ), but no eltype conversion is required either (say, in multiplication)
+convert(::Type{AbstractQ{T}}, Q::AbstractQ{T}) where {T} = Q
+convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ
+convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)'
+
+# ... to matrix
+Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix
+Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ))))
+Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q)
+Array{T}(Q::AbstractQ) where {T} = Matrix{T}(Q)
+Array(Q::AbstractQ) = Matrix(Q)
+convert(::Type{T}, Q::AbstractQ) where {T<:AbstractArray} = T(Q)
+# legacy
+@deprecate(convert(::Type{AbstractMatrix{T}}, Q::AbstractQ) where {T},
+    convert(LinearAlgebra.AbstractQ{T}, Q))
+
+function size(Q::AbstractQ, dim::Integer)
+    if dim < 1
+        throw(BoundsError())
+    elseif dim <= 2 # && 1 <= dim
+        return size(Q)[dim]
+    else # 2 < dim
+        return 1
+    end
+end
+size(adjQ::AdjointQ) = reverse(size(adjQ.Q))
+
+# pseudo-array behaviour, required for indexing with `begin` or `end`
+axes(Q::AbstractQ) = map(Base.oneto, size(Q))
+axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? axes(Q)[d] : Base.OneTo(1)
+
+copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q)))
+copy(Q::AbstractQ) = copymutable(Q)
+
+# getindex
+@inline function getindex(Q::AbstractQ, inds...)
+    @boundscheck Base.checkbounds_indices(Bool, axes(Q), inds) || Base.throw_boundserror(Q, inds)
+    return _getindex(Q, inds...)
+end
+@inline getindex(Q::AbstractQ, ::Colon) = copymutable(Q)[:]
+@inline getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q)
+
+@inline _getindex(Q::AbstractQ, inds...) = @inbounds copymutable(Q)[inds...]
+@inline function _getindex(Q::AbstractQ, ::Colon, J::AbstractVector{<:Integer})
+    Y = zeros(eltype(Q), size(Q, 2), length(J))
+    @inbounds for (i,j) in enumerate(J)
+        Y[j,i] = oneunit(eltype(Q))
+    end
+    lmul!(Q, Y)
+end
+@inline _getindex(Q::AbstractQ, I::AbstractVector{Int}, J::AbstractVector{Int}) = @inbounds Q[:,J][I,:]
+@inline function _getindex(Q::AbstractQ, ::Colon, j::Int)
+    y = zeros(eltype(Q), size(Q, 2))
+    y[j] = oneunit(eltype(Q))
+    lmul!(Q, y)
+end
+@inline _getindex(Q::AbstractQ, i::Int, j::Int) = @inbounds Q[:,j][i]
+
+# needed because AbstractQ does not subtype AbstractMatrix
+qr(Q::AbstractQ{T}, arg...; kwargs...) where {T} = qr!(Matrix{_qreltype(T)}(Q), arg...; kwargs...)
+lq(Q::AbstractQ{T}, arg...; kwargs...) where {T} = lq!(Matrix{lq_eltype(T)}(Q), arg...; kwargs...)
+hessenberg(Q::AbstractQ{T}) where {T} = hessenberg!(Matrix{eigtype(T)}(Q))
+
+# needed when used interchangeably with AbstractMatrix (analogous to views of ranges)
+view(A::AbstractQ, I...) = getindex(A, I...)
+
+# specialization avoiding the fallback using slow `getindex`
+function copyto!(dest::AbstractMatrix, src::AbstractQ)
+    copyto!(dest, I)
+    lmul!(src, dest)
+end
+# needed to resolve method ambiguities
+function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm}
+    if perm == (1, 2)
+        copyto!(parent(dest), src)
+    else
+        @assert perm == (2, 1) # there are no other permutations of two indices
+        if T <: Real
+            copyto!(parent(dest), I)
+            lmul!(src', parent(dest))
+        else
+            # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q
+            tmp = similar(parent(dest))
+            copyto!(tmp, I)
+            rmul!(tmp, src)
+            permutedims!(parent(dest), tmp, (2, 1))
+        end
+    end
+    return dest
+end
+
+function show(io::IO, ::MIME{Symbol("text/plain")}, Q::AbstractQ)
+    print(io, Base.dims2string(size(Q)), ' ', summary(Q))
+end
+
+# multiplication
+(*)(Q::AbstractQ, J::UniformScaling) = Q*J.λ
+function (*)(Q::AbstractQ, b::Number)
+    T = promote_type(eltype(Q), typeof(b))
+    lmul!(convert(AbstractQ{T}, Q), Matrix{T}(b*I, size(Q)))
+end
+function (*)(A::AbstractQ, B::AbstractVecOrMat)
+    T = promote_type(eltype(A), eltype(B))
+    lmul!(convert(AbstractQ{T}, A), copy_similar(B, T))
+end
+
+(*)(J::UniformScaling, Q::AbstractQ) = J.λ*Q
+function (*)(a::Number, Q::AbstractQ)
+    T = promote_type(typeof(a), eltype(Q))
+    rmul!(Matrix{T}(a*I, size(Q)), convert(AbstractQ{T}, Q))
+end
+*(a::AbstractVector, Q::AbstractQ) = reshape(a, length(a), 1) * Q
+function (*)(A::AbstractMatrix, Q::AbstractQ)
+    T = promote_type(eltype(A), eltype(Q))
+    return rmul!(copy_similar(A, T), convert(AbstractQ{T}, Q))
+end
+(*)(u::AdjointAbsVec, Q::AbstractQ) = (Q'u')'
+
+### Q*Q (including adjoints)
+*(Q::AbstractQ, P::AbstractQ) = Q * (P*I)
+
+### mul!
+function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat{T},AbstractQ{T}}) where {T}
+    require_one_based_indexing(C, B)
+    mB = size(B, 1)
+    mC = size(C, 1)
+    if mB < mC
+        inds = CartesianIndices(axes(B))
+        copyto!(view(C, inds), B)
+        C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T)
+        return lmul!(Q, C)
+    else
+        return lmul!(Q, copyto!(C, B))
+    end
+end
+mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat{T}, Q::AbstractQ{T}) where {T} = rmul!(copyto!(C, A), Q)
+mul!(C::AbstractVecOrMat{T}, adjQ::AdjointQ{T}, B::AbstractVecOrMat{T}) where {T} = lmul!(adjQ, copyto!(C, B))
+mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat{T}, adjQ::AdjointQ{T}) where {T} = rmul!(copyto!(C, A), adjQ)
+
+### division
+\(Q::AbstractQ, A::AbstractVecOrMat) = Q'*A
+/(A::AbstractVecOrMat, Q::AbstractQ) = A*Q'
+ldiv!(Q::AbstractQ, A::AbstractVecOrMat) = lmul!(Q', A)
+ldiv!(C::AbstractVecOrMat, Q::AbstractQ, A::AbstractVecOrMat) = mul!(C, Q', A)
+rdiv!(A::AbstractVecOrMat, Q::AbstractQ) = rmul!(A, Q')
+
+logabsdet(Q::AbstractQ) = (d = det(Q); return log(abs(d)), sign(d))
+function logdet(A::AbstractQ)
+    d, s = logabsdet(A)
+    return d + log(s)
+end
+
+###########################################################
+################ Q from QR decompositions #################
+###########################################################
+
+"""
+    QRPackedQ <: LinearAlgebra.AbstractQ
+
+The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or
+[`QRPivoted`](@ref) format.
+"""
+struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T}
+    factors::S
+    τ::C
+
+    function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
+        require_one_based_indexing(factors, τ)
+        new{T,S,C}(factors, τ)
+    end
+end
+QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
+    QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ)
+QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
+    QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
+           QRPackedQ{T,S,typeof(τ)}(factors, τ), false)
+
+"""
+    QRCompactWYQ <: LinearAlgebra.AbstractQ
+
+The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref)
+format.
+"""
+struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S}
+    factors::M
+    T::C
+
+    function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}}
+        require_one_based_indexing(factors, T)
+        new{S,M,C}(factors, T)
+    end
+end
+QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} =
+    QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T)
+QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} =
+    QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M},
+           QRCompactWYQ{S,M,typeof(T)}(factors, T), false)
+
+QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ))
+QRCompactWYQ{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ(convert(AbstractMatrix{S}, Q.factors), convert(AbstractMatrix{S}, Q.T))
+
+# override generic square fallback
+Matrix{T}(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {T,S} =
+    convert(Matrix{T}, lmul!(Q, Matrix{S}(I, size(Q, 1), min(size(Q.factors)...))))
+Matrix(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {S} = Matrix{S}(Q)
+
+convert(::Type{AbstractQ{T}}, Q::QRPackedQ) where {T} = QRPackedQ{T}(Q)
+convert(::Type{AbstractQ{T}}, Q::QRCompactWYQ) where {T} = QRCompactWYQ{T}(Q)
+
+size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) =
+    size(Q.factors, dim == 2 ? 1 : dim)
+size(Q::Union{QRCompactWYQ,QRPackedQ}) = (n = size(Q.factors, 1); (n, n))
+
+## Multiplication
+### QB
+lmul!(A::QRCompactWYQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
+    LAPACK.gemqrt!('L', 'N', A.factors, A.T, B)
+lmul!(A::QRPackedQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
+    LAPACK.ormqr!('L', 'N', A.factors, A.τ, B)
+function lmul!(A::QRPackedQ, B::AbstractVecOrMat)
+    require_one_based_indexing(B)
+    mA, nA = size(A.factors)
+    mB, nB = size(B,1), size(B,2)
+    if mA != mB
+        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
+    end
+    Afactors = A.factors
+    @inbounds begin
+        for k = min(mA,nA):-1:1
+            for j = 1:nB
+                vBj = B[k,j]
+                for i = k+1:mB
+                    vBj += conj(Afactors[i,k])*B[i,j]
+                end
+                vBj = A.τ[k]*vBj
+                B[k,j] -= vBj
+                for i = k+1:mB
+                    B[i,j] -= Afactors[i,k]*vBj
+                end
+            end
+        end
+    end
+    B
+end
+
+### QcB
+lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
+    (Q = adjQ.Q; LAPACK.gemqrt!('L', 'T', Q.factors, Q.T, B))
+lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
+    (Q = adjQ.Q; LAPACK.gemqrt!('L', 'C', Q.factors, Q.T, B))
+lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
+    (Q = adjQ.Q; LAPACK.ormqr!('L', 'T', Q.factors, Q.τ, B))
+lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
+    (Q = adjQ.Q; LAPACK.ormqr!('L', 'C', Q.factors, Q.τ, B))
+function lmul!(adjA::AdjointQ{<:Any,<:QRPackedQ}, B::AbstractVecOrMat)
+    require_one_based_indexing(B)
+    A = adjA.Q
+    mA, nA = size(A.factors)
+    mB, nB = size(B,1), size(B,2)
+    if mA != mB
+        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
+    end
+    Afactors = A.factors
+    @inbounds begin
+        for k = 1:min(mA,nA)
+            for j = 1:nB
+                vBj = B[k,j]
+                for i = k+1:mB
+                    vBj += conj(Afactors[i,k])*B[i,j]
+                end
+                vBj = conj(A.τ[k])*vBj
+                B[k,j] -= vBj
+                for i = k+1:mB
+                    B[i,j] -= Afactors[i,k]*vBj
+                end
+            end
+        end
+    end
+    B
+end
+
+### AQ
+rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,<:StridedMatrix}) where {T<:BlasFloat} =
+    LAPACK.gemqrt!('R', 'N', B.factors, B.T, A)
+rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,<:StridedMatrix}) where {T<:BlasFloat} =
+    LAPACK.ormqr!('R', 'N', B.factors, B.τ, A)
+function rmul!(A::AbstractMatrix, Q::QRPackedQ)
+    require_one_based_indexing(A)
+    mQ, nQ = size(Q.factors)
+    mA, nA = size(A,1), size(A,2)
+    if nA != mQ
+        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
+    end
+    Qfactors = Q.factors
+    @inbounds begin
+        for k = 1:min(mQ,nQ)
+            for i = 1:mA
+                vAi = A[i,k]
+                for j = k+1:mQ
+                    vAi += A[i,j]*Qfactors[j,k]
+                end
+                vAi = vAi*Q.τ[k]
+                A[i,k] -= vAi
+                for j = k+1:nA
+                    A[i,j] -= vAi*conj(Qfactors[j,k])
+                end
+            end
+        end
+    end
+    A
+end
+
+### AQc
+rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} =
+    (Q = adjQ.Q; LAPACK.gemqrt!('R', 'T', Q.factors, Q.T, A))
+rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} =
+    (Q = adjQ.Q; LAPACK.gemqrt!('R', 'C', Q.factors, Q.T, A))
+rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} =
+    (Q = adjQ.Q; LAPACK.ormqr!('R', 'T', Q.factors, Q.τ, A))
+rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} =
+    (Q = adjQ.Q; LAPACK.ormqr!('R', 'C', Q.factors, Q.τ, A))
+function rmul!(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:QRPackedQ})
+    require_one_based_indexing(A)
+    Q = adjQ.Q
+    mQ, nQ = size(Q.factors)
+    mA, nA = size(A,1), size(A,2)
+    if nA != mQ
+        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
+    end
+    Qfactors = Q.factors
+    @inbounds begin
+        for k = min(mQ,nQ):-1:1
+            for i = 1:mA
+                vAi = A[i,k]
+                for j = k+1:mQ
+                    vAi += A[i,j]*Qfactors[j,k]
+                end
+                vAi = vAi*conj(Q.τ[k])
+                A[i,k] -= vAi
+                for j = k+1:nA
+                    A[i,j] -= vAi*conj(Qfactors[j,k])
+                end
+            end
+        end
+    end
+    A
+end
+
+det(Q::QRPackedQ) = _det_tau(Q.τ)
+det(Q::QRCompactWYQ) =
+    prod(i -> _det_tau(_diagview(Q.T[:, i:min(i + size(Q.T, 1), size(Q.T, 2))])),
+         1:size(Q.T, 1):size(Q.T, 2))
+
+_diagview(A) = @view A[diagind(A)]
+
+# Compute `det` from the number of Householder reflections.  Handle
+# the case `Q.τ` contains zeros.
+_det_tau(τs::AbstractVector{<:Real}) =
+    isodd(count(!iszero, τs)) ? -one(eltype(τs)) : one(eltype(τs))
+
+# In complex case, we need to compute the non-unit eigenvalue `λ = 1 - c*τ`
+# (where `c = v'v`) of each Householder reflector.  As we know that the
+# reflector must have the determinant of 1, it must satisfy `abs2(λ) == 1`.
+# Combining this with the constraint `c > 0`, it turns out that the eigenvalue
+# (hence the determinant) can be computed as `λ = -sign(τ)^2`.
+# See: https://github.com/JuliaLang/julia/pull/32887#issuecomment-521935716
+_det_tau(τs) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, τs)
+
+###########################################################
+######## Q from Hessenberg decomposition ##################
+###########################################################
+
+"""
+    HessenbergQ <: AbstractQ
+
+Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns
+a `HessenbergQ` object, which is an implicit representation of the unitary
+matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`.
+This `F.Q` object can be efficiently multiplied by matrices or vectors,
+and can be converted to an ordinary matrix type with `Matrix(F.Q)`.
+"""
+struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T}
+    uplo::Char
+    factors::S
+    τ::W
+    function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym}
+        new(uplo, factors, τ)
+    end
+end
+HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ)
+HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ)
+
+size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim)
+size(Q::HessenbergQ) = size(Q, 1), size(Q, 2)
+
+# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra)
+const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym}
+
+## reconstruct the original matrix
+Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ))
+Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ))
+
+lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
+    LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
+rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
+    LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
+lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
+    (Q = adjQ.Q; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
+rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
+    (Q = adjQ.Q; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
+
+lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
+    LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X)
+rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
+    LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X)
+lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
+    (Q = adjQ.Q; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
+rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
+    (Q = adjQ.Q; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
+
+lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')'
+rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
+lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T}  = rmul!(X', adjQ')'
+rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
+
+# flexible left-multiplication (and adjoint right-multiplication)
+function (*)(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, b::AbstractVector)
+    T = promote_type(eltype(Q), eltype(b))
+    if size(Q.factors, 1) == length(b)
+        bnew = copy_similar(b, T)
+    elseif size(Q.factors, 2) == length(b)
+        bnew = [b; zeros(T, size(Q.factors, 1) - length(b))]
+    else
+        throw(DimensionMismatch("vector must have length either $(size(Q.factors, 1)) or $(size(Q.factors, 2))"))
+    end
+    lmul!(convert(AbstractQ{T}, Q), bnew)
+end
+function (*)(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractMatrix)
+    T = promote_type(eltype(Q), eltype(B))
+    if size(Q.factors, 1) == size(B, 1)
+        Bnew = copy_similar(B, T)
+    elseif size(Q.factors, 2) == size(B, 1)
+        Bnew = [B; zeros(T, size(Q.factors, 1) - size(B,1), size(B, 2))]
+    else
+        throw(DimensionMismatch("first dimension of matrix must have size either $(size(Q.factors, 1)) or $(size(Q.factors, 2))"))
+    end
+    lmul!(convert(AbstractQ{T}, Q), Bnew)
+end
+function (*)(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}})
+    Q = adjQ.Q
+    T = promote_type(eltype(A), eltype(adjQ))
+    adjQQ = convert(AbstractQ{T}, adjQ)
+    if size(A, 2) == size(Q.factors, 1)
+        AA = copy_similar(A, T)
+        return rmul!(AA, adjQQ)
+    elseif size(A, 2) == size(Q.factors, 2)
+        return rmul!([A zeros(T, size(A, 1), size(Q.factors, 1) - size(Q.factors, 2))], adjQQ)
+    else
+        throw(DimensionMismatch("matrix A has dimensions $(size(A)) but Q-matrix B has dimensions $(size(adjQ))"))
+    end
+end
+(*)(u::AdjointAbsVec, Q::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = (Q'u')'
+
+det(Q::HessenbergQ) = _det_tau(Q.τ)
+
+###########################################################
+################ Q from LQ decomposition ##################
+###########################################################
+
+struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T}
+    factors::S
+    τ::C
+end
+
+LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ))
+@deprecate(AbstractMatrix{T}(Q::LQPackedQ) where {T},
+    convert(AbstractQ{T}, Q),
+    false)
+Matrix{T}(A::LQPackedQ) where {T} = convert(Matrix{T}, LAPACK.orglq!(copy(A.factors), A.τ))
+convert(::Type{AbstractQ{T}}, Q::LQPackedQ) where {T} = LQPackedQ{T}(Q)
+
+# size(Q::LQPackedQ) yields the shape of Q's square form
+size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n)
+
+## Multiplication
+### QB / QcB
+lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B)
+lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
+    (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B))
+lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
+    (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B))
+
+function (*)(adjA::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVector)
+    A = adjA.Q
+    T = promote_type(eltype(A), eltype(B))
+    if length(B) == size(A.factors, 2)
+        C = copy_similar(B, T)
+    elseif length(B) == size(A.factors, 1)
+        C = [B; zeros(T, size(A.factors, 2) - size(A.factors, 1), size(B, 2))]
+    else
+        throw(DimensionMismatch("length of B, $(length(B)), must equal one of the dimensions of A, $(size(A))"))
+    end
+    lmul!(convert(AbstractQ{T}, adjA), C)
+end
+function (*)(adjA::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractMatrix)
+    A = adjA.Q
+    T = promote_type(eltype(A), eltype(B))
+    if size(B,1) == size(A.factors,2)
+        C = copy_similar(B, T)
+    elseif size(B,1) == size(A.factors,1)
+        C = [B; zeros(T, size(A.factors, 2) - size(A.factors, 1), size(B, 2))]
+    else
+        throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of A, $(size(A))"))
+    end
+    lmul!(convert(AbstractQ{T}, adjA), C)
+end
+
+# in-place right-application of LQPackedQs
+# these methods require that the applied-to matrix's (A's) number of columns
+# match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place
+# operation, and the underlying LAPACK routine (ormlq) treats the implicit Q
+# as its (nQ-by-nQ) square form)
+rmul!(A::StridedMatrix{T}, B::LQPackedQ{T}) where {T<:BlasFloat} =
+    LAPACK.ormlq!('R', 'N', B.factors, B.τ, A)
+rmul!(A::StridedMatrix{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} =
+    (B = adjB.Q; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A))
+rmul!(A::StridedMatrix{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} =
+    (B = adjB.Q; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A))
+
+# out-of-place right application of LQPackedQs
+#
+# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension
+# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q),
+# and if so effectively apply Q's square form to A without additional shenanigans; and
+# (2) if the preceding dimensions do not match, check whether the appropriate dimension of
+# A instead matches the number of rows of the matrix of which Q is a factor (i.e.
+# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending
+# A as necessary for check (1) to pass (if possible) and then applying Q's square form
+#
+function (*)(A::AbstractVector, Q::LQPackedQ)
+    T = promote_type(eltype(A), eltype(Q))
+    if 1 == size(Q.factors, 2)
+        C = copy_similar(A, T)
+    elseif 1 == size(Q.factors, 1)
+        C = zeros(T, length(A), size(Q.factors, 2))
+        copyto!(C, 1, A, 1, length(A))
+    else
+        _rightappdimmismatch("columns")
+    end
+    return rmul!(C, convert(AbstractQ{T}, Q))
+end
+function (*)(A::AbstractMatrix, Q::LQPackedQ)
+    T = promote_type(eltype(A), eltype(Q))
+    if size(A, 2) == size(Q.factors, 2)
+        C = copy_similar(A, T)
+    elseif size(A, 2) == size(Q.factors, 1)
+        C = zeros(T, size(A, 1), size(Q.factors, 2))
+        copyto!(C, 1, A, 1, length(A))
+    else
+        _rightappdimmismatch("columns")
+    end
+    return rmul!(C, convert(AbstractQ{T}, Q))
+end
+function (*)(adjA::AdjointAbsMat, Q::LQPackedQ)
+    A = adjA.parent
+    T = promote_type(eltype(A), eltype(Q))
+    if size(A, 1) == size(Q.factors, 2)
+        C = copy_similar(adjA, T)
+    elseif size(A, 1) == size(Q.factors, 1)
+        C = zeros(T, size(A, 2), size(Q.factors, 2))
+        adjoint!(view(C, :, 1:size(A, 1)), A)
+    else
+        _rightappdimmismatch("rows")
+    end
+    return rmul!(C, convert(AbstractQ{T}, Q))
+end
+(*)(u::AdjointAbsVec, Q::LQPackedQ) = (Q'u')'
+
+_rightappdimmismatch(rowsorcols) =
+    throw(DimensionMismatch(string("the number of $(rowsorcols) of the matrix on the left ",
+        "must match either (1) the number of columns of the (LQPackedQ) matrix on the right ",
+        "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ",
+        "(the factorization's originating matrix's number of rows)")))
+
+# In LQ factorization, `Q` is expressed as the product of the adjoint of the
+# reflectors.  Thus, `det` has to be conjugated.
+det(Q::LQPackedQ) = conj(_det_tau(Q.τ))
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
index 1e9687ef0f31a..2f5c5508e0ee3 100644
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -1,8 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Base: @propagate_inbounds
-import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, convert, similar
-
 ### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar)
 
 # note that Adjoint and Transpose must be able to wrap not only vectors and matrices
@@ -12,7 +9,7 @@ import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, c
     Adjoint
 
 Lazy wrapper type for an adjoint view of the underlying linear algebra object,
-usually an `AbstractVector`/`AbstractMatrix`, but also some `Factorization`, for instance.
+usually an `AbstractVector`/`AbstractMatrix`.
 Usually, the `Adjoint` constructor should not be called directly, use [`adjoint`](@ref)
 instead. To materialize the view use [`copy`](@ref).
 
@@ -39,7 +36,7 @@ end
     Transpose
 
 Lazy wrapper type for a transpose view of the underlying linear algebra object,
-usually an `AbstractVector`/`AbstractMatrix`, but also some `Factorization`, for instance.
+usually an `AbstractVector`/`AbstractMatrix`.
 Usually, the `Transpose` constructor should not be called directly, use [`transpose`](@ref)
 instead. To materialize the view use [`copy`](@ref).
 
@@ -67,6 +64,42 @@ end
 Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A)
 Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A)
 
+"""
+    adj_or_trans(::AbstractArray) -> adjoint|transpose|identity
+    adj_or_trans(::Type{<:AbstractArray}) -> adjoint|transpose|identity
+
+Return [`adjoint`](@ref) from an `Adjoint` type or object and
+[`transpose`](@ref) from a `Transpose` type or object. Otherwise,
+return [`identity`](@ref). Note that `Adjoint` and `Transpose` have
+to be the outer-most wrapper object for a non-`identity` function to be
+returned.
+"""
+adj_or_trans(::T) where {T<:AbstractArray} = adj_or_trans(T)
+adj_or_trans(::Type{<:AbstractArray}) = identity
+adj_or_trans(::Type{<:Adjoint}) = adjoint
+adj_or_trans(::Type{<:Transpose}) = transpose
+
+"""
+    inplace_adj_or_trans(::AbstractArray) -> adjoint!|transpose!|copyto!
+    inplace_adj_or_trans(::Type{<:AbstractArray}) -> adjoint!|transpose!|copyto!
+
+Return [`adjoint!`](@ref) from an `Adjoint` type or object and
+[`transpose!`](@ref) from a `Transpose` type or object. Otherwise,
+return [`copyto!`](@ref). Note that `Adjoint` and `Transpose` have
+to be the outer-most wrapper object for a non-`identity` function to be
+returned.
+"""
+inplace_adj_or_trans(::T) where {T <: AbstractArray} = inplace_adj_or_trans(T)
+inplace_adj_or_trans(::Type{<:AbstractArray}) = copyto!
+inplace_adj_or_trans(::Type{<:Adjoint}) = adjoint!
+inplace_adj_or_trans(::Type{<:Transpose}) = transpose!
+
+adj_or_trans_char(::T) where {T<:AbstractArray} = adj_or_trans_char(T)
+adj_or_trans_char(::Type{<:AbstractArray}) = 'N'
+adj_or_trans_char(::Type{<:Adjoint}) = 'C'
+adj_or_trans_char(::Type{<:Adjoint{<:Real}}) = 'T'
+adj_or_trans_char(::Type{<:Transpose}) = 'T'
+
 Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent)
 Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent))
 
@@ -291,6 +324,9 @@ wrapperop(_) = identity
 wrapperop(::Adjoint) = adjoint
 wrapperop(::Transpose) = transpose
 
+# the following fallbacks can be removed if Adjoint/Transpose are restricted to AbstractVecOrMat
+size(A::AdjOrTrans) = reverse(size(A.parent))
+axes(A::AdjOrTrans) = reverse(axes(A.parent))
 # AbstractArray interface, basic definitions
 length(A::AdjOrTrans) = length(A.parent)
 size(v::AdjOrTransAbsVec) = (1, length(v.parent))
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
index 90f5c03f7fcfb..dd3783d67b0cf 100644
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ b/stdlib/LinearAlgebra/src/bidiag.jl
@@ -405,18 +405,14 @@ function ==(A::Bidiagonal, B::Bidiagonal)
     end
 end
 
+const BandedMatrix = Union{Bidiagonal,Diagonal,Tridiagonal,SymTridiagonal} # or BiDiTriSym
 const BiTriSym = Union{Bidiagonal,Tridiagonal,SymTridiagonal}
 const BiTri = Union{Bidiagonal,Tridiagonal}
-@inline mul!(C::AbstractVector, A::BiTriSym, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::AbstractMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::AbstractMatrix, B::BiTriSym, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::BiTriSym, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Diagonal, B::BiTriSym, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractVector, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::AbstractMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta))
 
 function check_A_mul_B!_sizes(C, A, B)
     mA, nA = size(A)
@@ -743,52 +739,36 @@ function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat)
     end
     return c
 end
-ldiv!(A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
-ldiv!(A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
-ldiv!(c::AbstractVecOrMat, A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) =
-    (_rdiv!(transpose(c), transpose(b), transpose(A)); return c)
-ldiv!(c::AbstractVecOrMat, A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) =
-    (_rdiv!(adjoint(c), adjoint(b), adjoint(A)); return c)
+ldiv!(A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
+ldiv!(c::AbstractVecOrMat, A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) =
+    (t = adj_or_trans(A); _rdiv!(t(c), t(b), t(A)); return c)
 
 ### Generic promotion methods and fallbacks
 \(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(_initarray(\, eltype(A), eltype(B), B), A, B)
-\(tA::Transpose{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(tA) \ B
-\(adjA::Adjoint{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(adjA) \ B
+\(xA::AdjOrTrans{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(xA) \ B
 
 ### Triangular specializations
-function \(B::Bidiagonal, U::UpperTriangular)
-    A = ldiv!(_initarray(\, eltype(B), eltype(U), U), B, U)
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function \(B::Bidiagonal, U::UnitUpperTriangular)
-    A = ldiv!(_initarray(\, eltype(B), eltype(U), U), B, U)
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function \(B::Bidiagonal, L::LowerTriangular)
-    A = ldiv!(_initarray(\, eltype(B), eltype(L), L), B, L)
-    return B.uplo == 'L' ? LowerTriangular(A) : A
+for tri in (:UpperTriangular, :UnitUpperTriangular)
+    @eval function \(B::Bidiagonal, U::$tri)
+        A = ldiv!(_initarray(\, eltype(B), eltype(U), U), B, U)
+        return B.uplo == 'U' ? UpperTriangular(A) : A
+    end
+    @eval function \(U::$tri, B::Bidiagonal)
+        A = ldiv!(_initarray(\, eltype(U), eltype(B), U), U, B)
+        return B.uplo == 'U' ? UpperTriangular(A) : A
+    end
 end
-function \(B::Bidiagonal, L::UnitLowerTriangular)
-    A = ldiv!(_initarray(\, eltype(B), eltype(L), L), B, L)
-    return B.uplo == 'L' ? LowerTriangular(A) : A
+for tri in (:LowerTriangular, :UnitLowerTriangular)
+    @eval function \(B::Bidiagonal, L::$tri)
+        A = ldiv!(_initarray(\, eltype(B), eltype(L), L), B, L)
+        return B.uplo == 'L' ? LowerTriangular(A) : A
+    end
+    @eval function \(L::$tri, B::Bidiagonal)
+        A = ldiv!(_initarray(\, eltype(L), eltype(B), L), L, B)
+        return B.uplo == 'L' ? LowerTriangular(A) : A
+    end
 end
 
-function \(U::UpperTriangular, B::Bidiagonal)
-    A = ldiv!(U, copy_similar(B, _init_eltype(\, eltype(U), eltype(B))))
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function \(U::UnitUpperTriangular, B::Bidiagonal)
-    A = ldiv!(U, copy_similar(B, _init_eltype(\, eltype(U), eltype(B))))
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function \(L::LowerTriangular, B::Bidiagonal)
-    A = ldiv!(L, copy_similar(B, _init_eltype(\, eltype(L), eltype(B))))
-    return B.uplo == 'L' ? LowerTriangular(A) : A
-end
-function \(L::UnitLowerTriangular, B::Bidiagonal)
-    A = ldiv!(L, copy_similar(B, _init_eltype(\, eltype(L), eltype(B))))
-    return B.uplo == 'L' ? LowerTriangular(A) : A
-end
 ### Diagonal specialization
 function \(B::Bidiagonal, D::Diagonal)
     A = ldiv!(_initarray(\, eltype(B), eltype(D), D), B, D)
@@ -837,48 +817,34 @@ function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal)
     C
 end
 rdiv!(A::AbstractMatrix, B::Bidiagonal) = @inline _rdiv!(A, A, B)
-rdiv!(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B)
-rdiv!(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B)
-_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) =
-    (ldiv!(adjoint(C), adjoint(B), adjoint(A)); return C)
-_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) =
-    (ldiv!(transpose(C), transpose(B), transpose(A)); return C)
+rdiv!(A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B)
+_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) =
+    (t = adj_or_trans(B); ldiv!(t(C), t(B), t(A)); return C)
 
 /(A::AbstractMatrix, B::Bidiagonal) = _rdiv!(_initarray(/, eltype(A), eltype(B), A), A, B)
 
 ### Triangular specializations
-function /(U::UpperTriangular, B::Bidiagonal)
-    A = _rdiv!(_initarray(/, eltype(U), eltype(B), U), U, B)
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function /(U::UnitUpperTriangular, B::Bidiagonal)
-    A = _rdiv!(_initarray(/, eltype(U), eltype(B), U), U, B)
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function /(L::LowerTriangular, B::Bidiagonal)
-    A = _rdiv!(_initarray(/, eltype(L), eltype(B), L), L, B)
-    return B.uplo == 'L' ? LowerTriangular(A) : A
-end
-function /(L::UnitLowerTriangular, B::Bidiagonal)
-    A = _rdiv!(_initarray(/, eltype(L), eltype(B), L), L, B)
-    return B.uplo == 'L' ? LowerTriangular(A) : A
-end
-function /(B::Bidiagonal, U::UpperTriangular)
-    A = rdiv!(copy_similar(B, _init_eltype(/, eltype(B), eltype(U))), U)
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function /(B::Bidiagonal, U::UnitUpperTriangular)
-    A = rdiv!(copy_similar(B, _init_eltype(/, eltype(B), eltype(U))), U)
-    return B.uplo == 'U' ? UpperTriangular(A) : A
-end
-function /(B::Bidiagonal, L::LowerTriangular)
-    A = rdiv!(copy_similar(B, _init_eltype(/, eltype(B), eltype(L))), L)
-    return B.uplo == 'L' ? LowerTriangular(A) : A
+for tri in (:UpperTriangular, :UnitUpperTriangular)
+    @eval function /(U::$tri, B::Bidiagonal)
+        A = _rdiv!(_initarray(/, eltype(U), eltype(B), U), U, B)
+        return B.uplo == 'U' ? UpperTriangular(A) : A
+    end
+    @eval function /(B::Bidiagonal, U::$tri)
+        A = _rdiv!(_initarray(/, eltype(B), eltype(U), U), B, U)
+        return B.uplo == 'U' ? UpperTriangular(A) : A
+    end
 end
-function /(B::Bidiagonal, L::UnitLowerTriangular)
-    A = rdiv!(copy_similar(B, _init_eltype(/, eltype(B), eltype(L))), L)
-    return B.uplo == 'L' ? LowerTriangular(A) : A
+for tri in (:LowerTriangular, :UnitLowerTriangular)
+    @eval function /(L::$tri, B::Bidiagonal)
+        A = _rdiv!(_initarray(/, eltype(L), eltype(B), L), L, B)
+        return B.uplo == 'L' ? LowerTriangular(A) : A
+    end
+    @eval function /(B::Bidiagonal, L::$tri)
+        A = _rdiv!(_initarray(/, eltype(B), eltype(L), L), B, L)
+        return B.uplo == 'L' ? LowerTriangular(A) : A
+    end
 end
+
 ### Diagonal specialization
 function /(D::Diagonal, B::Bidiagonal)
     A = _rdiv!(_initarray(/, eltype(D), eltype(B), D), D, B)
@@ -898,8 +864,8 @@ end
 factorize(A::Bidiagonal) = A
 function inv(B::Bidiagonal{T}) where T
     n = size(B, 1)
-    dest = zeros(typeof(oneunit(T)\one(T)), (n, n))
-    ldiv!(dest, B, Diagonal{typeof(one(T)\one(T))}(I, n))
+    dest = zeros(typeof(inv(oneunit(T))), (n, n))
+    ldiv!(dest, B, Diagonal{typeof(one(T)/one(T))}(I, n))
     return B.uplo == 'U' ? UpperTriangular(dest) : LowerTriangular(dest)
 end
 
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
index 560c29cf89508..82f138db7d7b9 100644
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ b/stdlib/LinearAlgebra/src/cholesky.jl
@@ -206,7 +206,7 @@ function _chol!(A::AbstractMatrix, ::Type{UpperTriangular})
             A[k,k] = Akk
             Akk, info = _chol!(Akk, UpperTriangular)
             if info != 0
-                return UpperTriangular(A), info
+                return UpperTriangular(A), convert(BlasInt, k)
             end
             A[k,k] = Akk
             AkkInv = inv(copy(Akk'))
@@ -233,7 +233,7 @@ function _chol!(A::AbstractMatrix, ::Type{LowerTriangular})
             A[k,k] = Akk
             Akk, info = _chol!(Akk, LowerTriangular)
             if info != 0
-                return LowerTriangular(A), info
+                return LowerTriangular(A), convert(BlasInt, k)
             end
             A[k,k] = Akk
             AkkInv = inv(Akk)
@@ -251,11 +251,12 @@ function _chol!(A::AbstractMatrix, ::Type{LowerTriangular})
 end
 
 ## Numbers
-function _chol!(x::Number, uplo)
+function _chol!(x::Number, _)
     rx = real(x)
+    iszero(rx) && return (rx, convert(BlasInt, 1))
     rxr = sqrt(abs(rx))
     rval =  convert(promote_type(typeof(x), typeof(rxr)), rxr)
-    rx == abs(x) ? (rval, convert(BlasInt, 0)) : (rval, convert(BlasInt, 1))
+    return (rval, convert(BlasInt, rx != abs(x)))
 end
 
 ## for StridedMatrices, check that matrix is symmetric/Hermitian
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
index 7f5e44382f5c5..56c5954cc28fe 100644
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ b/stdlib/LinearAlgebra/src/dense.jl
@@ -907,14 +907,12 @@ sqrt(A::TransposeAbsMat) = transpose(sqrt(parent(A)))
 
 function inv(A::StridedMatrix{T}) where T
     checksquare(A)
-    S = typeof((oneunit(T)*zero(T) + oneunit(T)*zero(T))/oneunit(T))
-    AA = convert(AbstractArray{S}, A)
-    if istriu(AA)
-        Ai = triu!(parent(inv(UpperTriangular(AA))))
-    elseif istril(AA)
-        Ai = tril!(parent(inv(LowerTriangular(AA))))
+    if istriu(A)
+        Ai = triu!(parent(inv(UpperTriangular(A))))
+    elseif istril(A)
+        Ai = tril!(parent(inv(LowerTriangular(A))))
     else
-        Ai = inv!(lu(AA))
+        Ai = inv!(lu(A))
         Ai = convert(typeof(parent(Ai)), Ai)
     end
     return Ai
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index 03c5a2bbdeba4..b9fa98a9b12b3 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -96,6 +96,11 @@ julia> diag(A, 2)
 ```
 """
 Diagonal(A::AbstractMatrix) = Diagonal(diag(A))
+Diagonal{T}(A::AbstractMatrix) where T = Diagonal{T}(diag(A))
+function convert(::Type{T}, A::AbstractMatrix) where T<:Diagonal
+    checksquare(A)
+    isdiag(A) ? T(A) : throw(InexactError(:convert, T, A))
+end
 
 Diagonal(D::Diagonal) = D
 Diagonal{T}(D::Diagonal{T}) where {T} = D
@@ -281,26 +286,22 @@ end
 rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D)
 lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B)
 
-function *(A::AdjOrTransAbsMat, D::Diagonal)
+function (*)(A::AdjOrTransAbsMat, D::Diagonal)
     Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag)))
     rmul!(Ac, D)
 end
-
-*(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
-    rmul!(Array{promote_type(eltype(D), eltype(adjQ))}(D), adjQ)
-
-function *(D::Diagonal, A::AdjOrTransAbsMat)
+function (*)(D::Diagonal, A::AdjOrTransAbsMat)
     Ac = copy_similar(A, promote_op(*, eltype(A), eltype(D.diag)))
     lmul!(D, Ac)
 end
 
-@inline function __muldiag!(out, D::Diagonal, B, alpha, beta)
-    require_one_based_indexing(B)
-    require_one_based_indexing(out)
+function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
+    require_one_based_indexing(out, B)
+    alpha, beta = _add.alpha, _add.beta
     if iszero(alpha)
         _rmul_or_fill!(out, beta)
     else
-        if iszero(beta)
+        if bis0
             @inbounds for j in axes(B, 2)
                 @simd for i in axes(B, 1)
                     out[i,j] = D.diag[i] * B[i,j] * alpha
@@ -316,13 +317,13 @@ end
     end
     return out
 end
-@inline function __muldiag!(out, A, D::Diagonal, alpha, beta)
-    require_one_based_indexing(A)
-    require_one_based_indexing(out)
+function __muldiag!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
+    require_one_based_indexing(out, A)
+    alpha, beta = _add.alpha, _add.beta
     if iszero(alpha)
         _rmul_or_fill!(out, beta)
     else
-        if iszero(beta)
+        if bis0
             @inbounds for j in axes(A, 2)
                 dja = D.diag[j] * alpha
                 @simd for i in axes(A, 1)
@@ -340,13 +341,14 @@ end
     end
     return out
 end
-@inline function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, alpha, beta)
+function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
     d1 = D1.diag
     d2 = D2.diag
+    alpha, beta = _add.alpha, _add.beta
     if iszero(alpha)
         _rmul_or_fill!(out.diag, beta)
     else
-        if iszero(beta)
+        if bis0
             @inbounds @simd for i in eachindex(out.diag)
                 out.diag[i] = d1[i] * d2[i] * alpha
             end
@@ -358,8 +360,9 @@ end
     end
     return out
 end
-@inline function __muldiag!(out, D1::Diagonal, D2::Diagonal, alpha, beta)
+function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
     require_one_based_indexing(out)
+    alpha, beta = _add.alpha, _add.beta
     mA = size(D1, 1)
     d1 = D1.diag
     d2 = D2.diag
@@ -372,42 +375,38 @@ end
     return out
 end
 
-@inline function _muldiag!(out, A, B, alpha, beta)
+function _mul_diag!(out, A, B, _add)
     _muldiag_size_check(out, A, B)
-    __muldiag!(out, A, B, alpha, beta)
+    __muldiag!(out, A, B, _add)
     return out
 end
 
+_mul!(out::AbstractVecOrMat, D::Diagonal, V::AbstractVector, _add) =
+    _mul_diag!(out, D, V, _add)
+_mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, _add) =
+    _mul_diag!(out, D, B, _add)
+_mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, _add) =
+    _mul_diag!(out, A, D, _add)
+_mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, _add) =
+    _mul_diag!(C, Da, Db, _add)
+_mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, _add) =
+    _mul_diag!(C, Da, Db, _add)
+
 function (*)(Da::Diagonal, A::AbstractMatrix, Db::Diagonal)
     _muldiag_size_check(Da, A)
     _muldiag_size_check(A, Db)
     return broadcast(*, Da.diag, A, permutedims(Db.diag))
 end
 
-# Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat
-@inline mul!(out::AbstractVector, D::Diagonal, V::AbstractVector, alpha::Number, beta::Number) =
-    _muldiag!(out, D, V, alpha, beta)
-@inline mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, alpha::Number, beta::Number) =
-    _muldiag!(out, D, B, alpha, beta)
-@inline mul!(out::AbstractMatrix, D::Diagonal, B::Adjoint{<:Any,<:AbstractVecOrMat},
-             alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta)
-@inline mul!(out::AbstractMatrix, D::Diagonal, B::Transpose{<:Any,<:AbstractVecOrMat},
-             alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta)
-
-@inline mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, alpha::Number, beta::Number) =
-    _muldiag!(out, A, D, alpha, beta)
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, D::Diagonal,
-             alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta)
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, D::Diagonal,
-             alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta)
-@inline mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) =
-    _muldiag!(C, Da, Db, alpha, beta)
-
-mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) =
-    _muldiag!(C, Da, Db, alpha, beta)
+function (*)(Da::Diagonal, Db::Diagonal, Dc::Diagonal)
+    _muldiag_size_check(Da, Db)
+    _muldiag_size_check(Db, Dc)
+    return Diagonal(Da.diag .* Db.diag .* Dc.diag)
+end
 
 /(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D))), A, D)
 /(A::HermOrSym, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D)), size(A)), A, D)
+
 rdiv!(A::AbstractVecOrMat, D::Diagonal) = @inline _rdiv!(A, A, D)
 # avoid copy when possible via internal 3-arg backend
 function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal)
@@ -570,22 +569,23 @@ for Tri in (:UpperTriangular, :LowerTriangular)
     # 3-arg ldiv!
     @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data))
     @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag))
-    # 3-arg mul!: invoke 5-arg mul! rather than lmul!
-    @eval mul!(C::$Tri, A::Union{$Tri,$UTri}, D::Diagonal) = mul!(C, A, D, true, false)
+    # 3-arg mul! is disambiguated in special.jl
     # 5-arg mul!
-    @eval @inline mul!(C::$Tri, D::Diagonal, A::$Tri, α::Number, β::Number) = $Tri(mul!(C.data, D, A.data, α, β))
-    @eval @inline function mul!(C::$Tri, D::Diagonal, A::$UTri, α::Number, β::Number)
+    @eval _mul!(C::$Tri, D::Diagonal, A::$Tri, _add) = $Tri(mul!(C.data, D, A.data, _add.alpha, _add.beta))
+    @eval function _mul!(C::$Tri, D::Diagonal, A::$UTri, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
+        α, β = _add.alpha, _add.beta
         iszero(α) && return _rmul_or_fill!(C, β)
-        diag′ = iszero(β) ? nothing : diag(C)
+        diag′ = bis0 ? nothing : diag(C)
         data = mul!(C.data, D, A.data, α, β)
-        $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′))
+        $Tri(_setdiag!(data, _add, D.diag, diag′))
     end
-    @eval @inline mul!(C::$Tri, A::$Tri, D::Diagonal, α::Number, β::Number) = $Tri(mul!(C.data, A.data, D, α, β))
-    @eval @inline function mul!(C::$Tri, A::$UTri, D::Diagonal, α::Number, β::Number)
+    @eval _mul!(C::$Tri, A::$Tri, D::Diagonal, _add) = $Tri(mul!(C.data, A.data, D, _add.alpha, _add.beta))
+    @eval function _mul!(C::$Tri, A::$UTri, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0}
+        α, β = _add.alpha, _add.beta
         iszero(α) && return _rmul_or_fill!(C, β)
-        diag′ = iszero(β) ? nothing : diag(C)
+        diag′ = bis0 ? nothing : diag(C)
         data = mul!(C.data, A.data, D, α, β)
-        $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′))
+        $Tri(_setdiag!(data, _add, D.diag, diag′))
     end
 end
 
@@ -668,7 +668,8 @@ end
 conj(D::Diagonal) = Diagonal(conj(D.diag))
 transpose(D::Diagonal{<:Number}) = D
 transpose(D::Diagonal) = Diagonal(transpose.(D.diag))
-adjoint(D::Diagonal{<:Number}) = conj(D)
+adjoint(D::Diagonal{<:Number}) = Diagonal(vec(adjoint(D.diag)))
+adjoint(D::Diagonal{<:Number,<:Base.ReshapedArray{<:Number,1,<:Adjoint}}) = Diagonal(adjoint(parent(D.diag)))
 adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
 permutedims(D::Diagonal) = D
 permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl
index 14676ad6df6eb..185061b0a3a7d 100644
--- a/stdlib/LinearAlgebra/src/eigen.jl
+++ b/stdlib/LinearAlgebra/src/eigen.jl
@@ -182,7 +182,9 @@ end
 
 Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
 which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
-matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
+matrix `F.vectors`. This corresponds to solving an eigenvalue problem of the form
+`Ax =  λx`, where `A` is a matrix, `x` is an eigenvector, and `λ` is an eigenvalue.
+(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
 
 Iterating the decomposition produces the components `F.values` and `F.vectors`.
 
@@ -440,7 +442,11 @@ det(A::Eigen) = prod(A.values)
 function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal
     issymmetric(A) && isposdef(B) && return eigen!(Symmetric(A), Symmetric(B), sortby=sortby)
     n = size(A, 1)
-    alphar, alphai, beta, _, vr = LAPACK.ggev!('N', 'V', A, B)
+    if LAPACK.version() < v"3.6.0"
+        alphar, alphai, beta, _, vr = LAPACK.ggev!('N', 'V', A, B)
+    else
+        alphar, alphai, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B)
+    end
     iszero(alphai) && return GeneralizedEigen(sorteig!(alphar ./ beta, vr, sortby)...)
 
     vecs = zeros(Complex{T}, n, n)
@@ -462,7 +468,11 @@ end
 
 function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex
     ishermitian(A) && isposdef(B) && return eigen!(Hermitian(A), Hermitian(B), sortby=sortby)
-    alpha, beta, _, vr = LAPACK.ggev!('N', 'V', A, B)
+    if LAPACK.version() < v"3.6.0"
+        alpha, beta, _, vr = LAPACK.ggev!('N', 'V', A, B)
+    else
+        alpha, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B)
+    end
     return GeneralizedEigen(sorteig!(alpha./beta, vr, sortby)...)
 end
 
@@ -472,6 +482,8 @@ end
 Compute the generalized eigenvalue decomposition of `A` and `B`, returning a
 [`GeneralizedEigen`](@ref) factorization object `F` which contains the generalized eigenvalues in
 `F.values` and the generalized eigenvectors in the columns of the matrix `F.vectors`.
+This corresponds to solving a generalized eigenvalue problem of the form
+`Ax =  λBx`, where `A, B` are matrices, `x` is an eigenvector, and `λ` is an eigenvalue.
 (The `k`th generalized eigenvector can be obtained from the slice `F.vectors[:, k]`.)
 
 Iterating the decomposition produces the components `F.values` and `F.vectors`.
@@ -565,12 +577,20 @@ julia> B
 """
 function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal
     issymmetric(A) && isposdef(B) && return sorteig!(eigvals!(Symmetric(A), Symmetric(B)), sortby)
-    alphar, alphai, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B)
+    if LAPACK.version() < v"3.6.0"
+        alphar, alphai, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B)
+    else
+        alphar, alphai, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B)
+    end
     return sorteig!((iszero(alphai) ? alphar : complex.(alphar, alphai))./beta, sortby)
 end
 function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex
     ishermitian(A) && isposdef(B) && return sorteig!(eigvals!(Hermitian(A), Hermitian(B)), sortby)
-    alpha, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B)
+    if LAPACK.version() < v"3.6.0"
+        alpha, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B)
+    else
+        alpha, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B)
+    end
     return sorteig!(alpha./beta, sortby)
 end
 
diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl
index 44668bfe9c212..8c35a23e6b6d5 100644
--- a/stdlib/LinearAlgebra/src/factorization.jl
+++ b/stdlib/LinearAlgebra/src/factorization.jl
@@ -11,9 +11,58 @@ matrix factorizations.
 """
 abstract type Factorization{T} end
 
+"""
+    AdjointFactorization
+
+Lazy wrapper type for the adjoint of the underlying `Factorization` object. Usually, the
+`AdjointFactorization` constructor should not be called directly, use
+[`adjoint(:: Factorization)`](@ref) instead.
+"""
+struct AdjointFactorization{T,S<:Factorization} <: Factorization{T}
+    parent::S
+end
+AdjointFactorization(F::Factorization) =
+    AdjointFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F)
+
+"""
+    TransposeFactorization
+
+Lazy wrapper type for the transpose of the underlying `Factorization` object. Usually, the
+`TransposeFactorization` constructor should not be called directly, use
+[`transpose(:: Factorization)`](@ref) instead.
+"""
+struct TransposeFactorization{T,S<:Factorization} <: Factorization{T}
+    parent::S
+end
+TransposeFactorization(F::Factorization) =
+    TransposeFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F)
+
 eltype(::Type{<:Factorization{T}}) where {T} = T
-size(F::Adjoint{<:Any,<:Factorization}) = reverse(size(parent(F)))
-size(F::Transpose{<:Any,<:Factorization}) = reverse(size(parent(F)))
+size(F::AdjointFactorization) = reverse(size(parent(F)))
+size(F::TransposeFactorization) = reverse(size(parent(F)))
+size(F::Union{AdjointFactorization,TransposeFactorization}, d::Integer) = d in (1, 2) ? size(F)[d] : 1
+parent(F::Union{AdjointFactorization,TransposeFactorization}) = F.parent
+
+"""
+    adjoint(F::Factorization)
+
+Lazy adjoint of the factorization `F`. By default, returns an
+[`AdjointFactorization`](@ref) wrapper.
+"""
+adjoint(F::Factorization) = AdjointFactorization(F)
+"""
+    transpose(F::Factorization)
+
+Lazy transpose of the factorization `F`. By default, returns a [`TransposeFactorization`](@ref),
+except for `Factorization`s with real `eltype`, in which case returns an [`AdjointFactorization`](@ref).
+"""
+transpose(F::Factorization) = TransposeFactorization(F)
+transpose(F::Factorization{<:Real}) = AdjointFactorization(F)
+adjoint(F::AdjointFactorization) = F.parent
+transpose(F::TransposeFactorization) = F.parent
+transpose(F::AdjointFactorization{<:Real}) = F.parent
+conj(A::TransposeFactorization) = adjoint(A.parent)
+conj(A::AdjointFactorization) = transpose(A.parent)
 
 checkpositivedefinite(info) = info == 0 || throw(PosDefException(info))
 checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info))
@@ -60,64 +109,77 @@ convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)::T
 
 ### General promotion rules
 Factorization{T}(F::Factorization{T}) where {T} = F
-# This is a bit odd since the return is not a Factorization but it works well in generic code
-Factorization{T}(A::Adjoint{<:Any,<:Factorization}) where {T} =
+# This no longer looks odd since the return _is_ a Factorization!
+Factorization{T}(A::AdjointFactorization) where {T} =
     adjoint(Factorization{T}(parent(A)))
+Factorization{T}(A::TransposeFactorization) where {T} =
+    transpose(Factorization{T}(parent(A)))
 inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n)))
 
 Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h)
 Base.:(==)(  F::T, G::T) where {T<:Factorization} = all(f -> getfield(F, f) == getfield(G, f), 1:nfields(F))
 Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F, f), getfield(G, f)), 1:nfields(F))::Bool
 
-function Base.show(io::IO, x::Adjoint{<:Any,<:Factorization})
-    print(io, "Adjoint of ")
+function Base.show(io::IO, x::AdjointFactorization)
+    print(io, "adjoint of ")
     show(io, parent(x))
 end
-function Base.show(io::IO, x::Transpose{<:Any,<:Factorization})
-    print(io, "Transpose of ")
+function Base.show(io::IO, x::TransposeFactorization)
+    print(io, "transpose of ")
     show(io, parent(x))
 end
-function Base.show(io::IO, ::MIME"text/plain", x::Adjoint{<:Any,<:Factorization})
-    print(io, "Adjoint of ")
+function Base.show(io::IO, ::MIME"text/plain", x::AdjointFactorization)
+    print(io, "adjoint of ")
     show(io, MIME"text/plain"(), parent(x))
 end
-function Base.show(io::IO, ::MIME"text/plain", x::Transpose{<:Any,<:Factorization})
-    print(io, "Transpose of ")
+function Base.show(io::IO, ::MIME"text/plain", x::TransposeFactorization)
+    print(io, "transpose of ")
     show(io, MIME"text/plain"(), parent(x))
 end
 
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns or rows
-function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
+function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal}
     require_one_based_indexing(B)
     c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2))
     x = ldiv!(F, c2r)
     return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(x, div(length(x), 2), 2))))), _ret_size(F, B))
 end
-function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where T<:BlasReal
+# don't do the reinterpretation for [Adjoint/Transpose]Factorization
+(\)(F::TransposeFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    conj!(adjoint(parent(F)) \ conj.(B))
+(\)(F::AdjointFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    @invoke \(F::typeof(F), B::VecOrMat)
+
+function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where {T<:BlasReal}
     require_one_based_indexing(B)
     x = rdiv!(copy(reinterpret(T, B)), F)
     return copy(reinterpret(Complex{T}, x))
 end
+# don't do the reinterpretation for [Adjoint/Transpose]Factorization
+(/)(B::VecOrMat{Complex{T}}, F::TransposeFactorization{T}) where {T<:BlasReal} =
+    conj!(adjoint(parent(F)) \ conj.(B))
+(/)(B::VecOrMat{Complex{T}}, F::AdjointFactorization{T}) where {T<:BlasReal} =
+    @invoke /(B::VecOrMat{Complex{T}}, F::Factorization{T})
 
-function \(F::Union{Factorization, Adjoint{<:Any,<:Factorization}}, B::AbstractVecOrMat)
+function (\)(F::Factorization, B::AbstractVecOrMat)
     require_one_based_indexing(B)
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
+    TFB = typeof(oneunit(eltype(F)) \ oneunit(eltype(B)))
     ldiv!(F, copy_similar(B, TFB))
 end
+(\)(F::TransposeFactorization, B::AbstractVecOrMat) = conj!(adjoint(F.parent) \ conj.(B))
 
-function /(B::AbstractMatrix, F::Union{Factorization, Adjoint{<:Any,<:Factorization}})
+function (/)(B::AbstractMatrix, F::Factorization)
     require_one_based_indexing(B)
     TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
     rdiv!(copy_similar(B, TFB), F)
 end
-/(adjB::AdjointAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjB.parent)
-/(B::TransposeAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjoint(B))
-
+(/)(A::AbstractMatrix, F::AdjointFactorization) = adjoint(adjoint(F) \ adjoint(A))
+(/)(A::AbstractMatrix, F::TransposeFactorization) = transpose(transpose(F) \ transpose(A))
 
 function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector)
     require_one_based_indexing(Y, B)
-    m, n = size(A, 1), size(A, 2)
+    m, n = size(A)
     if m > n
         Bc = copy(B)
         ldiv!(A, Bc)
@@ -128,7 +190,7 @@ function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector)
 end
 function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix)
     require_one_based_indexing(Y, B)
-    m, n = size(A, 1), size(A, 2)
+    m, n = size(A)
     if m > n
         Bc = copy(B)
         ldiv!(A, Bc)
@@ -138,14 +200,3 @@ function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix)
         return ldiv!(A, Y)
     end
 end
-
-# fallback methods for transposed solves
-\(F::Transpose{<:Any,<:Factorization{<:Real}}, B::AbstractVecOrMat) = adjoint(F.parent) \ B
-\(F::Transpose{<:Any,<:Factorization}, B::AbstractVecOrMat) = conj.(adjoint(F.parent) \ conj.(B))
-
-/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent)
-/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent))
-/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent)
-/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = transpose(transpose(F) \ transpose(B))
-/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent))
-/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization}) = transpose(transpose(F) \ transpose(B))
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
index 4759f352035f6..c66f59838e8ba 100644
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ b/stdlib/LinearAlgebra/src/generic.jl
@@ -805,7 +805,7 @@ opnorm(v::AdjointAbsVec, q::Real) = q == Inf ? norm(conj(v.parent), 1) : norm(co
 opnorm(v::AdjointAbsVec) = norm(conj(v.parent))
 opnorm(v::TransposeAbsVec) = norm(v.parent)
 
-norm(v::Union{TransposeAbsVec,AdjointAbsVec}, p::Real) = norm(v.parent, p)
+norm(v::AdjOrTrans, p::Real) = norm(v.parent, p)
 
 """
     dot(x, y)
@@ -947,13 +947,22 @@ dot(x::AbstractVector, transA::Transpose{<:Real}, y::AbstractVector) = adjoint(d
     rank(A::AbstractMatrix; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ)
     rank(A::AbstractMatrix, rtol::Real)
 
-Compute the rank of a matrix by counting how many singular
-values of `A` have magnitude greater than `max(atol, rtol*σ₁)` where `σ₁` is
-`A`'s largest singular value. `atol` and `rtol` are the absolute and relative
+Compute the numerical rank of a matrix by counting how many outputs of
+`svdvals(A)` are greater than `max(atol, rtol*σ₁)` where `σ₁` is `A`'s largest
+calculated singular value. `atol` and `rtol` are the absolute and relative
 tolerances, respectively. The default relative tolerance is `n*ϵ`, where `n`
 is the size of the smallest dimension of `A`, and `ϵ` is the [`eps`](@ref) of
 the element type of `A`.
 
+!!! note
+    Numerical rank can be a sensitive and imprecise characterization of
+    ill-conditioned matrices with singular values that are close to the threshold
+    tolerance `max(atol, rtol*σ₁)`. In such cases, slight perturbations to the
+    singular-value computation or to the matrix can change the result of `rank`
+    by pushing one or more singular values across the threshold. These variations
+    can even occur due to changes in floating-point errors between different Julia
+    versions, architectures, compilers, or operating systems.
+
 !!! compat "Julia 1.1"
     The `atol` and `rtol` keyword arguments requires at least Julia 1.1.
     In Julia 1.0 `rtol` is available as a positional argument, but this
@@ -981,7 +990,7 @@ function rank(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...)
     isempty(A) && return 0 # 0-dimensional case
     s = svdvals(A)
     tol = max(atol, rtol*s[1])
-    count(x -> x > tol, s)
+    count(>(tol), s)
 end
 rank(x::Union{Number,AbstractVector}) = iszero(x) ? 0 : 1
 
@@ -1795,21 +1804,18 @@ function normalize!(a::AbstractArray, p::Real=2)
     __normalize!(a, nrm)
 end
 
-@inline function __normalize!(a::AbstractArray, nrm::Real)
+@inline function __normalize!(a::AbstractArray, nrm)
     # The largest positive floating point number whose inverse is less than infinity
     δ = inv(prevfloat(typemax(nrm)))
-
     if nrm ≥ δ # Safe to multiply with inverse
         invnrm = inv(nrm)
         rmul!(a, invnrm)
-
     else # scale elements to avoid overflow
         εδ = eps(one(nrm))/δ
         rmul!(a, εδ)
         rmul!(a, inv(nrm*εδ))
     end
-
-    a
+    return a
 end
 
 """
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
index 17d630765e424..75b3e121f9086 100644
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -129,67 +129,32 @@ for T = (:Number, :UniformScaling, :Diagonal)
 end
 
 function *(H::UpperHessenberg, U::UpperOrUnitUpperTriangular)
-    T = typeof(oneunit(eltype(H))*oneunit(eltype(U)))
-    HH = copy_similar(H, T)
-    rmul!(HH, U)
+    HH = _mulmattri!(_initarray(*, eltype(H), eltype(U), H), H, U)
     UpperHessenberg(HH)
 end
 function *(U::UpperOrUnitUpperTriangular, H::UpperHessenberg)
-    T = typeof(oneunit(eltype(H))*oneunit(eltype(U)))
-    HH = copy_similar(H, T)
-    lmul!(U, HH)
+    HH = _multrimat!(_initarray(*, eltype(U), eltype(H), H), U, H)
     UpperHessenberg(HH)
 end
 
 function /(H::UpperHessenberg, U::UpperTriangular)
-    T = typeof(oneunit(eltype(H))/oneunit(eltype(U)))
-    HH = copy_similar(H, T)
-    rdiv!(HH, U)
+    HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U)
     UpperHessenberg(HH)
 end
 function /(H::UpperHessenberg, U::UnitUpperTriangular)
-    T = typeof(oneunit(eltype(H))/oneunit(eltype(U)))
-    HH = copy_similar(H, T)
-    rdiv!(HH, U)
+    HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U)
     UpperHessenberg(HH)
 end
 
 function \(U::UpperTriangular, H::UpperHessenberg)
-    T = typeof(oneunit(eltype(U))\oneunit(eltype(H)))
-    HH = copy_similar(H, T)
-    ldiv!(U, HH)
+    HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H)
     UpperHessenberg(HH)
 end
 function \(U::UnitUpperTriangular, H::UpperHessenberg)
-    T = typeof(oneunit(eltype(U))\oneunit(eltype(H)))
-    HH = copy_similar(H, T)
-    ldiv!(U, HH)
+    HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H)
     UpperHessenberg(HH)
 end
 
-function *(H::UpperHessenberg, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(H), eltype(B))
-    A = mul!(similar(H, TS, size(H)), H, B)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-function *(B::Bidiagonal, H::UpperHessenberg)
-    TS = promote_op(matprod, eltype(B), eltype(H))
-    A = mul!(similar(H, TS, size(H)), B, H)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-
-function /(H::UpperHessenberg, B::Bidiagonal)
-    T = typeof(oneunit(eltype(H))/oneunit(eltype(B)))
-    A = _rdiv!(similar(H, T, size(H)), H, B)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-
-function \(B::Bidiagonal, H::UpperHessenberg)
-    T = typeof(oneunit(eltype(B))\oneunit(eltype(H)))
-    A = ldiv!(similar(H, T, size(H)), B, H)
-    return B.uplo == 'U' ? UpperHessenberg(A) : A
-end
-
 # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory
 # (in-place in x) by the RQ algorithm from:
 #
@@ -422,10 +387,12 @@ Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo;
 
 copy(F::Hessenberg{<:Any,<:UpperHessenberg}) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ)
 copy(F::Hessenberg{<:Any,<:SymTridiagonal}) = Hessenberg(copy(F.factors), copy(F.τ), copy(F.H), F.uplo; μ=F.μ)
-size(F::Hessenberg, d) = size(F.H, d)
+size(F::Hessenberg, d::Integer) = size(F.H, d)
 size(F::Hessenberg) = size(F.H)
 
-adjoint(F::Hessenberg) = Adjoint(F)
+transpose(F::Hessenberg{<:Real}) = F'
+transpose(::Hessenberg) =
+    throw(ArgumentError("transpose of Hessenberg decomposition is not supported, consider using adjoint"))
 
 # iteration for destructuring into components
 Base.iterate(S::Hessenberg) = (S.Q, Val(:H))
@@ -480,10 +447,7 @@ julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.]
 julia> F = hessenberg(A)
 Hessenberg{Float64, UpperHessenberg{Float64, Matrix{Float64}}, Matrix{Float64}, Vector{Float64}, Bool}
 Q factor:
-3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false}:
- 1.0   0.0        0.0
- 0.0  -0.707107  -0.707107
- 0.0  -0.707107   0.707107
+3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false}
 H factor:
 3×3 UpperHessenberg{Float64, Matrix{Float64}}:
   4.0      -11.3137       -1.41421
@@ -516,43 +480,14 @@ function show(io::IO, mime::MIME"text/plain", F::Hessenberg)
     show(io, mime, F.H)
 end
 
-"""
-    HessenbergQ <: AbstractQ
-
-Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns
-a `HessenbergQ` object, which is an implicit representation of the unitary
-matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`.
-This `F.Q` object can be efficiently multiplied by matrices or vectors,
-and can be converted to an ordinary matrix type with `Matrix(F.Q)`.
-"""
-struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T}
-    uplo::Char
-    factors::S
-    τ::W
-    function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym}
-        new(uplo, factors, τ)
-    end
-end
-HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ)
-HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ)
-
 function getproperty(F::Hessenberg, d::Symbol)
     d === :Q && return HessenbergQ(F)
     return getfield(F, d)
 end
 
-size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim)
-size(Q::HessenbergQ) = size(Q, 1), size(Q, 2)
-
 Base.propertynames(F::Hessenberg, private::Bool=false) =
     (:Q, :H, :μ, (private ? (:τ, :factors, :uplo) : ())...)
 
-# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra)
-const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym}
-
-## reconstruct the original matrix
-Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ))
-Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ))
 AbstractArray(F::Hessenberg) = AbstractMatrix(F)
 Matrix(F::Hessenberg) = Array(AbstractArray(F))
 Array(F::Hessenberg) = Matrix(F)
@@ -572,31 +507,6 @@ function AbstractMatrix(F::Hessenberg)
     end
 end
 
-# adjoint(Q::HessenbergQ{<:Real})
-
-lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
-rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
-    LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
-lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
-rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
-    (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
-
-lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X)
-rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
-    LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X)
-lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
-    (Q = adjQ.parent; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
-rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
-    (Q = adjQ.parent; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
-
-lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')'
-rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
-lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T}  = rmul!(X', adjQ')'
-rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
-
 # multiply x by the entries of M in the upper-k triangle, which contains
 # the entries of the upper-Hessenberg matrix H for k=-1
 function rmul_triu!(M::AbstractMatrix, x, k::Integer=0)
@@ -687,8 +597,8 @@ function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Any,<:A
     return B .= Complex.(Br,Bi)
 end
 
-ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')'
-rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')'
+ldiv!(F::AdjointFactorization{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')'
+rdiv!(B::AbstractMatrix, F::AdjointFactorization{<:Any,<:Hessenberg}) = ldiv!(F', B')'
 
 det(F::Hessenberg) = det(F.H; shift=F.μ)
 logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ)
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
index 82ce01fd8428b..066a858cacb30 100644
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ b/stdlib/LinearAlgebra/src/lapack.jl
@@ -1010,6 +1010,9 @@ for (gels, gesv, getrs, getri, elty) in
             if n != size(B, 1)
                 throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
             end
+            if n != length(ipiv)
+                throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
+            end
             nrhs = size(B, 2)
             info = Ref{BlasInt}()
             ccall((@blasfunc($getrs), libblastrampoline), Cvoid,
@@ -2023,9 +2026,9 @@ the orthogonal/unitary matrix `Q` is computed. If `jobu`, `jobv`, or `jobq` is
 ggsvd3!
 
 ## Expert driver and generalized eigenvalue problem
-for (geevx, ggev, elty) in
-    ((:dgeevx_,:dggev_,:Float64),
-     (:sgeevx_,:sggev_,:Float32))
+for (geevx, ggev, ggev3, elty) in
+    ((:dgeevx_,:dggev_,:dggev3_,:Float64),
+     (:sgeevx_,:sggev_,:sggev3_,:Float32))
     @eval begin
         #     SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI,
         #                          VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM,
@@ -2093,7 +2096,7 @@ for (geevx, ggev, elty) in
                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
+                       Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
                        Clong, Clong, Clong, Clong),
                        balanc, jobvl, jobvr, sense,
                        n, A, lda, wr,
@@ -2160,7 +2163,71 @@ for (geevx, ggev, elty) in
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}, Clong, Clong),
+                     Ref{BlasInt}, Clong, Clong),
+                    jobvl, jobvr, n, A,
+                    lda, B, ldb, alphar,
+                    alphai, beta, vl, ldvl,
+                    vr, ldvr, work, lwork,
+                    info, 1, 1)
+                chklapackerror(info[])
+                if i == 1
+                    lwork = BlasInt(work[1])
+                    resize!(work, lwork)
+                end
+            end
+            alphar, alphai, beta, vl, vr
+        end
+
+        #       SUBROUTINE DGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
+        #      $                   BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
+        # *     .. Scalar Arguments ..
+        #       CHARACTER          JOBVL, JOBVR
+        #       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+        # *     ..
+        # *     .. Array Arguments ..
+        #       DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+        #      $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
+        #      $                   VR( LDVR, * ), WORK( * )
+        function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            chkstride1(A,B)
+            n, m = checksquare(A,B)
+            if n != m
+                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
+            end
+            lda = max(1, stride(A, 2))
+            ldb = max(1, stride(B, 2))
+            alphar = similar(A, $elty, n)
+            alphai = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            ldvl = 0
+            if jobvl == 'V'
+                ldvl = n
+            elseif jobvl == 'N'
+                ldvl = 1
+            else
+                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+            end
+            vl = similar(A, $elty, ldvl, n)
+            ldvr = 0
+            if jobvr == 'V'
+                ldvr = n
+            elseif jobvr == 'N'
+                ldvr = 1
+            else
+                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+            end
+            vr = similar(A, $elty, ldvr, n)
+            work = Vector{$elty}(undef, 1)
+            lwork = BlasInt(-1)
+            info = Ref{BlasInt}()
+            for i = 1:2  # first call returns lwork as work[1]
+                ccall((@blasfunc($ggev3), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
+                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
+                     Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
+                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                     Ref{BlasInt}, Clong, Clong),
                     jobvl, jobvr, n, A,
                     lda, B, ldb, alphar,
                     alphai, beta, vl, ldvl,
@@ -2177,9 +2244,9 @@ for (geevx, ggev, elty) in
     end
 end
 
-for (geevx, ggev, elty, relty) in
-    ((:zgeevx_,:zggev_,:ComplexF64,:Float64),
-     (:cgeevx_,:cggev_,:ComplexF32,:Float32))
+for (geevx, ggev, ggev3, elty, relty) in
+    ((:zgeevx_,:zggev_,:zggev3_,:ComplexF64,:Float64),
+     (:cgeevx_,:cggev_,:cggev3_,:ComplexF32,:Float32))
     @eval begin
         #     SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL,
         #                          LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE,
@@ -2241,7 +2308,7 @@ for (geevx, ggev, elty, relty) in
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{$relty},
                        Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong, Clong),
+                       Ptr{$relty}, Ref{BlasInt}, Clong, Clong, Clong, Clong),
                        balanc, jobvl, jobvr, sense,
                        n, A, lda, w,
                        VL, max(1,ldvl), VR, max(1,ldvr),
@@ -2307,7 +2374,72 @@ for (geevx, ggev, elty, relty) in
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ptr{BlasInt}, Clong, Clong),
+                     Ref{BlasInt}, Clong, Clong),
+                    jobvl, jobvr, n, A,
+                    lda, B, ldb, alpha,
+                    beta, vl, ldvl, vr,
+                    ldvr, work, lwork, rwork,
+                    info, 1, 1)
+                chklapackerror(info[])
+                if i == 1
+                    lwork = BlasInt(work[1])
+                    resize!(work, lwork)
+                end
+            end
+            alpha, beta, vl, vr
+        end
+
+        # SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
+        #      $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
+        # *     .. Scalar Arguments ..
+        #       CHARACTER          JOBVL, JOBVR
+        #       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+        # *     ..
+        # *     .. Array Arguments ..
+        #       DOUBLE PRECISION   RWORK( * )
+        #       COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+        #      $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+        #      $                   WORK( * )
+        function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            require_one_based_indexing(A, B)
+            chkstride1(A, B)
+            n, m = checksquare(A, B)
+            if n != m
+                throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size"))
+            end
+            lda = max(1, stride(A, 2))
+            ldb = max(1, stride(B, 2))
+            alpha = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            ldvl = 0
+            if jobvl == 'V'
+                ldvl = n
+            elseif jobvl == 'N'
+                ldvl = 1
+            else
+                throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed"))
+            end
+            vl = similar(A, $elty, ldvl, n)
+            ldvr = 0
+            if jobvr == 'V'
+                ldvr = n
+            elseif jobvr == 'N'
+                ldvr = 1
+            else
+                throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed"))
+            end
+            vr = similar(A, $elty, ldvr, n)
+            work = Vector{$elty}(undef, 1)
+            lwork = BlasInt(-1)
+            rwork = Vector{$relty}(undef, 8n)
+            info = Ref{BlasInt}()
+            for i = 1:2  # first call returns lwork as work[1]
+                ccall((@blasfunc($ggev3), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
+                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
+                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
+                     Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
+                     Ref{BlasInt}, Clong, Clong),
                     jobvl, jobvr, n, A,
                     lda, B, ldb, alpha,
                     beta, vl, ldvl, vr,
@@ -2353,6 +2485,17 @@ corresponding eigenvectors are computed.
 """
 ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
 
+"""
+    ggev3!(jobvl, jobvr, A, B) -> (alpha, beta, vl, vr)
+
+Finds the generalized eigendecomposition of `A` and `B` using a blocked
+algorithm. If `jobvl = N`, the left eigenvectors aren't computed. If
+`jobvr = N`, the right eigenvectors aren't computed. If `jobvl = V` or
+`jobvr = V`, the corresponding eigenvectors are computed.  This function
+requires LAPACK 3.6.0.
+"""
+ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
+
 # One step incremental condition estimation of max/min singular values
 for (laic1, elty) in
     ((:dlaic1_,:Float64),
@@ -5030,9 +5173,9 @@ solution `X`.
 hetrs!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat)
 
 # Symmetric (real) eigensolvers
-for (syev, syevr, sygvd, elty) in
-    ((:dsyev_,:dsyevr_,:dsygvd_,:Float64),
-     (:ssyev_,:ssyevr_,:ssygvd_,:Float32))
+for (syev, syevr, syevd, sygvd, elty) in
+    ((:dsyev_,:dsyevr_,:dsyevd_,:dsygvd_,:Float64),
+     (:ssyev_,:ssyevr_,:ssyevd_,:ssygvd_,:Float32))
     @eval begin
         #       SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
         # *     .. Scalar Arguments ..
@@ -5085,7 +5228,7 @@ for (syev, syevr, sygvd, elty) in
             end
             lda = stride(A,2)
             m = Ref{BlasInt}()
-            w = similar(A, $elty, n)
+            W = similar(A, $elty, n)
             ldz = n
             if jobz == 'N'
                 Z = similar(A, $elty, ldz, 0)
@@ -5109,7 +5252,7 @@ for (syev, syevr, sygvd, elty) in
                     jobz, range, uplo, n,
                     A, max(1,lda), vl, vu,
                     il, iu, abstol, m,
-                    w, Z, max(1,ldz), isuppz,
+                    W, Z, max(1,ldz), isuppz,
                     work, lwork, iwork, liwork,
                     info, 1, 1, 1)
                 chklapackerror(info[])
@@ -5120,11 +5263,51 @@ for (syev, syevr, sygvd, elty) in
                     resize!(iwork, liwork)
                 end
             end
-            w[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)]
+            W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)]
         end
         syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) =
             syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0)
 
+        #       SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
+        #      $                   IWORK, LIWORK, INFO )
+        # *     .. Scalar Arguments ..
+        #       CHARACTER          JOBZ, UPLO
+        #       INTEGER            INFO, LDA, LIWORK, LWORK, N
+        # *     ..
+        # *     .. Array Arguments ..
+        #       INTEGER            IWORK( * )
+        #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
+        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+            chkstride1(A)
+            n = checksquare(A)
+            chkuplofinite(A, uplo)
+            lda = stride(A,2)
+            m = Ref{BlasInt}()
+            W = similar(A, $elty, n)
+            work   = Vector{$elty}(undef, 1)
+            lwork  = BlasInt(-1)
+            iwork  = Vector{BlasInt}(undef, 1)
+            liwork = BlasInt(-1)
+            info   = Ref{BlasInt}()
+            for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
+                ccall((@blasfunc($syevd), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
+                        Ptr{BlasInt}, Clong, Clong),
+                    jobz, uplo, n, A, max(1,lda),
+                    W, work, lwork, iwork, liwork,
+                    info, 1, 1)
+                chklapackerror(info[])
+                if i == 1
+                    lwork = BlasInt(real(work[1]))
+                    resize!(work, lwork)
+                    liwork = iwork[1]
+                    resize!(iwork, liwork)
+                end
+            end
+            jobz == 'V' ? (W, A) : W
+        end
+
         # Generalized eigenproblem
         #           SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
         #      $                   LWORK, IWORK, LIWORK, INFO )
@@ -5173,9 +5356,9 @@ for (syev, syevr, sygvd, elty) in
     end
 end
 # Hermitian eigensolvers
-for (syev, syevr, sygvd, elty, relty) in
-    ((:zheev_,:zheevr_,:zhegvd_,:ComplexF64,:Float64),
-     (:cheev_,:cheevr_,:chegvd_,:ComplexF32,:Float32))
+for (syev, syevr, syevd, sygvd, elty, relty) in
+    ((:zheev_,:zheevr_,:zheevd_,:zhegvd_,:ComplexF64,:Float64),
+     (:cheev_,:cheevr_,:cheevd_,:chegvd_,:ComplexF32,:Float32))
     @eval begin
         # SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO )
         # *     .. Scalar Arguments ..
@@ -5236,7 +5419,7 @@ for (syev, syevr, sygvd, elty, relty) in
             end
             lda = max(1,stride(A,2))
             m = Ref{BlasInt}()
-            w = similar(A, $relty, n)
+            W = similar(A, $relty, n)
             if jobz == 'N'
                 ldz = 1
                 Z = similar(A, $elty, ldz, 0)
@@ -5264,7 +5447,7 @@ for (syev, syevr, sygvd, elty, relty) in
                       jobz, range, uplo, n,
                       A, lda, vl, vu,
                       il, iu, abstol, m,
-                      w, Z, ldz, isuppz,
+                      W, Z, ldz, isuppz,
                       work, lwork, rwork, lrwork,
                       iwork, liwork, info,
                       1, 1, 1)
@@ -5278,11 +5461,56 @@ for (syev, syevr, sygvd, elty, relty) in
                     resize!(iwork, liwork)
                 end
             end
-            w[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)]
+            W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)]
         end
         syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) =
             syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0)
 
+        #       SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
+        #      $                   LRWORK, IWORK, LIWORK, INFO )
+        # *     .. Scalar Arguments ..
+        #       CHARACTER          JOBZ, UPLO
+        #       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N
+        # *     ..
+        # *     .. Array Arguments ..
+        #       INTEGER            IWORK( * )
+        #       DOUBLE PRECISION   RWORK( * )
+        #       COMPLEX*16         A( LDA, * ), WORK( * )
+        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+            chkstride1(A)
+            chkuplofinite(A, uplo)
+            n = checksquare(A)
+            lda = max(1, stride(A,2))
+            m = Ref{BlasInt}()
+            W = similar(A, $relty, n)
+            work   = Vector{$elty}(undef, 1)
+            lwork  = BlasInt(-1)
+            rwork  = Vector{$relty}(undef, 1)
+            lrwork = BlasInt(-1)
+            iwork  = Vector{BlasInt}(undef, 1)
+            liwork = BlasInt(-1)
+            info   = Ref{BlasInt}()
+            for i = 1:2  # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
+                ccall((@blasfunc($syevd), liblapack), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                    Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt},
+                    Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
+                    jobz, uplo, n, A, stride(A,2),
+                    W, work, lwork, rwork, lrwork,
+                    iwork, liwork, info, 1, 1)
+                chklapackerror(info[])
+                if i == 1
+                    lwork = BlasInt(real(work[1]))
+                    resize!(work, lwork)
+                    lrwork = BlasInt(rwork[1])
+                    resize!(rwork, lrwork)
+                    liwork = iwork[1]
+                    resize!(iwork, liwork)
+                end
+            end
+            jobz == 'V' ? (W, A) : W
+        end
+
         #       SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
         #      $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
         # *     .. Scalar Arguments ..
@@ -5364,6 +5592,20 @@ The eigenvalues are returned in `W` and the eigenvectors in `Z`.
 syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix,
        vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
 
+"""
+    syevd!(jobz, uplo, A)
+
+Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors
+(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle
+of `A` is used. If `uplo = L`, the lower triangle of `A` is used.
+
+Use the divide-and-conquer method, instead of the QR iteration used by
+`syev!` or multiple relatively robust representations used by `syevr!`.
+See James W. Demmel et al, SIAM J. Sci. Comput. 30, 3, 1508 (2008) for
+a comparison of the accuracy and performatce of different methods.
+"""
+syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix)
+
 """
     sygvd!(itype, jobz, uplo, A, B) -> (w, A, B)
 
@@ -5993,9 +6235,9 @@ for (ormtr, elty) in
     end
 end
 
-for (gees, gges, elty) in
-    ((:dgees_,:dgges_,:Float64),
-     (:sgees_,:sgges_,:Float32))
+for (gees, gges, gges3, elty) in
+    ((:dgees_,:dgges_,:dgges3_,:Float64),
+     (:sgees_,:sgges_,:sgges3_,:Float32))
     @eval begin
         #     .. Scalar Arguments ..
         #     CHARACTER          JOBVS, SORT
@@ -6022,7 +6264,7 @@ for (gees, gges, elty) in
                     (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
                         Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{Cvoid}, Ptr{BlasInt}, Clong, Clong),
+                        Ref{BlasInt}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong),
                     jobvs, 'N', C_NULL, n,
                         A, max(1, stride(A, 2)), sdim, wr,
                         wi, vs, ldvs, work,
@@ -6069,7 +6311,56 @@ for (gees, gges, elty) in
                         Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
                         Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                         Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid},
-                        Ptr{BlasInt}, Clong, Clong, Clong),
+                        Ref{BlasInt}, Clong, Clong, Clong),
+                    jobvsl, jobvsr, 'N', C_NULL,
+                    n, A, max(1,stride(A, 2)), B,
+                    max(1,stride(B, 2)), sdim, alphar, alphai,
+                    beta, vsl, ldvsl, vsr,
+                    ldvsr, work, lwork, C_NULL,
+                    info, 1, 1, 1)
+                chklapackerror(info[])
+                if i == 1
+                    lwork = BlasInt(real(work[1]))
+                    resize!(work, lwork)
+                end
+            end
+            A, B, complex.(alphar, alphai), beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:]
+        end
+
+        # *     .. Scalar Arguments ..
+        #       CHARACTER          JOBVSL, JOBVSR, SORT
+        #       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
+        # *     ..
+        # *     .. Array Arguments ..
+        #       LOGICAL            BWORK( * )
+        #       DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+        #      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
+        #      $                   VSR( LDVSR, * ), WORK( * )
+        function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            chkstride1(A, B)
+            n, m = checksquare(A, B)
+            if n != m
+                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+            end
+            sdim = BlasInt(0)
+            alphar = similar(A, $elty, n)
+            alphai = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            ldvsl = jobvsl == 'V' ? max(1, n) : 1
+            vsl = similar(A, $elty, ldvsl, n)
+            ldvsr = jobvsr == 'V' ? max(1, n) : 1
+            vsr = similar(A, $elty, ldvsr, n)
+            work = Vector{$elty}(undef, 1)
+            lwork = BlasInt(-1)
+            info = Ref{BlasInt}()
+            for i = 1:2  # first call returns lwork as work[1]
+                ccall((@blasfunc($gges3), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
+                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
+                        Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
+                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
+                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid},
+                        Ref{BlasInt}, Clong, Clong, Clong),
                     jobvsl, jobvsr, 'N', C_NULL,
                     n, A, max(1,stride(A, 2)), B,
                     max(1,stride(B, 2)), sdim, alphar, alphai,
@@ -6087,9 +6378,9 @@ for (gees, gges, elty) in
     end
 end
 
-for (gees, gges, elty, relty) in
-    ((:zgees_,:zgges_,:ComplexF64,:Float64),
-     (:cgees_,:cgges_,:ComplexF32,:Float32))
+for (gees, gges, gges3, elty, relty) in
+    ((:zgees_,:zgges_,:zgges3_,:ComplexF64,:Float64),
+     (:cgees_,:cgges_,:cgges3_,:ComplexF32,:Float32))
     @eval begin
         # *     .. Scalar Arguments ..
         #       CHARACTER          JOBVS, SORT
@@ -6117,7 +6408,7 @@ for (gees, gges, elty, relty) in
                     (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ptr{$relty}, Ptr{Cvoid}, Ptr{BlasInt}, Clong, Clong),
+                        Ptr{$relty}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong),
                     jobvs, sort, C_NULL, n,
                         A, max(1, stride(A, 2)), sdim, w,
                         vs, ldvs, work, lwork,
@@ -6165,7 +6456,57 @@ for (gees, gges, elty, relty) in
                         Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid},
-                        Ptr{BlasInt}, Clong, Clong, Clong),
+                        Ref{BlasInt}, Clong, Clong, Clong),
+                    jobvsl, jobvsr, 'N', C_NULL,
+                    n, A, max(1, stride(A, 2)), B,
+                    max(1, stride(B, 2)), sdim, alpha, beta,
+                    vsl, ldvsl, vsr, ldvsr,
+                    work, lwork, rwork, C_NULL,
+                    info, 1, 1, 1)
+                chklapackerror(info[])
+                if i == 1
+                    lwork = BlasInt(real(work[1]))
+                    resize!(work, lwork)
+                end
+            end
+            A, B, alpha, beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:]
+        end
+
+        # *     .. Scalar Arguments ..
+        #       CHARACTER          JOBVSL, JOBVSR, SORT
+        #       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
+        # *     ..
+        # *     .. Array Arguments ..
+        #       LOGICAL            BWORK( * )
+        #       DOUBLE PRECISION   RWORK( * )
+        #       COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+        #      $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+        #      $                   WORK( * )
+        function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            chkstride1(A, B)
+            n, m = checksquare(A, B)
+            if n != m
+                throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match"))
+            end
+            sdim = BlasInt(0)
+            alpha = similar(A, $elty, n)
+            beta = similar(A, $elty, n)
+            ldvsl = jobvsl == 'V' ? max(1, n) : 1
+            vsl = similar(A, $elty, ldvsl, n)
+            ldvsr = jobvsr == 'V' ? max(1, n) : 1
+            vsr = similar(A, $elty, ldvsr, n)
+            work = Vector{$elty}(undef, 1)
+            lwork = BlasInt(-1)
+            rwork = Vector{$relty}(undef, 8n)
+            info = Ref{BlasInt}()
+            for i = 1:2  # first call returns lwork as work[1]
+                ccall((@blasfunc($gges3), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
+                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
+                        Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
+                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                        Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid},
+                        Ref{BlasInt}, Clong, Clong, Clong),
                     jobvsl, jobvsr, 'N', C_NULL,
                     n, A, max(1, stride(A, 2)), B,
                     max(1, stride(B, 2)), sdim, alpha, beta,
@@ -6207,6 +6548,18 @@ vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`.
 """
 gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
 
+"""
+    gges3!(jobvsl, jobvsr, A, B) -> (A, B, alpha, beta, vsl, vsr)
+
+Computes the generalized eigenvalues, generalized Schur form, left Schur
+vectors (`jobsvl = V`), or right Schur vectors (`jobvsr = V`) of `A` and
+`B` using a blocked algorithm. This function requires LAPACK 3.6.0.
+
+The generalized eigenvalues are returned in `alpha` and `beta`. The left Schur
+vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`.
+"""
+gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix)
+
 for (trexc, trsen, tgsen, elty) in
     ((:dtrexc_, :dtrsen_, :dtgsen_, :Float64),
      (:strexc_, :strsen_, :stgsen_, :Float32))
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index 3099eb3b765ce..b133741611adc 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -83,11 +83,17 @@ struct lbt_config_t
     exported_symbols::Ptr{Cstring}
     num_exported_symbols::UInt32
 end
-const LBT_BUILDFLAGS_DEEPBINDLESS = 0x01
-const LBT_BUILDFLAGS_F2C_CAPABLE  = 0x02
+const LBT_BUILDFLAGS_DEEPBINDLESS     = 0x01
+const LBT_BUILDFLAGS_F2C_CAPABLE      = 0x02
+const LBT_BUILDFLAGS_CBLAS_DIVERGENCE = 0x04
+const LBT_BUILDFLAGS_COMPLEX_RETSTYLE = 0x08
+const LBT_BUILDFLAGS_SYMBOL_TRIMMING  = 0x10
 const LBT_BUILDFLAGS_MAP = Dict(
     LBT_BUILDFLAGS_DEEPBINDLESS => :deepbindless,
     LBT_BUILDFLAGS_F2C_CAPABLE => :f2c_capable,
+    LBT_BUILDFLAGS_CBLAS_DIVERGENCE => :cblas_divergence,
+    LBT_BUILDFLAGS_COMPLEX_RETSTYLE => :complex_retstyle,
+    LBT_BUILDFLAGS_SYMBOL_TRIMMING  => :symbol_trimming,
 )
 
 struct LBTConfig
@@ -207,9 +213,10 @@ function lbt_set_num_threads(nthreads)
     return ccall((:lbt_set_num_threads, libblastrampoline), Cvoid, (Int32,), nthreads)
 end
 
-function lbt_forward(path; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing)
+function lbt_forward(path::AbstractString; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing)
     _clear_config_with() do
-        return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring), path, clear ? 1 : 0, verbose ? 1 : 0, something(suffix_hint, C_NULL))
+        return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring),
+                     path, clear ? 1 : 0, verbose ? 1 : 0, something(suffix_hint, C_NULL))
     end
 end
 
diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl
index 8c6bfee435186..d3d6234961c44 100644
--- a/stdlib/LinearAlgebra/src/ldlt.jl
+++ b/stdlib/LinearAlgebra/src/ldlt.jl
@@ -62,7 +62,7 @@ LDLt{T}(F::LDLt) where {T} = LDLt(convert(AbstractMatrix{T}, F.data)::AbstractMa
 Factorization{T}(F::LDLt{T}) where {T} = F
 Factorization{T}(F::LDLt) where {T} = LDLt{T}(F)
 
-function getproperty(F::LDLt, d::Symbol)
+function getproperty(F::LDLt{<:Any, <:SymTridiagonal}, d::Symbol)
     Fdata = getfield(F, :data)
     if d === :d
         return Fdata.dv
@@ -211,7 +211,7 @@ function logabsdet(F::LDLt{<:Any,<:SymTridiagonal})
 end
 
 # Conversion methods
-function SymTridiagonal(F::LDLt)
+function SymTridiagonal(F::LDLt{<:Any, <:SymTridiagonal})
     e = copy(F.data.ev)
     d = copy(F.data.dv)
     e .*= d[1:end-1]
diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl
index acc68192ed715..33d794906c7e6 100644
--- a/stdlib/LinearAlgebra/src/lq.jl
+++ b/stdlib/LinearAlgebra/src/lq.jl
@@ -28,9 +28,7 @@ L factor:
  -8.60233   0.0
   4.41741  -0.697486
 Q factor:
-2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}:
- -0.581238  -0.813733
- -0.813733   0.581238
+2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}
 
 julia> S.L * S.Q
 2×2 Matrix{Float64}:
@@ -65,12 +63,6 @@ Base.iterate(S::LQ) = (S.L, Val(:Q))
 Base.iterate(S::LQ, ::Val{:Q}) = (S.Q, Val(:done))
 Base.iterate(S::LQ, ::Val{:done}) = nothing
 
-struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractMatrix{T}
-    factors::S
-    τ::C
-end
-
-
 """
     lq!(A) -> LQ
 
@@ -78,6 +70,7 @@ Compute the [`LQ`](@ref) factorization of `A`, using the input
 matrix as a workspace. See also [`lq`](@ref).
 """
 lq!(A::StridedMatrix{<:BlasFloat}) = LQ(LAPACK.gelqf!(A)...)
+
 """
     lq(A) -> S::LQ
 
@@ -105,9 +98,7 @@ L factor:
  -8.60233   0.0
   4.41741  -0.697486
 Q factor:
-2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}:
- -0.581238  -0.813733
- -0.813733   0.581238
+2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}
 
 julia> S.L * S.Q
 2×2 Matrix{Float64}:
@@ -135,8 +126,11 @@ AbstractArray(A::LQ) = AbstractMatrix(A)
 Matrix(A::LQ) = Array(AbstractArray(A))
 Array(A::LQ) = Matrix(A)
 
-adjoint(A::LQ) = Adjoint(A)
-Base.copy(F::Adjoint{T,<:LQ{T}}) where {T} =
+transpose(F::LQ{<:Real}) = F'
+transpose(::LQ) =
+    throw(ArgumentError("transpose of LQ decomposition is not supported, consider using adjoint"))
+
+Base.copy(F::AdjointFactorization{T,<:LQ{T}}) where {T} =
     QR{T,typeof(F.parent.factors),typeof(F.parent.τ)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ))
 
 function getproperty(F::LQ, d::Symbol)
@@ -153,8 +147,8 @@ end
 Base.propertynames(F::LQ, private::Bool=false) =
     (:L, :Q, (private ? fieldnames(typeof(F)) : ())...)
 
-getindex(A::LQPackedQ, i::Integer, j::Integer) =
-    lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i]
+# getindex(A::LQPackedQ, i::Integer, j::Integer) =
+#     lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i]
 
 function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ)
     summary(io, F); println(io)
@@ -164,32 +158,9 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ)
     show(io, mime, F.Q)
 end
 
-LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ))
-AbstractMatrix{T}(Q::LQPackedQ) where {T} = LQPackedQ{T}(Q)
-Matrix{T}(A::LQPackedQ) where {T} = convert(Matrix{T}, LAPACK.orglq!(copy(A.factors),A.τ))
-Matrix(A::LQPackedQ{T}) where {T} = Matrix{T}(A)
-Array{T}(A::LQPackedQ{T}) where {T} = Matrix{T}(A)
-Array(A::LQPackedQ) = Matrix(A)
-
 size(F::LQ, dim::Integer) = size(getfield(F, :factors), dim)
 size(F::LQ)               = size(getfield(F, :factors))
 
-# size(Q::LQPackedQ) yields the shape of Q's square form
-function size(Q::LQPackedQ)
-    n = size(Q.factors, 2)
-    return n, n
-end
-function size(Q::LQPackedQ, dim::Integer)
-    if dim < 1
-        throw(BoundsError())
-    elseif dim <= 2 # && 1 <= dim
-        return size(Q.factors, 2)
-    else # 2 < dim
-        return 1
-    end
-end
-
-
 ## Multiplication by LQ
 function lmul!(A::LQ, B::AbstractVecOrMat)
     lmul!(LowerTriangular(A.L), view(lmul!(A.Q, B), 1:size(A,1), axes(B,2)))
@@ -200,127 +171,6 @@ function *(A::LQ{TA}, B::AbstractVecOrMat{TB}) where {TA,TB}
     _cut_B(lmul!(convert(Factorization{TAB}, A), copy_similar(B, TAB)), 1:size(A,1))
 end
 
-## Multiplication by Q
-### QB
-lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B)
-function (*)(A::LQPackedQ, B::StridedVecOrMat)
-    TAB = promote_type(eltype(A), eltype(B))
-    lmul!(AbstractMatrix{TAB}(A), copymutable_oftype(B, TAB))
-end
-
-### QcB
-lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-    (A = adjA.parent; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B))
-lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (A = adjA.parent; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B))
-
-function *(adjA::Adjoint{<:Any,<:LQPackedQ}, B::StridedVecOrMat)
-    A = adjA.parent
-    TAB = promote_type(eltype(A), eltype(B))
-    if size(B,1) == size(A.factors,2)
-        lmul!(adjoint(AbstractMatrix{TAB}(A)), copymutable_oftype(B, TAB))
-    elseif size(B,1) == size(A.factors,1)
-        lmul!(adjoint(AbstractMatrix{TAB}(A)), [B; zeros(TAB, size(A.factors, 2) - size(A.factors, 1), size(B, 2))])
-    else
-        throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of A, $(size(A))"))
-    end
-end
-
-### QBc/QcBc
-function *(A::LQPackedQ, adjB::Adjoint{<:Any,<:StridedVecOrMat})
-    B = adjB.parent
-    TAB = promote_type(eltype(A), eltype(B))
-    BB = similar(B, TAB, (size(B, 2), size(B, 1)))
-    adjoint!(BB, B)
-    return lmul!(A, BB)
-end
-function *(adjA::Adjoint{<:Any,<:LQPackedQ}, adjB::Adjoint{<:Any,<:StridedVecOrMat})
-    B = adjB.parent
-    TAB = promote_type(eltype(adjA.parent), eltype(B))
-    BB = similar(B, TAB, (size(B, 2), size(B, 1)))
-    adjoint!(BB, B)
-    return lmul!(adjA, BB)
-end
-
-# in-place right-application of LQPackedQs
-# these methods require that the applied-to matrix's (A's) number of columns
-# match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place
-# operation, and the underlying LAPACK routine (ormlq) treats the implicit Q
-# as its (nQ-by-nQ) square form)
-rmul!(A::StridedMatrix{T}, B::LQPackedQ{T}) where {T<:BlasFloat} =
-    LAPACK.ormlq!('R', 'N', B.factors, B.τ, A)
-rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A))
-rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A))
-
-# out-of-place right application of LQPackedQs
-#
-# LQPackedQ's out-of-place multiplication behavior is context dependent. specifically,
-# if the inner dimension in the multiplication is the LQPackedQ's second dimension,
-# the LQPackedQ behaves like its square form. if the inner dimension in the
-# multiplication is the LQPackedQ's first dimension, the LQPackedQ behaves like either
-# its square form or its truncated form depending on the shape of the other object
-# involved in the multiplication. we treat these cases separately.
-#
-# (1) the inner dimension in the multiplication is the LQPackedQ's second dimension.
-# in this case, the LQPackedQ behaves like its square form.
-#
-function *(A::StridedVecOrMat, adjQ::Adjoint{<:Any,<:LQPackedQ})
-    Q = adjQ.parent
-    TR = promote_type(eltype(A), eltype(Q))
-    return rmul!(copymutable_oftype(A, TR), adjoint(AbstractMatrix{TR}(Q)))
-end
-function *(adjA::Adjoint{<:Any,<:StridedMatrix}, adjQ::Adjoint{<:Any,<:LQPackedQ})
-    A, Q = adjA.parent, adjQ.parent
-    TR = promote_type(eltype(A), eltype(Q))
-    C = adjoint!(similar(A, TR, reverse(size(A))), A)
-    return rmul!(C, adjoint(AbstractMatrix{TR}(Q)))
-end
-#
-# (2) the inner dimension in the multiplication is the LQPackedQ's first dimension.
-# in this case, the LQPackedQ behaves like either its square form or its
-# truncated form depending on the shape of the other object in the multiplication.
-#
-# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension
-# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q),
-# and if so effectively apply Q's square form to A without additional shenanigans; and
-# (2) if the preceding dimensions do not match, check whether the appropriate dimension of
-# A instead matches the number of rows of the matrix of which Q is a factor (i.e.
-# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending
-# A as necessary for check (1) to pass (if possible) and then applying Q's square form
-#
-function *(A::StridedVecOrMat, Q::LQPackedQ)
-    TR = promote_type(eltype(A), eltype(Q))
-    if size(A, 2) == size(Q.factors, 2)
-        C = copymutable_oftype(A, TR)
-    elseif size(A, 2) == size(Q.factors, 1)
-        C = zeros(TR, size(A, 1), size(Q.factors, 2))
-        copyto!(C, 1, A, 1, length(A))
-    else
-        _rightappdimmismatch("columns")
-    end
-    return rmul!(C, AbstractMatrix{TR}(Q))
-end
-function *(adjA::Adjoint{<:Any,<:StridedMatrix}, Q::LQPackedQ)
-    A = adjA.parent
-    TR = promote_type(eltype(A), eltype(Q))
-    if size(A, 1) == size(Q.factors, 2)
-        C = adjoint!(similar(A, TR, reverse(size(A))), A)
-    elseif size(A, 1) == size(Q.factors, 1)
-        C = zeros(TR, size(A, 2), size(Q.factors, 2))
-        adjoint!(view(C, :, 1:size(A, 1)), A)
-    else
-        _rightappdimmismatch("rows")
-    end
-    return rmul!(C, AbstractMatrix{TR}(Q))
-end
-_rightappdimmismatch(rowsorcols) =
-    throw(DimensionMismatch(string("the number of $(rowsorcols) of the matrix on the left ",
-        "must match either (1) the number of columns of the (LQPackedQ) matrix on the right ",
-        "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ",
-        "(the factorization's originating matrix's number of rows)")))
-
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns
 function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
@@ -343,7 +193,7 @@ function ldiv!(A::LQ, B::AbstractVecOrMat)
     return lmul!(adjoint(A.Q), B)
 end
 
-function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::AbstractVecOrMat)
+function ldiv!(Fadj::AdjointFactorization{<:Any,<:LQ}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     m, n = size(Fadj)
     m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)"))
@@ -353,7 +203,3 @@ function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::AbstractVecOrMat)
     ldiv!(UpperTriangular(adjoint(F.L)), view(B, 1:size(F,1), axes(B,2)))
     return B
 end
-
-# In LQ factorization, `Q` is expressed as the product of the adjoint of the
-# reflectors.  Thus, `det` has to be conjugated.
-det(Q::LQPackedQ) = conj(_det_tau(Q.τ))
diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl
index df4154b00e9ac..a93803ca2ea45 100644
--- a/stdlib/LinearAlgebra/src/lu.jl
+++ b/stdlib/LinearAlgebra/src/lu.jl
@@ -72,8 +72,9 @@ Base.iterate(S::LU, ::Val{:U}) = (S.U, Val(:p))
 Base.iterate(S::LU, ::Val{:p}) = (S.p, Val(:done))
 Base.iterate(S::LU, ::Val{:done}) = nothing
 
-adjoint(F::LU) = Adjoint(F)
-transpose(F::LU) = Transpose(F)
+# LU prefers transpose over adjoint in the real case, override the generic fallback
+adjoint(F::LU{<:Real}) = TransposeFactorization(F)
+transpose(F::LU{<:Real}) = TransposeFactorization(F)
 
 # the following method is meant to catch calls to lu!(A::LAPACKArray) without a pivoting stategy
 lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check)
@@ -324,7 +325,7 @@ Factorization{T}(F::LU) where {T} = LU{T}(F)
 copy(A::LU{T,S,P}) where {T,S,P} = LU{T,S,P}(copy(A.factors), copy(A.ipiv), A.info)
 
 size(A::LU)    = size(getfield(A, :factors))
-size(A::LU, i) = size(getfield(A, :factors), i)
+size(A::LU, i::Integer) = size(getfield(A, :factors), i)
 
 function ipiv2perm(v::AbstractVector{T}, maxi::Integer) where T
     require_one_based_indexing(v)
@@ -429,49 +430,29 @@ function ldiv!(A::LU, B::AbstractVecOrMat)
     ldiv!(UpperTriangular(A.factors), ldiv!(UnitLowerTriangular(A.factors), B))
 end
 
-ldiv!(transA::Transpose{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
+ldiv!(transA::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
     (A = transA.parent; LAPACK.getrs!('T', A.factors, A.ipiv, B))
 
-function ldiv!(transA::Transpose{<:Any,<:LU}, B::AbstractVecOrMat)
+function ldiv!(transA::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat)
     A = transA.parent
     ldiv!(transpose(UnitLowerTriangular(A.factors)), ldiv!(transpose(UpperTriangular(A.factors)), B))
     _apply_inverse_ipiv_rows!(A, B)
 end
 
-ldiv!(adjF::Adjoint{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:Real} =
-    (F = adjF.parent; ldiv!(transpose(F), B))
-ldiv!(adjA::Adjoint{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
+ldiv!(adjA::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
     (A = adjA.parent; LAPACK.getrs!('C', A.factors, A.ipiv, B))
 
-function ldiv!(adjA::Adjoint{<:Any,<:LU}, B::AbstractVecOrMat)
+function ldiv!(adjA::AdjointFactorization{<:Any,<:LU}, B::AbstractVecOrMat)
     A = adjA.parent
     ldiv!(adjoint(UnitLowerTriangular(A.factors)), ldiv!(adjoint(UpperTriangular(A.factors)), B))
     _apply_inverse_ipiv_rows!(A, B)
 end
 
-(\)(A::Adjoint{<:Any,<:LU}, B::Adjoint{<:Any,<:AbstractVecOrMat}) = A \ copy(B)
-(\)(A::Transpose{<:Any,<:LU}, B::Transpose{<:Any,<:AbstractVecOrMat}) = A \ copy(B)
-(\)(A::Adjoint{T,<:LU{T,<:StridedMatrix}}, B::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasComplex} =
+(\)(A::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasComplex} =
     LAPACK.getrs!('C', A.parent.factors, A.parent.ipiv, copy(B))
-(\)(A::Transpose{T,<:LU{T,<:StridedMatrix}}, B::Transpose{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} =
+(\)(A::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::Transpose{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} =
     LAPACK.getrs!('T', A.parent.factors, A.parent.ipiv, copy(B))
 
-function (/)(A::AbstractMatrix, F::Adjoint{<:Any,<:LU})
-    T = promote_type(eltype(A), eltype(F))
-    return adjoint(ldiv!(F.parent, copy_similar(adjoint(A), T)))
-end
-# To avoid ambiguities with definitions in adjtrans.jl and factorizations.jl
-(/)(adjA::AdjointAbsVec, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent)
-(/)(adjA::AdjointAbsMat, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent)
-function (/)(trA::TransposeAbsVec, F::Adjoint{<:Any,<:LU})
-    T = promote_type(eltype(trA), eltype(F))
-    return adjoint(ldiv!(F.parent, conj!(copy_similar(trA.parent, T))))
-end
-function (/)(trA::TransposeAbsMat, F::Adjoint{<:Any,<:LU})
-    T = promote_type(eltype(trA), eltype(F))
-    return adjoint(ldiv!(F.parent, conj!(copy_similar(trA.parent, T))))
-end
-
 function det(F::LU{T}) where T
     n = checksquare(F)
     issuccess(F) || return zero(T)
@@ -654,7 +635,7 @@ function ldiv!(A::LU{T,Tridiagonal{T,V}}, B::AbstractVecOrMat) where {T,V}
     return B
 end
 
-function ldiv!(transA::Transpose{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
+function ldiv!(transA::TransposeFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
     require_one_based_indexing(B)
     A = transA.parent
     n = size(A,1)
@@ -691,7 +672,7 @@ function ldiv!(transA::Transpose{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVec
 end
 
 # Ac_ldiv_B!(A::LU{T,Tridiagonal{T}}, B::AbstractVecOrMat) where {T<:Real} = At_ldiv_B!(A,B)
-function ldiv!(adjA::Adjoint{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
+function ldiv!(adjA::AdjointFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
     require_one_based_indexing(B)
     A = adjA.parent
     n = size(A,1)
@@ -728,8 +709,8 @@ function ldiv!(adjA::Adjoint{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMa
 end
 
 rdiv!(B::AbstractMatrix, A::LU) = transpose(ldiv!(transpose(A), transpose(B)))
-rdiv!(B::AbstractMatrix, A::Transpose{<:Any,<:LU}) = transpose(ldiv!(A.parent, transpose(B)))
-rdiv!(B::AbstractMatrix, A::Adjoint{<:Any,<:LU}) = adjoint(ldiv!(A.parent, adjoint(B)))
+rdiv!(B::AbstractMatrix, A::TransposeFactorization{<:Any,<:LU}) = transpose(ldiv!(A.parent, transpose(B)))
+rdiv!(B::AbstractMatrix, A::AdjointFactorization{<:Any,<:LU}) = adjoint(ldiv!(A.parent, adjoint(B)))
 
 # Conversions
 AbstractMatrix(F::LU) = (F.L * F.U)[invperm(F.p),:]
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index 6d00b950525e6..170aacee6682f 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -1,11 +1,16 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# matmul.jl: Everything to do with dense matrix multiplication
+
 # Matrix-matrix multiplication
 
-AdjOrTransStridedMat{T} = Union{Adjoint{T, <:StridedMatrix}, Transpose{T, <:StridedMatrix}}
-StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{T, <:StridedMatrix}, Transpose{T, <:StridedMatrix}}
+AdjOrTransStridedMat{T} = Union{Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}}
+StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}}
+StridedMaybeAdjOrTransVecOrMat{T} = Union{StridedVecOrMat{T}, AdjOrTrans{<:Any, <:StridedVecOrMat{T}}}
 
-# matmul.jl: Everything to do with dense matrix multiplication
+_parent(A) = A
+_parent(A::Adjoint) = parent(A)
+_parent(A::Transpose) = parent(A)
 
 matprod(x, y) = x*y + x*y
 
@@ -46,14 +51,14 @@ function *(transx::Transpose{<:Any,<:StridedVector{T}}, y::StridedVector{T}) whe
 end
 
 # Matrix-vector multiplication
-function (*)(A::StridedMatrix{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real}
+function (*)(A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real}
     TS = promote_op(matprod, T, S)
     y = isconcretetype(TS) ? convert(AbstractVector{TS}, x) : x
     mul!(similar(x, TS, size(A,1)), A, y)
 end
 function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S}
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,axes(A,1)),A,x)
+    mul!(similar(x, TS, axes(A,1)), A, x)
 end
 
 # these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case):
@@ -61,68 +66,32 @@ end
 (*)(a::AbstractVector, adjB::AdjointAbsMat) = reshape(a, length(a), 1) * adjB
 (*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B
 
-@inline mul!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T},
-             alpha::Number, beta::Number) where {T<:BlasFloat} =
-    gemv!(y, 'N', A, x, alpha, beta)
-
+@inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
+                alpha::Number, beta::Number) =
+    generic_matvecmul!(y, adj_or_trans_char(A), _parent(A), x, MulAddMul(alpha, beta))
+# BLAS cases
+# equal eltypes
+@inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T},
+                _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} =
+    gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta)
+# Real (possibly transposed) matrix times complex vector.
+# Multiply the matrix with the real and imaginary parts separately
+@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
+                _add::MulAddMul=MulAddMul()) where {T<:BlasReal} =
+    gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta)
 # Complex matrix times real vector.
 # Reinterpret the matrix as a real matrix and do real matvec computation.
-@inline mul!(y::StridedVector{Complex{T}}, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
-        alpha::Number, beta::Number) where {T<:BlasReal} =
-    gemv!(y, 'N', A, x, alpha, beta)
-
-# Real matrix times complex vector.
-# Multiply the matrix with the real and imaginary parts separately
-@inline mul!(y::StridedVector{Complex{T}}, A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{Complex{T}},
-        alpha::Number, beta::Number) where {T<:BlasReal} =
-    gemv!(y, A isa StridedArray ? 'N' : 'T', A isa StridedArray ? A : parent(A), x, alpha, beta)
-
-@inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
-             alpha::Number, beta::Number) =
-    generic_matvecmul!(y, 'N', A, x, MulAddMul(alpha, beta))
-
-function *(tA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
-    TS = promote_op(matprod, T, S)
-    mul!(similar(x, TS, size(tA, 1)), tA, convert(AbstractVector{TS}, x))
-end
-function *(tA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
-    TS = promote_op(matprod, T, S)
-    mul!(similar(x, TS, size(tA, 1)), tA, x)
-end
-@inline mul!(y::StridedVector{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasFloat} =
-    gemv!(y, 'T', tA.parent, x, alpha, beta)
-@inline mul!(y::AbstractVector, tA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
-                      alpha::Number, beta::Number) =
-    generic_matvecmul!(y, 'T', tA.parent, x, MulAddMul(alpha, beta))
-
-function *(adjA::Adjoint{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
-    TS = promote_op(matprod, T, S)
-    mul!(similar(x, TS, size(adjA, 1)), adjA, convert(AbstractVector{TS}, x))
-end
-function *(adjA::Adjoint{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
-    TS = promote_op(matprod, T, S)
-    mul!(similar(x, TS, size(adjA, 1)), adjA, x)
-end
-
-@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasReal} =
-    mul!(y, transpose(adjA.parent), x, alpha, beta)
-@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasComplex} =
-    gemv!(y, 'C', adjA.parent, x, alpha, beta)
-@inline mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
-                      alpha::Number, beta::Number) =
-    generic_matvecmul!(y, 'C', adjA.parent, x, MulAddMul(alpha, beta))
+# works only in cooperation with BLAS when A is untransposed (tA == 'N')
+# but that check is included in gemv! anyway
+@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
+                _add::MulAddMul=MulAddMul()) where {T<:BlasReal} =
+    gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta)
 
 # Vector-Matrix multiplication
 (*)(x::AdjointAbsVec,   A::AbstractMatrix) = (A'*x')'
 (*)(x::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A)*transpose(x))
 
-_parent(A) = A
-_parent(A::Adjoint) = parent(A)
-_parent(A::Transpose) = parent(A)
-
+# Matrix-matrix multiplication
 """
     *(A::AbstractMatrix, B::AbstractMatrix)
 
@@ -156,10 +125,6 @@ function (*)(A::StridedMaybeAdjOrTransMat{<:BlasComplex}, B::StridedMaybeAdjOrTr
          wrapperop(B)(convert(AbstractArray{TS}, _parent(B))))
 end
 
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
-                      alpha::Number, beta::Number) where {T<:BlasFloat}
-    return gemm_wrapper!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
-end
 # Complex Matrix times real matrix: We use that it is generally faster to reinterpret the
 # first matrix as a real matrix and carry out real matrix matrix multiply
 function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
@@ -299,9 +264,15 @@ julia> C
  730.0  740.0
 ```
 """
-@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat,
-             alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) =
+    generic_matmatmul!(
+        C,
+        adj_or_trans_char(A),
+        adj_or_trans_char(B),
+        _parent(A),
+        _parent(B),
+        MulAddMul(α, β)
+    )
 
 """
     rmul!(A, B)
@@ -369,103 +340,27 @@ julia> lmul!(F.Q, B)
 """
 lmul!(A, B)
 
-@inline function mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = tA.parent
-    if A === B
-        return syrk_wrapper!(C, 'T', A, MulAddMul(alpha, beta))
-    else
-        return gemm_wrapper!(C, 'T', 'N', A, B, MulAddMul(alpha, beta))
-    end
-end
-@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'T', 'N', tA.parent, B, MulAddMul(alpha, beta))
-
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    B = tB.parent
-    if A === B
-        return syrk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
+@inline function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
+                            _add::MulAddMul=MulAddMul()) where {T<:BlasFloat}
+    if tA == 'T' && tB == 'N' && A === B
+        return syrk_wrapper!(C, 'T', A, _add)
+    elseif tA == 'N' && tB == 'T' && A === B
+        return syrk_wrapper!(C, 'N', A, _add)
+    elseif tA == 'C' && tB == 'N' && A === B
+        return herk_wrapper!(C, 'C', A, _add)
+    elseif tA == 'N' && tB == 'C' && A === B
+        return herk_wrapper!(C, 'N', A, _add)
     else
-        return gemm_wrapper!(C, 'N', 'T', A, B, MulAddMul(alpha, beta))
+        return gemm_wrapper!(C, tA, tB, A, B, _add)
     end
 end
+
 # Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency.
-@inline mul!(C::StridedMatrix{Complex{T}}, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
-                    alpha::Number, beta::Number) where {T<:BlasReal} =
-    gemm_wrapper!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
-@inline mul!(C::StridedMatrix{Complex{T}}, A::StridedVecOrMat{Complex{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                    alpha::Number, beta::Number) where {T<:BlasReal} =
-    gemm_wrapper!(C, 'N', 'T', A, parent(tB), MulAddMul(alpha, beta))
-
-# collapsing the following two defs with C::AbstractVecOrMat yields ambiguities
-@inline mul!(C::AbstractVector, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat},
-             alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat},
-             alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta))
-
-@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat} =
-    gemm_wrapper!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta))
-
-@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat} =
-    gemm_wrapper!(C, 'T', 'C', tA.parent, adjB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'T', 'C', tA.parent, tB.parent, MulAddMul(alpha, beta))
-
-@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
-                 alpha::Real, beta::Real) where {T<:BlasReal} =
-    mul!(C, transpose(adjA.parent), B, alpha, beta)
-@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
-                 alpha::Number, beta::Number) where {T<:BlasComplex}
-    A = adjA.parent
-    if A === B
-        return herk_wrapper!(C, 'C', A, MulAddMul(alpha, beta))
-    else
-        return gemm_wrapper!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
-    end
-end
-@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'C', 'N', adjA.parent, B, MulAddMul(alpha, beta))
-
-@inline mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat} =
-    mul!(C, A, transpose(adjB.parent), alpha, beta)
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasComplex}
-    B = adjB.parent
-    if A === B
-        return herk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
-    else
-        return gemm_wrapper!(C, 'N', 'C', A, B, MulAddMul(alpha, beta))
-    end
+@inline function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
+                    _add::MulAddMul=MulAddMul()) where {T<:BlasReal}
+    gemm_wrapper!(C, tA, tB, A, B, _add)
 end
-@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'C', A, adjB.parent, MulAddMul(alpha, beta))
-
-@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat} =
-    gemm_wrapper!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta))
-
-@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat} =
-    gemm_wrapper!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta))
+
 
 # Supporting functions for matrix multiplication
 
@@ -502,7 +397,7 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::
         !iszero(stride(x, 1)) # We only check input's stride here.
         return BLAS.gemv!(tA, alpha, A, x, beta, y)
     else
-        return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
+        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
 end
 
@@ -523,7 +418,7 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa
         BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y))
         return y
     else
-        return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
+        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
 end
 
@@ -546,7 +441,7 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa
         BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :])
         return y
     else
-        return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
+        return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
 end
 
@@ -673,7 +568,7 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar
         stride(C, 2) >= size(C, 1))
         return BLAS.gemm!(tA, tB, alpha, A, B, beta, C)
     end
-    generic_matmatmul!(C, tA, tB, A, B, _add)
+    _generic_matmatmul!(C, tA, tB, A, B, _add)
 end
 
 function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar,
@@ -716,7 +611,7 @@ function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::Abs
         BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C))
         return C
     end
-    generic_matmatmul!(C, tA, tB, A, B, _add)
+    _generic_matmatmul!(C, tA, tB, A, B, _add)
 end
 
 # blas.jl defines matmul for floats; other integer and mixed precision
@@ -750,8 +645,12 @@ end
 # NOTE: the generic version is also called as fallback for
 #       strides != 1 cases
 
-function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::AbstractVector,
-                            _add::MulAddMul = MulAddMul()) where R
+generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector,
+                    _add::MulAddMul = MulAddMul()) =
+    _generic_matvecmul!(C, tA, A, B, _add)
+
+function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector,
+                            _add::MulAddMul = MulAddMul())
     require_one_based_indexing(C, A, B)
     mB = length(B)
     mA, nA = lapack_size(tA, A)
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
index 1de2c2edadf99..43d04ac5fa415 100644
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ b/stdlib/LinearAlgebra/src/qr.jl
@@ -32,7 +32,6 @@ The object has two fields:
     ``v_i`` is the ``i``th column of the matrix `V = I + tril(F.factors, -1)`.
 
 * `τ` is a vector  of length `min(m,n)` containing the coefficients ``\tau_i``.
-
 """
 struct QR{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T}
     factors::S
@@ -298,7 +297,7 @@ qr!(A::StridedMatrix{<:BlasFloat}, ::ColumnNorm) = QRPivoted(LAPACK.geqp3!(A)...
 """
     qr!(A, pivot = NoPivot(); blocksize)
 
-`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`StridedMatrix`](@ref),
+`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`AbstractMatrix`](@ref),
 but saves space by overwriting the input `A`, instead of creating a copy.
 An [`InexactError`](@ref) exception is thrown if the factorization produces a number not
 representable by the element type of `A`, e.g. for integer types.
@@ -316,9 +315,7 @@ julia> a = [1. 2.; 3. 4.]
 julia> qr!(a)
 LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}
 Q factor:
-2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}:
- -0.316228  -0.948683
- -0.948683   0.316228
+2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}
 R factor:
 2×2 Matrix{Float64}:
  -3.16228  -4.42719
@@ -387,7 +384,7 @@ orthogonal matrix.
 
 The block size for QR decomposition can be specified by keyword argument
 `blocksize :: Integer` when `pivot == NoPivot()` and `A isa StridedMatrix{<:BlasFloat}`.
-It is ignored when `blocksize > minimum(size(A))`.  See [`QRCompactWY`](@ref).
+It is ignored when `blocksize > minimum(size(A))`. See [`QRCompactWY`](@ref).
 
 !!! compat "Julia 1.4"
     The `blocksize` keyword argument requires Julia 1.4 or later.
@@ -403,10 +400,7 @@ julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0]
 julia> F = qr(A)
 LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}
 Q factor:
-3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}:
- -0.6   0.0   0.8
- -0.8   0.0  -0.6
-  0.0  -1.0   0.0
+3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}
 R factor:
 2×2 Matrix{Float64}:
  -5.0  10.0
@@ -514,413 +508,112 @@ end
 Base.propertynames(F::QRPivoted, private::Bool=false) =
     (:R, :Q, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
 
-adjoint(F::Union{QR,QRPivoted,QRCompactWY}) = Adjoint(F)
-
-abstract type AbstractQ{T} <: AbstractMatrix{T} end
-
-inv(Q::AbstractQ) = Q'
-
-"""
-    QRPackedQ <: AbstractMatrix
-
-The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or
-[`QRPivoted`](@ref) format.
-"""
-struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T}
-    factors::S
-    τ::C
-
-    function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
-        require_one_based_indexing(factors)
-        new{T,S,C}(factors, τ)
-    end
-end
-QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
-    QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ)
-QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
-    QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
-           QRPackedQ{T,S,typeof(τ)}(factors, τ), false)
-
-"""
-    QRCompactWYQ <: AbstractMatrix
-
-The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref)
-format.
-"""
-struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S}
-    factors::M
-    T::C
-
-    function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}}
-        require_one_based_indexing(factors)
-        new{S,M,C}(factors, T)
-    end
-end
-QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} =
-    QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T)
-QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} =
-    QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T))
-# backwards-compatible constructors (remove with Julia 2.0)
-@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M},
-           QRCompactWYQ{S,M,typeof(T)}(factors, T), false)
-
-QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ))
-AbstractMatrix{T}(Q::QRPackedQ{T}) where {T} = Q
-AbstractMatrix{T}(Q::QRPackedQ) where {T} = QRPackedQ{T}(Q)
-QRCompactWYQ{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ(convert(AbstractMatrix{S}, Q.factors), convert(AbstractMatrix{S}, Q.T))
-AbstractMatrix{S}(Q::QRCompactWYQ{S}) where {S} = Q
-AbstractMatrix{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ{S}(Q)
-Matrix{T}(Q::AbstractQ{S}) where {T,S} = convert(Matrix{T}, lmul!(Q, Matrix{S}(I, size(Q, 1), min(size(Q.factors)...))))
-Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q)
-Array{T}(Q::AbstractQ) where {T} = Matrix{T}(Q)
-Array(Q::AbstractQ) = Matrix(Q)
+transpose(F::Union{QR{<:Real},QRPivoted{<:Real},QRCompactWY{<:Real}}) = F'
+transpose(::Union{QR,QRPivoted,QRCompactWY}) =
+    throw(ArgumentError("transpose of QR decomposition is not supported, consider using adjoint"))
 
-size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim)
 size(F::Union{QR,QRCompactWY,QRPivoted}) = size(getfield(F, :factors))
-size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) =
-    size(getfield(Q, :factors), dim == 2 ? 1 : dim)
-size(Q::Union{QRCompactWYQ,QRPackedQ}) = size(Q, 1), size(Q, 2)
-
-copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q)))
-copy(Q::AbstractQ) = copymutable(Q)
-getindex(Q::AbstractQ, inds...) = copymutable(Q)[inds...]
-getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q)
-
-function getindex(Q::AbstractQ, ::Colon, j::Int)
-    y = zeros(eltype(Q), size(Q, 2))
-    y[j] = 1
-    lmul!(Q, y)
-end
+size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim)
 
-getindex(Q::AbstractQ, i::Int, j::Int) = Q[:, j][i]
 
-# specialization avoiding the fallback using slow `getindex`
-function copyto!(dest::AbstractMatrix, src::AbstractQ)
-    copyto!(dest, I)
-    lmul!(src, dest)
+function ldiv!(A::QRCompactWY{T}, b::AbstractVector{T}) where {T}
+    require_one_based_indexing(b)
+    m, n = size(A)
+    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2)))
+    return b
 end
-# needed to resolve method ambiguities
-function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm}
-    if perm == (1, 2)
-        copyto!(parent(dest), src)
-    else
-        @assert perm == (2, 1) # there are no other permutations of two indices
-        if T <: Real
-            copyto!(parent(dest), I)
-            lmul!(src', parent(dest))
-        else
-            # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q
-            tmp = similar(parent(dest))
-            copyto!(tmp, I)
-            rmul!(tmp, src)
-            permutedims!(parent(dest), tmp, (2, 1))
-        end
-    end
-    return dest
+function ldiv!(A::QRCompactWY{T}, B::AbstractMatrix{T}) where {T}
+    require_one_based_indexing(B)
+    m, n = size(A)
+    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2)))
+    return B
 end
 
-## Multiplication by Q
-### QB
-lmul!(A::QRCompactWYQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} =
-    LAPACK.gemqrt!('L', 'N', A.factors, A.T, B)
-lmul!(A::QRPackedQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} =
-    LAPACK.ormqr!('L', 'N', A.factors, A.τ, B)
-function lmul!(A::QRPackedQ, B::AbstractVecOrMat)
+# Julia implementation similar to xgelsy
+function ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}, rcond::Real) where {T<:BlasFloat}
     require_one_based_indexing(B)
-    mA, nA = size(A.factors)
-    mB, nB = size(B,1), size(B,2)
-    if mA != mB
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
-    end
-    Afactors = A.factors
-    @inbounds begin
-        for k = min(mA,nA):-1:1
-            for j = 1:nB
-                vBj = B[k,j]
-                for i = k+1:mB
-                    vBj += conj(Afactors[i,k])*B[i,j]
-                end
-                vBj = A.τ[k]*vBj
-                B[k,j] -= vBj
-                for i = k+1:mB
-                    B[i,j] -= Afactors[i,k]*vBj
-                end
-            end
-        end
-    end
-    B
-end
+    m, n = size(A)
 
-function (*)(A::AbstractQ, b::StridedVector)
-    TAb = promote_type(eltype(A), eltype(b))
-    Anew = convert(AbstractMatrix{TAb}, A)
-    if size(A.factors, 1) == length(b)
-        bnew = copymutable_oftype(b, TAb)
-    elseif size(A.factors, 2) == length(b)
-        bnew = [b; zeros(TAb, size(A.factors, 1) - length(b))]
-    else
-        throw(DimensionMismatch("vector must have length either $(size(A.factors, 1)) or $(size(A.factors, 2))"))
-    end
-    lmul!(Anew, bnew)
-end
-function (*)(A::AbstractQ, B::StridedMatrix)
-    TAB = promote_type(eltype(A), eltype(B))
-    Anew = convert(AbstractMatrix{TAB}, A)
-    if size(A.factors, 1) == size(B, 1)
-        Bnew = copymutable_oftype(B, TAB)
-    elseif size(A.factors, 2) == size(B, 1)
-        Bnew = [B; zeros(TAB, size(A.factors, 1) - size(B,1), size(B, 2))]
-    else
-        throw(DimensionMismatch("first dimension of matrix must have size either $(size(A.factors, 1)) or $(size(A.factors, 2))"))
+    if m > size(B, 1) || n > size(B, 1)
+        throw(DimensionMismatch("B has leading dimension $(size(B, 1)) but needs at least $(max(m, n))"))
     end
-    lmul!(Anew, Bnew)
-end
-
-function (*)(A::AbstractQ, b::Number)
-    TAb = promote_type(eltype(A), typeof(b))
-    dest = similar(A, TAb)
-    copyto!(dest, b*I)
-    lmul!(A, dest)
-end
 
-### QcB
-lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.gemqrt!('L', 'T', A.factors, A.T, B))
-lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.gemqrt!('L', 'C', A.factors, A.T, B))
-lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.ormqr!('L', 'T', A.factors, A.τ, B))
-lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.ormqr!('L', 'C', A.factors, A.τ, B))
-function lmul!(adjA::Adjoint{<:Any,<:QRPackedQ}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    A = adjA.parent
-    mA, nA = size(A.factors)
-    mB, nB = size(B,1), size(B,2)
-    if mA != mB
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)"))
+    if length(A.factors) == 0 || length(B) == 0
+        return B, 0
     end
-    Afactors = A.factors
+
     @inbounds begin
-        for k = 1:min(mA,nA)
-            for j = 1:nB
-                vBj = B[k,j]
-                for i = k+1:mB
-                    vBj += conj(Afactors[i,k])*B[i,j]
-                end
-                vBj = conj(A.τ[k])*vBj
-                B[k,j] -= vBj
-                for i = k+1:mB
-                    B[i,j] -= Afactors[i,k]*vBj
-                end
-            end
+        smin = smax = abs(A.factors[1])
+
+        if smax == 0
+            return fill!(B, 0), 0
         end
-    end
-    B
-end
-function *(adjQ::Adjoint{<:Any,<:AbstractQ}, B::StridedVecOrMat)
-    Q = adjQ.parent
-    TQB = promote_type(eltype(Q), eltype(B))
-    return lmul!(adjoint(convert(AbstractMatrix{TQB}, Q)), copymutable_oftype(B, TQB))
-end
 
-### QBc/QcBc
-function *(Q::AbstractQ, adjB::Adjoint{<:Any,<:StridedVecOrMat})
-    B = adjB.parent
-    TQB = promote_type(eltype(Q), eltype(B))
-    Bc = similar(B, TQB, (size(B, 2), size(B, 1)))
-    adjoint!(Bc, B)
-    return lmul!(convert(AbstractMatrix{TQB}, Q), Bc)
-end
-function *(adjQ::Adjoint{<:Any,<:AbstractQ}, adjB::Adjoint{<:Any,<:StridedVecOrMat})
-    Q, B = adjQ.parent, adjB.parent
-    TQB = promote_type(eltype(Q), eltype(B))
-    Bc = similar(B, TQB, (size(B, 2), size(B, 1)))
-    adjoint!(Bc, B)
-    return lmul!(adjoint(convert(AbstractMatrix{TQB}, Q)), Bc)
-end
+        mn = min(m, n)
 
-### AQ
-rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
-    LAPACK.gemqrt!('R', 'N', B.factors, B.T, A)
-rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
-    LAPACK.ormqr!('R', 'N', B.factors, B.τ, A)
-function rmul!(A::StridedMatrix,Q::QRPackedQ)
-    mQ, nQ = size(Q.factors)
-    mA, nA = size(A,1), size(A,2)
-    if nA != mQ
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
-    end
-    Qfactors = Q.factors
-    @inbounds begin
-        for k = 1:min(mQ,nQ)
-            for i = 1:mA
-                vAi = A[i,k]
-                for j = k+1:mQ
-                    vAi += A[i,j]*Qfactors[j,k]
-                end
-                vAi = vAi*Q.τ[k]
-                A[i,k] -= vAi
-                for j = k+1:nA
-                    A[i,j] -= vAi*conj(Qfactors[j,k])
-                end
-            end
-        end
-    end
-    A
-end
+        # allocate temporary work space
+        tmp  = Vector{T}(undef, 2mn)
+        wmin = view(tmp, 1:mn)
+        wmax = view(tmp, mn+1:2mn)
 
-function (*)(A::StridedMatrix, Q::AbstractQ)
-    TAQ = promote_type(eltype(A), eltype(Q))
+        rnk = 1
+        wmin[1] = 1
+        wmax[1] = 1
 
-    return rmul!(copymutable_oftype(A, TAQ), convert(AbstractMatrix{TAQ}, Q))
-end
+        while rnk < mn
+            i = rnk + 1
 
-function (*)(a::Number, B::AbstractQ)
-    TaB = promote_type(typeof(a), eltype(B))
-    dest = similar(B, TaB)
-    copyto!(dest, a*I)
-    rmul!(dest, B)
-end
+            smin, s1, c1 = LAPACK.laic1!(2, view(wmin, 1:rnk), smin, view(A.factors, 1:rnk, i), A.factors[i,i])
+            smax, s2, c2 = LAPACK.laic1!(1, view(wmax, 1:rnk), smax, view(A.factors, 1:rnk, i), A.factors[i,i])
 
-### AQc
-rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.gemqrt!('R', 'T', B.factors, B.T, A))
-rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.gemqrt!('R', 'C', B.factors, B.T, A))
-rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.ormqr!('R', 'T', B.factors, B.τ, A))
-rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.ormqr!('R', 'C', B.factors, B.τ, A))
-function rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRPackedQ})
-    Q = adjQ.parent
-    mQ, nQ = size(Q.factors)
-    mA, nA = size(A,1), size(A,2)
-    if nA != mQ
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)"))
-    end
-    Qfactors = Q.factors
-    @inbounds begin
-        for k = min(mQ,nQ):-1:1
-            for i = 1:mA
-                vAi = A[i,k]
-                for j = k+1:mQ
-                    vAi += A[i,j]*Qfactors[j,k]
-                end
-                vAi = vAi*conj(Q.τ[k])
-                A[i,k] -= vAi
-                for j = k+1:nA
-                    A[i,j] -= vAi*conj(Qfactors[j,k])
-                end
+            if smax*rcond > smin
+                break
             end
-        end
-    end
-    A
-end
-function *(A::StridedMatrix, adjB::Adjoint{<:Any,<:AbstractQ})
-    B = adjB.parent
-    TAB = promote_type(eltype(A),eltype(B))
-    BB = convert(AbstractMatrix{TAB}, B)
-    if size(A,2) == size(B.factors, 1)
-        AA = copy_similar(A, TAB)
-        return rmul!(AA, adjoint(BB))
-    elseif size(A,2) == size(B.factors,2)
-        return rmul!([A zeros(TAB, size(A, 1), size(B.factors, 1) - size(B.factors, 2))], adjoint(BB))
-    else
-        throw(DimensionMismatch("matrix A has dimensions $(size(A)) but matrix B has dimensions $(size(B))"))
-    end
-end
-*(u::AdjointAbsVec, A::Adjoint{<:Any,<:AbstractQ}) = adjoint(A.parent * u.parent)
 
+            for j in 1:rnk
+                wmin[j] *= s1
+                wmax[j] *= s2
+            end
+            wmin[i] = c1
+            wmax[i] = c2
 
-### AcQ/AcQc
-function *(adjA::Adjoint{<:Any,<:StridedVecOrMat}, Q::AbstractQ)
-    A = adjA.parent
-    TAQ = promote_type(eltype(A), eltype(Q))
-    Ac = similar(A, TAQ, (size(A, 2), size(A, 1)))
-    adjoint!(Ac, A)
-    return rmul!(Ac, convert(AbstractMatrix{TAQ}, Q))
-end
-function *(adjA::Adjoint{<:Any,<:StridedVecOrMat}, adjQ::Adjoint{<:Any,<:AbstractQ})
-    A, Q = adjA.parent, adjQ.parent
-    TAQ = promote_type(eltype(A), eltype(Q))
-    Ac = similar(A, TAQ, (size(A, 2), size(A, 1)))
-    adjoint!(Ac, A)
-    return rmul!(Ac, adjoint(convert(AbstractMatrix{TAQ}, Q)))
-end
+            rnk += 1
+        end
 
-### mul!
-function mul!(C::StridedVecOrMat{T}, Q::AbstractQ{T}, B::StridedVecOrMat{T}) where {T}
-    require_one_based_indexing(C, B)
-    mB = size(B, 1)
-    mC = size(C, 1)
-    if mB < mC
-        inds = CartesianIndices(B)
-        copyto!(C, inds, B, inds)
-        C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T)
-        return lmul!(Q, C)
-    else
-        return lmul!(Q, copyto!(C, B))
-    end
-end
-mul!(C::StridedVecOrMat{T}, A::StridedVecOrMat{T}, Q::AbstractQ{T}) where {T} = rmul!(copyto!(C, A), Q)
-mul!(C::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:AbstractQ{T}}, B::StridedVecOrMat{T}) where {T} = lmul!(adjQ, copyto!(C, B))
-mul!(C::StridedVecOrMat{T}, A::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:AbstractQ{T}}) where {T} = rmul!(copyto!(C, A), adjQ)
+        if rnk < n
+            C, τ = LAPACK.tzrzf!(A.factors[1:rnk, :])
+            work = vec(C)
+        else
+            C, τ = A.factors, A.τ
+            work = resize!(tmp, n)
+        end
 
-function ldiv!(A::QRCompactWY{T}, b::AbstractVector{T}) where {T<:BlasFloat}
-    m,n = size(A)
-    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2)))
-    return b
-end
-function ldiv!(A::QRCompactWY{T}, B::AbstractMatrix{T}) where {T<:BlasFloat}
-    m,n = size(A)
-    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2)))
-    return B
-end
+        lmul!(adjoint(A.Q), view(B, 1:m, :))
+        ldiv!(UpperTriangular(view(C, 1:rnk, 1:rnk)), view(B, 1:rnk, :))
 
-# Julia implementation similar to xgelsy
-function ldiv!(A::QRPivoted{T}, B::AbstractMatrix{T}, rcond::Real) where T<:BlasFloat
-    mA, nA = size(A.factors)
-    nr = min(mA,nA)
-    nrhs = size(B, 2)
-    if nr == 0
-        return B, 0
-    end
-    ar = abs(A.factors[1])
-    if ar == 0
-        B[1:nA, :] .= 0
-        return B, 0
-    end
-    rnk = 1
-    xmin = T[1]
-    xmax = T[1]
-    tmin = tmax = ar
-    while rnk < nr
-        tmin, smin, cmin = LAPACK.laic1!(2, xmin, tmin, view(A.factors, 1:rnk, rnk + 1), A.factors[rnk + 1, rnk + 1])
-        tmax, smax, cmax = LAPACK.laic1!(1, xmax, tmax, view(A.factors, 1:rnk, rnk + 1), A.factors[rnk + 1, rnk + 1])
-        tmax*rcond > tmin && break
-        push!(xmin, cmin)
-        push!(xmax, cmax)
-        for i = 1:rnk
-            xmin[i] *= smin
-            xmax[i] *= smax
+        if rnk < n
+            B[rnk+1:n,:] .= zero(T)
+            LAPACK.ormrz!('L', T <: Complex ? 'C' : 'T', C, τ, view(B, 1:n, :))
+        end
+
+        for j in axes(B, 2)
+            for i in 1:n
+                work[A.p[i]] = B[i,j]
+            end
+            for i in 1:n
+                B[i,j] = work[i]
+            end
         end
-        rnk += 1
     end
-    C, τ = LAPACK.tzrzf!(A.factors[1:rnk, :])
-    lmul!(A.Q', view(B, 1:mA, :))
-    ldiv!(UpperTriangular(view(C, :, 1:rnk)), view(B, 1:rnk, :))
-    B[rnk+1:end,:] .= zero(T)
-    LAPACK.ormrz!('L', eltype(B)<:Complex ? 'C' : 'T', C, τ, view(B, 1:nA, :))
-    B[A.p,:] = B[1:nA,:]
+
     return B, rnk
 end
-ldiv!(A::QRPivoted{T}, B::AbstractVector{T}) where {T<:BlasFloat} =
+
+ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractVector{T}) where {T<:BlasFloat} =
     vec(ldiv!(A, reshape(B, length(B), 1)))
-ldiv!(A::QRPivoted{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
-    ldiv!(A, B, min(size(A)...)*eps(real(float(one(eltype(B))))))[1]
+ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}) where {T<:BlasFloat} =
+    ldiv!(A, B, min(size(A)...)*eps(real(T)))[1]
+
 function _wide_qr_ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T
     m, n = size(A)
     minmn = min(m,n)
@@ -952,14 +645,14 @@ function _wide_qr_ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T
             B[m + 1:mB,1:nB] .= zero(T)
             for j = 1:nB
                 for k = 1:m
-                    vBj = B[k,j]
+                    vBj = B[k,j]'
                     for i = m + 1:n
-                        vBj += B[i,j]*R[k,i]'
+                        vBj += B[i,j]'*R[k,i]'
                     end
                     vBj *= τ[k]
-                    B[k,j] -= vBj
+                    B[k,j] -= vBj'
                     for i = m + 1:n
-                        B[i,j] -= R[k,i]*vBj
+                        B[i,j] -= R[k,i]'*vBj'
                     end
                 end
             end
@@ -1000,9 +693,9 @@ function _apply_permutation!(F::QRPivoted, B::AbstractVecOrMat)
     B[1:length(F.p), :] = B[F.p, :]
     return B
 end
-_apply_permutation!(F::Factorization, B::AbstractVecOrMat) = B
+_apply_permutation!(::Factorization, B::AbstractVecOrMat) = B
 
-function ldiv!(Fadj::Adjoint{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat)
+function ldiv!(Fadj::AdjointFactorization{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     m, n = size(Fadj)
 
@@ -1061,25 +754,3 @@ end
 ## Lower priority: Add LQ, QL and RQ factorizations
 
 # FIXME! Should add balancing option through xgebal
-
-
-det(Q::QRPackedQ) = _det_tau(Q.τ)
-
-det(Q::QRCompactWYQ) =
-    prod(i -> _det_tau(_diagview(Q.T[:, i:min(i + size(Q.T, 1), size(Q.T, 2))])),
-         1:size(Q.T, 1):size(Q.T, 2))
-
-_diagview(A) = @view A[diagind(A)]
-
-# Compute `det` from the number of Householder reflections.  Handle
-# the case `Q.τ` contains zeros.
-_det_tau(τs::AbstractVector{<:Real}) =
-    isodd(count(!iszero, τs)) ? -one(eltype(τs)) : one(eltype(τs))
-
-# In complex case, we need to compute the non-unit eigenvalue `λ = 1 - c*τ`
-# (where `c = v'v`) of each Householder reflector.  As we know that the
-# reflector must have the determinant of 1, it must satisfy `abs2(λ) == 1`.
-# Combining this with the constraint `c > 0`, it turns out that the eigenvalue
-# (hence the determinant) can be computed as `λ = -sign(τ)^2`.
-# See: https://github.com/JuliaLang/julia/pull/32887#issuecomment-521935716
-_det_tau(τs) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, τs)
diff --git a/stdlib/LinearAlgebra/src/schur.jl b/stdlib/LinearAlgebra/src/schur.jl
index 53741adb48cf9..7257544ff872e 100644
--- a/stdlib/LinearAlgebra/src/schur.jl
+++ b/stdlib/LinearAlgebra/src/schur.jl
@@ -345,8 +345,13 @@ Base.iterate(S::GeneralizedSchur, ::Val{:done}) = nothing
 
 Same as [`schur`](@ref) but uses the input matrices `A` and `B` as workspace.
 """
-schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
-    GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...)
+function schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat}
+    if LAPACK.version() < v"3.6.0"
+        GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...)
+    else
+        GeneralizedSchur(LinearAlgebra.LAPACK.gges3!('V', 'V', A, B)...)
+    end
+end
 
 """
     schur(A, B) -> F::GeneralizedSchur
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index e4f28286b6aaa..1744a2301f48a 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -107,6 +107,37 @@ for op in (:+, :-)
     end
 end
 
+# disambiguation between triangular and banded matrices, banded ones "dominate"
+mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix) = _mul!(C, A, B, MulAddMul())
+mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular) = _mul!(C, A, B, MulAddMul())
+mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) =
+    _mul!(C, A, B, MulAddMul(alpha, beta))
+mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular, alpha::Number, beta::Number) =
+    _mul!(C, A, B, MulAddMul(alpha, beta))
+
+function *(H::UpperHessenberg, B::Bidiagonal)
+    T = promote_op(matprod, eltype(H), eltype(B))
+    A = mul!(similar(H, T, size(H)), H, B)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+function *(B::Bidiagonal, H::UpperHessenberg)
+    T = promote_op(matprod, eltype(B), eltype(H))
+    A = mul!(similar(H, T, size(H)), B, H)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+
+function /(H::UpperHessenberg, B::Bidiagonal)
+    T = typeof(oneunit(eltype(H))/oneunit(eltype(B)))
+    A = _rdiv!(similar(H, T, size(H)), H, B)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+
+function \(B::Bidiagonal, H::UpperHessenberg)
+    T = typeof(oneunit(eltype(B))\oneunit(eltype(H)))
+    A = ldiv!(similar(H, T, size(H)), B, H)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+
 # specialized +/- for structured matrices. If these are removed, it falls
 # back to broadcasting which has ~2-10x speed regressions.
 # For the other structure matrix pairs, broadcasting works well.
@@ -235,65 +266,15 @@ function (-)(A::UniformScaling, B::Diagonal)
     Diagonal(Ref(A) .- B.diag)
 end
 
-lmul!(Q::AbstractQ, B::AbstractTriangular) = lmul!(Q, full!(B))
-lmul!(Q::QRPackedQ, B::AbstractTriangular) = lmul!(Q, full!(B)) # disambiguation
-lmul!(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractTriangular) = lmul!(Q, full!(B))
-lmul!(Q::Adjoint{<:Any,<:QRPackedQ}, B::AbstractTriangular) = lmul!(Q, full!(B)) # disambiguation
-
-function _qlmul(Q::AbstractQ, B)
-    TQB = promote_type(eltype(Q), eltype(B))
-    if size(Q.factors, 1) == size(B, 1)
-        Bnew = Matrix{TQB}(B)
-    elseif size(Q.factors, 2) == size(B, 1)
-        Bnew = [Matrix{TQB}(B); zeros(TQB, size(Q.factors, 1) - size(B,1), size(B, 2))]
-    else
-        throw(DimensionMismatch("first dimension of matrix must have size either $(size(Q.factors, 1)) or $(size(Q.factors, 2))"))
-    end
-    lmul!(convert(AbstractMatrix{TQB}, Q), Bnew)
-end
-function _qlmul(adjQ::Adjoint{<:Any,<:AbstractQ}, B)
-    TQB = promote_type(eltype(adjQ), eltype(B))
-    lmul!(adjoint(convert(AbstractMatrix{TQB}, parent(adjQ))), Matrix{TQB}(B))
-end
-
-*(Q::AbstractQ, B::AbstractTriangular) = _qlmul(Q, B)
-*(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractTriangular) = _qlmul(Q, B)
-*(Q::AbstractQ, B::BiTriSym) = _qlmul(Q, B)
-*(Q::Adjoint{<:Any,<:AbstractQ}, B::BiTriSym) = _qlmul(Q, B)
-*(Q::AbstractQ, B::Diagonal) = _qlmul(Q, B)
-*(Q::Adjoint{<:Any,<:AbstractQ}, B::Diagonal) = _qlmul(Q, B)
-
-rmul!(A::AbstractTriangular, Q::AbstractQ) = rmul!(full!(A), Q)
-rmul!(A::AbstractTriangular, Q::Adjoint{<:Any,<:AbstractQ}) = rmul!(full!(A), Q)
-
-function _qrmul(A, Q::AbstractQ)
-    TAQ = promote_type(eltype(A), eltype(Q))
-    return rmul!(Matrix{TAQ}(A), convert(AbstractMatrix{TAQ}, Q))
-end
-function _qrmul(A, adjQ::Adjoint{<:Any,<:AbstractQ})
-    Q = adjQ.parent
-    TAQ = promote_type(eltype(A), eltype(Q))
-    if size(A,2) == size(Q.factors, 1)
-        Anew = Matrix{TAQ}(A)
-    elseif size(A,2) == size(Q.factors,2)
-        Anew = [Matrix{TAQ}(A) zeros(TAQ, size(A, 1), size(Q.factors, 1) - size(Q.factors, 2))]
-    else
-        throw(DimensionMismatch("matrix A has dimensions $(size(A)) but matrix B has dimensions $(size(Q))"))
-    end
-    return rmul!(Anew, adjoint(convert(AbstractMatrix{TAQ}, Q)))
-end
+## Diagonal construction from UniformScaling
+Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
+Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
 
-*(A::AbstractTriangular, Q::AbstractQ) = _qrmul(A, Q)
-*(A::AbstractTriangular, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q)
-*(A::BiTriSym, Q::AbstractQ) = _qrmul(A, Q)
-*(A::BiTriSym, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q)
-*(A::Diagonal, Q::AbstractQ) = _qrmul(A, Q)
-*(A::Diagonal, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q)
+Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) =
+    Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z))
 
-*(Q::AbstractQ, B::AbstractQ) = Q * (B * I)
-*(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractQ) = Q * (B * I)
-*(Q::AbstractQ, B::Adjoint{<:Any,<:AbstractQ}) = Q * (B * I)
-*(Q::Adjoint{<:Any,<:AbstractQ}, B::Adjoint{<:Any,<:AbstractQ}) = Q * (B * I)
+_diag_or_value(A::Diagonal) = A.diag
+_diag_or_value(A::UniformScaling) = A.λ
 
 # fill[stored]! methods
 fillstored!(A::Diagonal, x) = (fill!(A.diag, x); A)
@@ -362,9 +343,7 @@ const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpo
 promote_to_array_type(::Tuple{Vararg{Union{_DenseConcatGroup,UniformScaling}}}) = Matrix
 
 Base._cat(dims, xs::_DenseConcatGroup...) = Base._cat_t(dims, promote_eltype(xs...), xs...)
-vcat(A::Vector...) = Base.typed_vcat(promote_eltype(A...), A...)
 vcat(A::_DenseConcatGroup...) = Base.typed_vcat(promote_eltype(A...), A...)
-hcat(A::Vector...) = Base.typed_hcat(promote_eltype(A...), A...)
 hcat(A::_DenseConcatGroup...) = Base.typed_hcat(promote_eltype(A...), A...)
 hvcat(rows::Tuple{Vararg{Int}}, xs::_DenseConcatGroup...) = Base.typed_hvcat(promote_eltype(xs...), rows, xs...)
 # For performance, specially handle the case where the matrices/vectors have homogeneous eltype
diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
index ccf95f88a1bee..02e39b199679b 100644
--- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
@@ -9,35 +9,41 @@ StructuredMatrixStyle{T}(::Val{2}) where {T} = StructuredMatrixStyle{T}()
 StructuredMatrixStyle{T}(::Val{N}) where {T,N} = Broadcast.DefaultArrayStyle{N}()
 
 const StructuredMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,LowerTriangular,UnitLowerTriangular,UpperTriangular,UnitUpperTriangular}
-Broadcast.BroadcastStyle(::Type{T}) where {T<:StructuredMatrix} = StructuredMatrixStyle{T}()
+for ST in Base.uniontypes(StructuredMatrix)
+    @eval Broadcast.BroadcastStyle(::Type{<:$ST}) = $(StructuredMatrixStyle{ST}())
+end
 
 # Promotion of broadcasts between structured matrices. This is slightly unusual
 # as we define them symmetrically. This allows us to have a fallback to DefaultArrayStyle{2}().
 # Diagonal can cavort with all the other structured matrix types.
 # Bidiagonal doesn't know if it's upper or lower, so it becomes Tridiagonal
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Diagonal}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Diagonal}) =
     StructuredMatrixStyle{Diagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Union{Bidiagonal,SymTridiagonal,Tridiagonal}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Bidiagonal}) =
+    StructuredMatrixStyle{Bidiagonal}()
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{SymTridiagonal,Tridiagonal}}) =
     StructuredMatrixStyle{Tridiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) =
     StructuredMatrixStyle{LowerTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) =
     StructuredMatrixStyle{UpperTriangular}()
 
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Bidiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{Diagonal}) =
+    StructuredMatrixStyle{Bidiagonal}()
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{<:Union{Bidiagonal,SymTridiagonal,Tridiagonal}}) =
     StructuredMatrixStyle{Tridiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:SymTridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{SymTridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
     StructuredMatrixStyle{Tridiagonal}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Tridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{Tridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) =
     StructuredMatrixStyle{Tridiagonal}()
 
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:LowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{LowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) =
     StructuredMatrixStyle{LowerTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:UpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{UpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) =
     StructuredMatrixStyle{UpperTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:UnitLowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitLowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) =
     StructuredMatrixStyle{LowerTriangular}()
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:UnitUpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) =
+Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitUpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) =
     StructuredMatrixStyle{UpperTriangular}()
 
 Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) =
@@ -45,17 +51,17 @@ Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLow
 Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) =
     StructuredMatrixStyle{Matrix}()
 
-# Make sure that `StructuredMatrixStyle{<:Matrix}` doesn't ever end up falling
+# Make sure that `StructuredMatrixStyle{Matrix}` doesn't ever end up falling
 # through and give back `DefaultArrayStyle{2}`
-Broadcast.BroadcastStyle(T::StructuredMatrixStyle{<:Matrix}, ::StructuredMatrixStyle) = T
-Broadcast.BroadcastStyle(::StructuredMatrixStyle, T::StructuredMatrixStyle{<:Matrix}) = T
-Broadcast.BroadcastStyle(T::StructuredMatrixStyle{<:Matrix}, ::StructuredMatrixStyle{<:Matrix}) = T
+Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle) = T
+Broadcast.BroadcastStyle(::StructuredMatrixStyle, T::StructuredMatrixStyle{Matrix}) = T
+Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle{Matrix}) = T
 
 # All other combinations fall back to the default style
 Broadcast.BroadcastStyle(::StructuredMatrixStyle, ::StructuredMatrixStyle) = DefaultArrayStyle{2}()
 
 # And a definition akin to similar using the structured type:
-structured_broadcast_alloc(bc, ::Type{<:Diagonal}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{Diagonal}, ::Type{ElType}, n) where {ElType} =
     Diagonal(Array{ElType}(undef, n))
 # Bidiagonal is tricky as we need to know if it's upper or lower. The promotion
 # system will return Tridiagonal when there's more than one Bidiagonal, but when
@@ -67,9 +73,9 @@ merge_uplos(a, b) = a == b ? a : 'T'
 
 find_uplo(a::Bidiagonal) = a.uplo
 find_uplo(a) = nothing
-find_uplo(bc::Broadcasted) = mapreduce(find_uplo, merge_uplos, bc.args, init=nothing)
+find_uplo(bc::Broadcasted) = mapfoldl(find_uplo, merge_uplos, Broadcast.cat_nested(bc), init=nothing)
 
-function structured_broadcast_alloc(bc, ::Type{<:Bidiagonal}, ::Type{ElType}, n) where {ElType}
+function structured_broadcast_alloc(bc, ::Type{Bidiagonal}, ::Type{ElType}, n) where {ElType}
     uplo = n > 0 ? find_uplo(bc) : 'U'
     n1 = max(n - 1, 0)
     if uplo == 'T'
@@ -77,19 +83,19 @@ function structured_broadcast_alloc(bc, ::Type{<:Bidiagonal}, ::Type{ElType}, n)
     end
     return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n1), uplo)
 end
-structured_broadcast_alloc(bc, ::Type{<:SymTridiagonal}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{SymTridiagonal}, ::Type{ElType}, n) where {ElType} =
     SymTridiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n-1))
-structured_broadcast_alloc(bc, ::Type{<:Tridiagonal}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{Tridiagonal}, ::Type{ElType}, n) where {ElType} =
     Tridiagonal(Array{ElType}(undef, n-1),Array{ElType}(undef, n),Array{ElType}(undef, n-1))
-structured_broadcast_alloc(bc, ::Type{<:LowerTriangular}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{LowerTriangular}, ::Type{ElType}, n) where {ElType} =
     LowerTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{<:UpperTriangular}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{UpperTriangular}, ::Type{ElType}, n) where {ElType} =
     UpperTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{<:UnitLowerTriangular}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) where {ElType} =
     UnitLowerTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{<:UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} =
     UnitUpperTriangular(Array{ElType}(undef, n, n))
-structured_broadcast_alloc(bc, ::Type{<:Matrix}, ::Type{ElType}, n) where {ElType} =
+structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} =
     Matrix(Array{ElType}(undef, n, n))
 
 # A _very_ limited list of structure-preserving functions known at compile-time. This list is
@@ -152,83 +158,91 @@ function Base.similar(bc::Broadcasted{StructuredMatrixStyle{T}}, ::Type{ElType})
     return similar(convert(Broadcasted{DefaultArrayStyle{ndims(bc)}}, bc), ElType)
 end
 
+isvalidstructbc(dest, bc::Broadcasted{T}) where {T<:StructuredMatrixStyle} =
+    Broadcast.combine_styles(dest, bc) === Broadcast.combine_styles(dest) &&
+    (isstructurepreserving(bc) || fzeropreserving(bc))
+
+isvalidstructbc(dest::Bidiagonal, bc::Broadcasted{StructuredMatrixStyle{Bidiagonal}}) =
+    (size(dest, 1) < 2 || find_uplo(bc) == dest.uplo) &&
+    (isstructurepreserving(bc) || fzeropreserving(bc))
+
 function copyto!(dest::Diagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc))
+    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.diag[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.diag[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
     end
     return dest
 end
 
 function copyto!(dest::Bidiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc))
+    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.dv[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
     end
     if dest.uplo == 'U'
         for i = 1:size(dest, 1)-1
-            dest.ev[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
+            dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
         end
     else
         for i = 1:size(dest, 1)-1
-            dest.ev[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
+            dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
         end
     end
     return dest
 end
 
 function copyto!(dest::SymTridiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc))
+    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.dv[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
     end
     for i = 1:size(dest, 1)-1
-        v = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        v == Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)"))
+        v = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
+        v == (@inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)"))
         dest.ev[i] = v
     end
     return dest
 end
 
 function copyto!(dest::Tridiagonal, bc::Broadcasted{<:StructuredMatrixStyle})
-    !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc))
+    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for i in axs[1]
-        dest.d[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
+        dest.d[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i))
     end
     for i = 1:size(dest, 1)-1
-        dest.du[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
-        dest.dl[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
+        dest.du[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1))
+        dest.dl[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))
     end
     return dest
 end
 
 function copyto!(dest::LowerTriangular, bc::Broadcasted{<:StructuredMatrixStyle})
-    !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc))
+    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for j in axs[2]
         for i in j:axs[1][end]
-            dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
+            @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
         end
     end
     return dest
 end
 
 function copyto!(dest::UpperTriangular, bc::Broadcasted{<:StructuredMatrixStyle})
-    !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc))
+    isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
     axs = axes(dest)
     axes(bc) == axs || Broadcast.throwdm(axes(bc), axs)
     for j in axs[2]
         for i in 1:j
-            dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
+            @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j))
         end
     end
     return dest
diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl
index 86f322524d13d..c1b886f616f02 100644
--- a/stdlib/LinearAlgebra/src/svd.jl
+++ b/stdlib/LinearAlgebra/src/svd.jl
@@ -213,7 +213,6 @@ Base.propertynames(F::SVD, private::Bool=false) =
 
 Return the singular values of `A`, saving space by overwriting the input.
 See also [`svdvals`](@ref) and [`svd`](@ref).
-```
 """
 svdvals!(A::StridedMatrix{T}) where {T<:BlasFloat} = isempty(A) ? zeros(real(T), 0) : LAPACK.gesdd!('N', A)[2]
 svdvals!(A::StridedVector{T}) where {T<:BlasFloat} = svdvals!(reshape(A, (length(A), 1)))
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
index 376c1f7820b6f..f96ca812ea0ec 100644
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ b/stdlib/LinearAlgebra/src/symmetric.jl
@@ -17,34 +17,45 @@ end
 Construct a `Symmetric` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
 triangle of the matrix `A`.
 
+`Symmetric` views are mainly useful for real-symmetric matrices, for which
+specialized algorithms (e.g. for eigenproblems) are enabled for `Symmetric` types.
+More generally, see also [`Hermitian(A)`](@ref) for Hermitian matrices `A == A'`, which
+is effectively equivalent to `Symmetric` for real matrices but is also useful for
+complex matrices.  (Whereas complex `Symmetric` matrices are supported but have few
+if any specialized algorithms.)
+
+To compute the symmetric part of a real matrix, or more generally the Hermitian part `(A + A') / 2` of
+a real or complex matrix `A`, use [`hermitianpart`](@ref).
+
 # Examples
 ```jldoctest
-julia> A = [1 0 2 0 3; 0 4 0 5 0; 6 0 7 0 8; 0 9 0 1 0; 2 0 3 0 4]
-5×5 Matrix{Int64}:
- 1  0  2  0  3
- 0  4  0  5  0
- 6  0  7  0  8
- 0  9  0  1  0
- 2  0  3  0  4
+julia> A = [1 2 3; 4 5 6; 7 8 9]
+3×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+ 7  8  9
 
 julia> Supper = Symmetric(A)
-5×5 Symmetric{Int64, Matrix{Int64}}:
- 1  0  2  0  3
- 0  4  0  5  0
- 2  0  7  0  8
- 0  5  0  1  0
- 3  0  8  0  4
+3×3 Symmetric{Int64, Matrix{Int64}}:
+ 1  2  3
+ 2  5  6
+ 3  6  9
 
 julia> Slower = Symmetric(A, :L)
-5×5 Symmetric{Int64, Matrix{Int64}}:
- 1  0  6  0  2
- 0  4  0  9  0
- 6  0  7  0  3
- 0  9  0  1  0
- 2  0  3  0  4
+3×3 Symmetric{Int64, Matrix{Int64}}:
+ 1  4  7
+ 4  5  8
+ 7  8  9
+
+julia> hermitianpart(A)
+3×3 Hermitian{Float64, Matrix{Float64}}:
+ 1.0  3.0  5.0
+ 3.0  5.0  7.0
+ 5.0  7.0  9.0
 ```
 
-Note that `Supper` will not be equal to `Slower` unless `A` is itself symmetric (e.g. if `A == transpose(A)`).
+Note that `Supper` will not be equal to `Slower` unless `A` is itself symmetric (e.g. if
+`A == transpose(A)`).
 """
 function Symmetric(A::AbstractMatrix, uplo::Symbol=:U)
     checksquare(A)
@@ -99,25 +110,33 @@ end
 Construct a `Hermitian` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
 triangle of the matrix `A`.
 
+To compute the Hermitian part of `A`, use [`hermitianpart`](@ref).
+
 # Examples
 ```jldoctest
-julia> A = [1 0 2+2im 0 3-3im; 0 4 0 5 0; 6-6im 0 7 0 8+8im; 0 9 0 1 0; 2+2im 0 3-3im 0 4];
+julia> A = [1 2+2im 3-3im; 4 5 6-6im; 7 8+8im 9]
+3×3 Matrix{Complex{Int64}}:
+ 1+0im  2+2im  3-3im
+ 4+0im  5+0im  6-6im
+ 7+0im  8+8im  9+0im
 
 julia> Hupper = Hermitian(A)
-5×5 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}:
- 1+0im  0+0im  2+2im  0+0im  3-3im
- 0+0im  4+0im  0+0im  5+0im  0+0im
- 2-2im  0+0im  7+0im  0+0im  8+8im
- 0+0im  5+0im  0+0im  1+0im  0+0im
- 3+3im  0+0im  8-8im  0+0im  4+0im
+3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}:
+ 1+0im  2+2im  3-3im
+ 2-2im  5+0im  6-6im
+ 3+3im  6+6im  9+0im
 
 julia> Hlower = Hermitian(A, :L)
-5×5 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}:
- 1+0im  0+0im  6+6im  0+0im  2-2im
- 0+0im  4+0im  0+0im  9+0im  0+0im
- 6-6im  0+0im  7+0im  0+0im  3+3im
- 0+0im  9+0im  0+0im  1+0im  0+0im
- 2+2im  0+0im  3-3im  0+0im  4+0im
+3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}:
+ 1+0im  4+0im  7+0im
+ 4+0im  5+0im  8-8im
+ 7+0im  8+8im  9+0im
+
+julia> hermitianpart(A)
+3×3 Hermitian{ComplexF64, Matrix{ComplexF64}}:
+ 1.0+0.0im  3.0+1.0im  5.0-1.5im
+ 3.0-1.0im  5.0+0.0im  7.0-7.0im
+ 5.0+1.5im  7.0+7.0im  9.0+0.0im
 ```
 
 Note that `Hupper` will not be equal to `Hlower` unless `A` is itself Hermitian (e.g. if `A == adjoint(A)`).
@@ -863,3 +882,49 @@ for func in (:log, :sqrt)
         end
     end
 end
+
+"""
+    hermitianpart(A, uplo=:U) -> Hermitian
+
+Return the Hermitian part of the square matrix `A`, defined as `(A + A') / 2`, as a
+[`Hermitian`](@ref) matrix. For real matrices `A`, this is also known as the symmetric part
+of `A`; it is also sometimes called the "operator real part". The optional argument `uplo` controls the corresponding argument of the
+[`Hermitian`](@ref) view. For real matrices, the latter is equivalent to a
+[`Symmetric`](@ref) view.
+
+See also [`hermitianpart!`](@ref) for the corresponding in-place operation.
+
+!!! compat "Julia 1.10"
+    This function requires Julia 1.10 or later.
+"""
+hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart(A), uplo)
+
+"""
+    hermitianpart!(A, uplo=:U) -> Hermitian
+
+Overwrite the square matrix `A` in-place with its Hermitian part `(A + A') / 2`, and return
+[`Hermitian(A, uplo)`](@ref). For real matrices `A`, this is also known as the symmetric
+part of `A`.
+
+See also [`hermitianpart`](@ref) for the corresponding out-of-place operation.
+
+!!! compat "Julia 1.10"
+    This function requires Julia 1.10 or later.
+"""
+hermitianpart!(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart!(A), uplo)
+
+_hermitianpart(A::AbstractMatrix) = _hermitianpart!(copy_similar(A, Base.promote_op(/, eltype(A), Int)))
+_hermitianpart(a::Number) = real(a)
+
+function _hermitianpart!(A::AbstractMatrix)
+    require_one_based_indexing(A)
+    n = checksquare(A)
+    @inbounds for j in 1:n
+        A[j, j] = _hermitianpart(A[j, j])
+        for i in 1:j-1
+            A[i, j] = val = (A[i, j] + adjoint(A[j, i])) / 2
+            A[j, i] = adjoint(val)
+        end
+    end
+    return A
+end
diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl
index 248fc048612c8..1e4ba4119393d 100644
--- a/stdlib/LinearAlgebra/src/triangular.jl
+++ b/stdlib/LinearAlgebra/src/triangular.jl
@@ -176,7 +176,7 @@ function Matrix{T}(A::UnitLowerTriangular) where T
     copyto!(B, A.data)
     tril!(B)
     for i = 1:size(B,1)
-        B[i,i] = 1
+        B[i,i] = oneunit(T)
     end
     B
 end
@@ -191,7 +191,7 @@ function Matrix{T}(A::UnitUpperTriangular) where T
     copyto!(B, A.data)
     triu!(B)
     for i = 1:size(B,1)
-        B[i,i] = 1
+        B[i,i] = oneunit(T)
     end
     B
 end
@@ -205,7 +205,7 @@ function full!(A::UnitLowerTriangular)
     B = A.data
     tril!(B)
     for i = 1:size(A,1)
-        B[i,i] = 1
+        B[i,i] = oneunit(eltype(B))
     end
     B
 end
@@ -218,7 +218,7 @@ function full!(A::UnitUpperTriangular)
     B = A.data
     triu!(B)
     for i = 1:size(A,1)
-        B[i,i] = 1
+        B[i,i] = oneunit(eltype(B))
     end
     B
 end
@@ -234,7 +234,7 @@ getindex(A::UpperTriangular, i::Integer, j::Integer) =
 
 function setindex!(A::UpperTriangular, x, i::Integer, j::Integer)
     if i > j
-        x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " *
+        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
             "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)"))
     else
         A.data[i,j] = x
@@ -244,10 +244,10 @@ end
 
 function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer)
     if i > j
-        x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " *
+        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
             "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)"))
     elseif i == j
-        x == 1 || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
+        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
             "of a UnitUpperTriangular matrix to a non-unit value ($x)"))
     else
         A.data[i,j] = x
@@ -257,7 +257,7 @@ end
 
 function setindex!(A::LowerTriangular, x, i::Integer, j::Integer)
     if i < j
-        x == 0 || throw(ArgumentError("cannot set index in the upper triangular part " *
+        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
             "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)"))
     else
         A.data[i,j] = x
@@ -267,10 +267,10 @@ end
 
 function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer)
     if i < j
-        x == 0 || throw(ArgumentError("cannot set index in the upper triangular part " *
+        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
             "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)"))
     elseif i == j
-        x == 1 || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
+        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
             "of a UnitLowerTriangular matrix to a non-unit value ($x)"))
     else
         A.data[i,j] = x
@@ -302,23 +302,23 @@ istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k)
 istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k)
 istriu(A::Transpose, k::Integer=0) = istril(A.parent, -k)
 
-function tril!(A::UpperTriangular, k::Integer=0)
+function tril!(A::UpperTriangular{T}, k::Integer=0) where {T}
     n = size(A,1)
     if k < 0
-        fill!(A.data,0)
+        fill!(A.data, zero(T))
         return A
     elseif k == 0
         for j in 1:n, i in 1:j-1
-            A.data[i,j] = 0
+            A.data[i,j] = zero(T)
         end
         return A
     else
         return UpperTriangular(tril!(A.data,k))
     end
 end
-triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data,k))
+triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data, k))
 
-function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where T
+function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where {T}
     n = size(A,1)
     if k < 0
         fill!(A.data, zero(T))
@@ -341,25 +341,25 @@ function triu!(A::UnitUpperTriangular, k::Integer=0)
     for i in diagind(A)
         A.data[i] = oneunit(eltype(A))
     end
-    return triu!(UpperTriangular(A.data),k)
+    return triu!(UpperTriangular(A.data), k)
 end
 
-function triu!(A::LowerTriangular, k::Integer=0)
+function triu!(A::LowerTriangular{T}, k::Integer=0) where {T}
     n = size(A,1)
     if k > 0
-        fill!(A.data,0)
+        fill!(A.data, zero(T))
         return A
     elseif k == 0
         for j in 1:n, i in j+1:n
-            A.data[i,j] = 0
+            A.data[i,j] = zero(T)
         end
         return A
     else
-        return LowerTriangular(triu!(A.data,k))
+        return LowerTriangular(triu!(A.data, k))
     end
 end
 
-tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data,k))
+tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data, k))
 
 function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T
     n = size(A,1)
@@ -376,7 +376,7 @@ function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T
         for i in diagind(A)
             A.data[i] = oneunit(T)
         end
-        return LowerTriangular(triu!(A.data,k))
+        return LowerTriangular(triu!(A.data, k))
     end
 end
 
@@ -384,7 +384,7 @@ function tril!(A::UnitLowerTriangular, k::Integer=0)
     for i in diagind(A)
         A.data[i] = oneunit(eltype(A))
     end
-    return tril!(LowerTriangular(A.data),k)
+    return tril!(LowerTriangular(A.data), k)
 end
 
 adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data))
@@ -406,9 +406,9 @@ adjoint!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U' , true, true
 adjoint!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U' , true, true))
 
 diag(A::LowerTriangular) = diag(A.data)
-diag(A::UnitLowerTriangular) = fill(one(eltype(A)), size(A,1))
+diag(A::UnitLowerTriangular) = fill(oneunit(eltype(A)), size(A,1))
 diag(A::UpperTriangular) = diag(A.data)
-diag(A::UnitUpperTriangular) = fill(one(eltype(A)), size(A,1))
+diag(A::UnitUpperTriangular) = fill(oneunit(eltype(A)), size(A,1))
 
 # Unary operations
 -(A::LowerTriangular) = LowerTriangular(-A.data)
@@ -416,14 +416,14 @@ diag(A::UnitUpperTriangular) = fill(one(eltype(A)), size(A,1))
 function -(A::UnitLowerTriangular)
     Anew = -A.data
     for i = 1:size(A, 1)
-        Anew[i, i] = -1
+        Anew[i, i] = -A[i, i]
     end
     LowerTriangular(Anew)
 end
 function -(A::UnitUpperTriangular)
     Anew = -A.data
     for i = 1:size(A, 1)
-        Anew[i, i] = -1
+        Anew[i, i] = -A[i, i]
     end
     UpperTriangular(Anew)
 end
@@ -434,7 +434,7 @@ tr(A::UpperTriangular) = tr(A.data)
 tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A))
 
 # copy and scale
-function copyto!(A::T, B::T) where T<:Union{UpperTriangular,UnitUpperTriangular}
+function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}}
     n = size(B,1)
     for j = 1:n
         for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j)
@@ -443,7 +443,7 @@ function copyto!(A::T, B::T) where T<:Union{UpperTriangular,UnitUpperTriangular}
     end
     return A
 end
-function copyto!(A::T, B::T) where T<:Union{LowerTriangular,UnitLowerTriangular}
+function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}}
     n = size(B,1)
     for j = 1:n
         for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n
@@ -453,106 +453,100 @@ function copyto!(A::T, B::T) where T<:Union{LowerTriangular,UnitLowerTriangular}
     return A
 end
 
-# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a
-# number.
-for (Trig, UnitTrig) in Any[(UpperTriangular, UnitUpperTriangular),
-                            (LowerTriangular, UnitLowerTriangular)]
-    for (TB, TC) in Any[(Trig, Number),
-                        (Number, Trig),
-                        (UnitTrig, Number),
-                        (Number, UnitTrig)]
-        @eval @inline mul!(A::$Trig, B::$TB, C::$TC, alpha::Number, beta::Number) =
-            _mul!(A, B, C, MulAddMul(alpha, beta))
-    end
-end
+# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a number.
+# be permissive here and require compatibility later in _triscale!
+@inline mul!(A::UpperOrLowerTriangular, B::UpperOrLowerTriangular, C::Number, alpha::Number, beta::Number) =
+    _triscale!(A, B, C, MulAddMul(alpha, beta))
+@inline mul!(A::UpperOrLowerTriangular, B::Number, C::UpperOrLowerTriangular, alpha::Number, beta::Number) =
+    _triscale!(A, B, C, MulAddMul(alpha, beta))
 
-@inline function _mul!(A::UpperTriangular, B::UpperTriangular, c::Number, _add::MulAddMul)
+function _triscale!(A::UpperTriangular, B::UpperTriangular, c::Number, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         for i = 1:j
-            @inbounds _modify!(_add, B[i,j] * c, A, (i,j))
+            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::UpperTriangular, c::Number, B::UpperTriangular, _add::MulAddMul)
+function _triscale!(A::UpperTriangular, c::Number, B::UpperTriangular, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         for i = 1:j
-            @inbounds _modify!(_add, c * B[i,j], A, (i,j))
+            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::UpperTriangular, B::UnitUpperTriangular, c::Number, _add::MulAddMul)
+function _triscale!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular, c::Number, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         @inbounds _modify!(_add, c, A, (j,j))
         for i = 1:(j - 1)
-            @inbounds _modify!(_add, B[i,j] * c, A, (i,j))
+            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::UpperTriangular, c::Number, B::UnitUpperTriangular, _add::MulAddMul)
+function _triscale!(A::UpperOrUnitUpperTriangular, c::Number, B::UnitUpperTriangular, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         @inbounds _modify!(_add, c, A, (j,j))
         for i = 1:(j - 1)
-            @inbounds _modify!(_add, c * B[i,j], A, (i,j))
+            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::LowerTriangular, B::LowerTriangular, c::Number, _add::MulAddMul)
+function _triscale!(A::LowerTriangular, B::LowerTriangular, c::Number, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         for i = j:n
-            @inbounds _modify!(_add, B[i,j] * c, A, (i,j))
+            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::LowerTriangular, c::Number, B::LowerTriangular, _add::MulAddMul)
+function _triscale!(A::LowerTriangular, c::Number, B::LowerTriangular, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         for i = j:n
-            @inbounds _modify!(_add, c * B[i,j], A, (i,j))
+            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::LowerTriangular, B::UnitLowerTriangular, c::Number, _add::MulAddMul)
+function _triscale!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular, c::Number, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         @inbounds _modify!(_add, c, A, (j,j))
         for i = (j + 1):n
-            @inbounds _modify!(_add, B[i,j] * c, A, (i,j))
+            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
         end
     end
     return A
 end
-@inline function _mul!(A::LowerTriangular, c::Number, B::UnitLowerTriangular, _add::MulAddMul)
+function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriangular, _add)
     n = checksquare(B)
     iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     for j = 1:n
         @inbounds _modify!(_add, c, A, (j,j))
         for i = (j + 1):n
-            @inbounds _modify!(_add, c * B[i,j], A, (i,j))
+            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
         end
     end
     return A
 end
 
-rmul!(A::Union{UpperTriangular,LowerTriangular}, c::Number) = mul!(A, A, c)
-lmul!(c::Number, A::Union{UpperTriangular,LowerTriangular}) = mul!(A, c, A)
+rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul())
+lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul())
 
 function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector)
     require_one_based_indexing(x, y)
@@ -668,26 +662,39 @@ fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1);
 # BlasFloat routines #
 ######################
 
-lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B) # is this necessary?
+lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B)
+mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = _multrimat!(C, A, B)
+mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = _multrimat!(C, A, B)
+mul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = _mulmattri!(C, A, B)
+mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = _multrimat!(C, A, B)
 
-mul!(C::AbstractVector, A::AbstractTriangular, transB::Transpose{<:Any,<:AbstractVecOrMat}) =
-    (B = transB.parent; lmul!(A, transpose!(C, B)))
-mul!(C::AbstractMatrix, A::AbstractTriangular, transB::Transpose{<:Any,<:AbstractVecOrMat}) =
-    (B = transB.parent; lmul!(A, transpose!(C, B)))
-mul!(C::AbstractMatrix, A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractVecOrMat}) =
-    (B = adjB.parent; lmul!(A, adjoint!(C, B)))
-mul!(C::AbstractVecOrMat, A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractVecOrMat}) =
-    (B = adjB.parent; lmul!(A, adjoint!(C, B)))
+for TC in (:AbstractVector, :AbstractMatrix)
+    @eval @inline function mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number)
+        if isone(alpha) && iszero(beta)
+            return mul!(C, A, B)
+        else
+            return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta))
+        end
+    end
+end
+for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix),
+                    (:AbstractMatrix, :AbstractTriangular),
+                    (:AbstractTriangular, :AbstractTriangular)
+                )
+    @eval @inline function mul!(C::AbstractMatrix, A::$TA, B::$TB, alpha::Number, beta::Number)
+        if isone(alpha) && iszero(beta)
+            return mul!(C, A, B)
+        else
+            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
+        end
+    end
+end
 
-# The three methods are necessary to avoid ambiguities with definitions in matmul.jl
-mul!(C::AbstractVector  , A::AbstractTriangular, B::AbstractVector)   = lmul!(A, copyto!(C, B))
-mul!(C::AbstractMatrix  , A::AbstractTriangular, B::AbstractVecOrMat) = lmul!(A, copyto!(C, B))
-mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = lmul!(A, copyto!(C, B))
 
-@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
-    mul!(C, A, copy(B), alpha, beta)
-@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
-    mul!(C, A, copy(B), alpha, beta)
+# generic fallback for AbstractTriangular matrices outside of the four subtypes provided here
+_multrimat!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) =
+    lmul!(A, inplace_adj_or_trans(B)(C, _parent(B)))
+_mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = rmul!(copyto!(C, A), B)
 
 # preserve triangular structure in in-place multiplication
 for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
@@ -698,18 +705,10 @@ for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
                         (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular),
                         (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular),
                         (:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular))
-    @eval function mul!(C::$cty, A::$aty, B::$bty)
-        lmul!(A, copyto!(parent(C), B))
+    @eval function _multrimat!(C::$cty, A::$aty, B::$bty)
+        _multrimat!(parent(C), A, B)
         return C
     end
-
-    @eval @inline function mul!(C::$cty, A::$aty, B::$bty, alpha::Number, beta::Number)
-        if isone(alpha) && iszero(beta)
-            return mul!(C, A, B)
-        else
-            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
-        end
-    end
 end
 
 # direct multiplication/division
@@ -805,16 +804,46 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'),
     end
 end
 
-function inv(A::LowerTriangular{T}) where T
-    S = typeof((zero(T)*one(T) + zero(T))/one(T))
-    LowerTriangular(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A, 1), size(A, 1))))
-end
-function inv(A::UpperTriangular{T}) where T
-    S = typeof((zero(T)*one(T) + zero(T))/one(T))
-    UpperTriangular(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A, 1), size(A, 1))))
+# redirect back to BLAS
+for t in (:UpperTriangular, :UnitUpperTriangular, :LowerTriangular, :UnitLowerTriangular)
+    @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{T,<:StridedMatrix}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
+        lmul!(A, copyto!(C, B))
+    @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
+        lmul!(A, copyto!(C, B))
+    @eval _multrimat!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
+        lmul!(A, copyto!(C, B))
+    @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
+        rmul!(copyto!(C, A), B)
+    @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasFloat} =
+        rmul!(copyto!(C, A), B)
+    @eval _mulmattri!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
+        rmul!(copyto!(C, A), B)
+
+    @eval ldiv!(C::StridedVecOrMat{T}, A::$t{T,<:StridedMatrix}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
+        ldiv!(A, copyto!(C, B))
+    @eval ldiv!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
+        ldiv!(A, copyto!(C, B))
+    @eval ldiv!(C::StridedVecOrMat{T}, A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
+        ldiv!(A, copyto!(C, B))
+    @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
+        rdiv!(copyto!(C, A), B)
+    @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasFloat} =
+        rdiv!(copyto!(C, A), B)
+    @eval _rdiv!(C::StridedMatrix{T}, A::AbstractMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
+        rdiv!(copyto!(C, A), B)
+end
+
+for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
+    @eval function inv(A::$t{T}) where {T}
+        S = typeof(inv(oneunit(T)))
+        if S <: BlasFloat || S === T # i.e. A is unitless
+            $t(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A))))
+        else
+            J = (one(T)*I)(size(A, 1))
+            $t(ldiv!(similar(A, S, size(A)), A, J))
+        end
+    end
 end
-inv(A::UnitUpperTriangular{T}) where {T} = UnitUpperTriangular(ldiv!(A, Matrix{T}(I, size(A, 1), size(A, 1))))
-inv(A::UnitLowerTriangular{T}) where {T} = UnitLowerTriangular(ldiv!(A, Matrix{T}(I, size(A, 1), size(A, 1))))
 
 errorbounds(A::AbstractTriangular{T,<:AbstractMatrix}, X::AbstractVecOrMat{T}, B::AbstractVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} =
     error("not implemented yet! Please submit a pull request.")
@@ -892,147 +921,193 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
             end
             $t(B)
         end
+
+        lmul!(A::$t, B::AbstractVecOrMat)     = @inline _multrimat!(B, A, B)
+        lmul!(A::$unitt, B::AbstractVecOrMat) = @inline _multrimat!(B, A, B)
+
+        rmul!(A::AbstractMatrix, B::$t)     = @inline _mulmattri!(A, A, B)
+        rmul!(A::AbstractMatrix, B::$unitt) = @inline _mulmattri!(A, A, B)
     end
 end
 
 ## Generic triangular multiplication
-function lmul!(A::UpperTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
+function _multrimat!(C::AbstractVecOrMat, A::UpperTriangular, B::AbstractVecOrMat)
+    require_one_based_indexing(C, A, B)
     m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
+    N = size(A, 1)
+    if m != N
         throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
     end
-    @inbounds for j = 1:n
-        for i = 1:m
-            Bij = A.data[i,i]*B[i,j]
-            for k = i + 1:m
-                Bij += A.data[i,k]*B[k,j]
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != N || nc != n
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
+    end
+    @inbounds for j in 1:n
+        for i in 1:m
+            Cij = A.data[i,i] * B[i,j]
+            for k in i + 1:m
+                Cij += A.data[i,k] * B[k,j]
             end
-            B[i,j] = Bij
+            C[i,j] = Cij
         end
     end
-    B
+    return C
 end
-function lmul!(A::UnitUpperTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
+function _multrimat!(C::AbstractVecOrMat, A::UnitUpperTriangular, B::AbstractVecOrMat)
+    require_one_based_indexing(C, A, B)
     m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
+    N = size(A, 1)
+    if m != N
         throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
     end
-    @inbounds for j = 1:n
-        for i = 1:m
-            Bij = B[i,j]
-            for k = i + 1:m
-                Bij += A.data[i,k]*B[k,j]
+
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != N || nc != n
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
+    end
+    @inbounds for j in 1:n
+        for i in 1:m
+            Cij = oneunit(eltype(A)) * B[i,j]
+            for k in i + 1:m
+                Cij += A.data[i,k] * B[k,j]
             end
-            B[i,j] = Bij
+            C[i,j] = Cij
         end
     end
-    B
+    return C
 end
-function lmul!(A::LowerTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
+function _multrimat!(C::AbstractVecOrMat, A::LowerTriangular, B::AbstractVecOrMat)
+    require_one_based_indexing(C, A, B)
     m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
+    N = size(A, 1)
+    if m != N
         throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
     end
-    @inbounds for j = 1:n
-        for i = m:-1:1
-            Bij = A.data[i,i]*B[i,j]
-            for k = 1:i - 1
-                Bij += A.data[i,k]*B[k,j]
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != N || nc != n
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
+    end
+    @inbounds for j in 1:n
+        for i in m:-1:1
+            Cij = A.data[i,i] * B[i,j]
+            for k in 1:i - 1
+                Cij += A.data[i,k] * B[k,j]
             end
-            B[i,j] = Bij
+            C[i,j] = Cij
         end
     end
-    B
+    return C
 end
-function lmul!(A::UnitLowerTriangular, B::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
+function _multrimat!(C::AbstractVecOrMat, A::UnitLowerTriangular, B::AbstractVecOrMat)
+    require_one_based_indexing(C, A, B)
     m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
+    N = size(A, 1)
+    if m != N
         throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
     end
-    @inbounds for j = 1:n
-        for i = m:-1:1
-            Bij = B[i,j]
-            for k = 1:i - 1
-                Bij += A.data[i,k]*B[k,j]
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != N || nc != n
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
+    end
+    @inbounds for j in 1:n
+        for i in m:-1:1
+            Cij = oneunit(eltype(A)) * B[i,j]
+            for k in 1:i - 1
+                Cij += A.data[i,k] * B[k,j]
             end
-            B[i,j] = Bij
+            C[i,j] = Cij
         end
     end
-    B
+    return C
 end
 
-function rmul!(A::AbstractMatrix, B::UpperTriangular)
-    require_one_based_indexing(A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
+function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UpperTriangular)
+    require_one_based_indexing(C, A, B)
+    m, n = size(A, 1), size(A, 2)
+    N = size(B, 1)
+    if n != N
+        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
+    end
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != m || nc != N
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
     end
-    @inbounds for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]*B.data[j,j]
-            for k = 1:j - 1
-                Aij += A[i,k]*B.data[k,j]
+    @inbounds for i in 1:m
+        for j in n:-1:1
+            Cij = A[i,j] * B.data[j,j]
+            for k in 1:j - 1
+                Cij += A[i,k] * B.data[k,j]
             end
-            A[i,j] = Aij
+            C[i,j] = Cij
         end
     end
-    A
+    return C
 end
-function rmul!(A::AbstractMatrix, B::UnitUpperTriangular)
-    require_one_based_indexing(A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    @inbounds for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij += A[i,k]*B.data[k,j]
+function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UnitUpperTriangular)
+    require_one_based_indexing(C, A, B)
+    m, n = size(A, 1), size(A, 2)
+    N = size(B, 1)
+    if n != N
+        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
+    end
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != m || nc != N
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
+    end
+    @inbounds for i in 1:m
+        for j in n:-1:1
+            Cij = A[i,j] * oneunit(eltype(B))
+            for k in 1:j - 1
+                Cij += A[i,k] * B.data[k,j]
             end
-            A[i,j] = Aij
+            C[i,j] = Cij
         end
     end
-    A
+    return C
 end
-
-function rmul!(A::AbstractMatrix, B::LowerTriangular)
-    require_one_based_indexing(A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    @inbounds for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]*B.data[j,j]
-            for k = j + 1:n
-                Aij += A[i,k]*B.data[k,j]
+function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::LowerTriangular)
+    require_one_based_indexing(C, A, B)
+    m, n = size(A, 1), size(A, 2)
+    N = size(B, 1)
+    if n != N
+        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
+    end
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != m || nc != N
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
+    end
+    @inbounds for i in 1:m
+        for j in 1:n
+            Cij = A[i,j] * B.data[j,j]
+            for k in j + 1:n
+                Cij += A[i,k] * B.data[k,j]
             end
-            A[i,j] = Aij
+            C[i,j] = Cij
         end
     end
-    A
+    return C
 end
-function rmul!(A::AbstractMatrix, B::UnitLowerTriangular)
-    require_one_based_indexing(A, B)
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    @inbounds for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij += A[i,k]*B.data[k,j]
+function _mulmattri!(C::AbstractMatrix, A::AbstractMatrix, B::UnitLowerTriangular)
+    require_one_based_indexing(C, A, B)
+    m, n = size(A, 1), size(A, 2)
+    N = size(B, 1)
+    if n != N
+        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
+    end
+    mc, nc = size(C, 1), size(C, 2)
+    if mc != m || nc != N
+        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
+    end
+    @inbounds for i in 1:m
+        for j in 1:n
+            Cij = A[i,j] * oneunit(eltype(B))
+            for k in j + 1:n
+                Cij += A[i,k] * B.data[k,j]
             end
-            A[i,j] = Aij
+            C[i,j] = Cij
         end
     end
-    A
+    return C
 end
 
 #Generic solver using naive substitution
@@ -1043,222 +1118,237 @@ end
 # does not significantly impact performance as of Dec 2015
 # replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj]
 # does not significantly impact performance as of Dec 2015
-function ldiv!(A::UpperTriangular, b::AbstractVector)
-    require_one_based_indexing(A, b)
-    n = size(A, 2)
-    if !(n == length(b))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+ldiv!(A::AbstractTriangular, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
+function ldiv!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix)
+    require_one_based_indexing(C, A, B)
+    nA, mA = size(A)
+    n = size(B, 1)
+    if nA != n
+        throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal"))
     end
-    @inbounds for j in n:-1:1
-        iszero(A.data[j,j]) && throw(SingularException(j))
-        bj = b[j] = A.data[j,j] \ b[j]
-        for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better
-            b[i] -= A.data[i,j] * bj
+    if size(C) != size(B)
+        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
+    end
+    @inbounds for (c, b) in zip(eachcol(C), eachcol(B))
+        ldiv!(c, A, b)
+    end
+    C
+end
+@inline function ldiv!(c::AbstractVector, A::AbstractTriangular, b::AbstractVector)
+    @boundscheck begin
+        require_one_based_indexing(c, A, b)
+        n = size(A, 2)
+        if !(n == length(b))
+            throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+        end
+        if !(n == length(c))
+            throw(DimensionMismatch("length of output c, $(length(c)), does not match length of right hand side b, $(length(b))"))
         end
     end
-    return b
+    return _ldiv!(c, A, b)
 end
-function ldiv!(A::UnitUpperTriangular, b::AbstractVector)
-    require_one_based_indexing(A, b)
+
+_uconvert_copyto!(c, b, oA) = (c .= Ref(oA) .\ b)
+_uconvert_copyto!(c::AbstractArray{T}, b::AbstractArray{T}, _) where {T} = copyto!(c, b)
+
+@inline _ustrip(a) = oneunit(a) \ a
+@inline _ustrip(a::Union{AbstractFloat,Integer,Complex,Rational}) = a
+
+# all of the following _ldiv! methods are "unsafe" in that they assume one-based indexing
+# and compatible sizes
+function _ldiv!(c::AbstractVector, A::UpperTriangular, b::AbstractVector)
     n = size(A, 2)
-    if !(n == length(b))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-    end
+    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
     @inbounds for j in n:-1:1
-        bj = b[j]
-        for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better
-            b[i] -= A.data[i,j] * bj
+        ajj = A.data[j,j]
+        iszero(ajj) && throw(SingularException(j))
+        cj = c[j] = _ustrip(ajj) \ c[j]
+        for i in j-1:-1:1
+            c[i] -= _ustrip(A.data[i,j]) * cj
         end
     end
-    return b
+    return c
 end
-function ldiv!(A::LowerTriangular, b::AbstractVector)
-    require_one_based_indexing(A, b)
+function _ldiv!(c::AbstractVector, A::UnitUpperTriangular, b::AbstractVector)
     n = size(A, 2)
-    if !(n == length(b))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
+    @inbounds for j in n:-1:1
+        cj = c[j]
+        for i in 1:j-1
+            c[i] -= _ustrip(A.data[i,j]) * cj
+        end
     end
+    return c
+end
+function _ldiv!(c::AbstractVector, A::LowerTriangular, b::AbstractVector)
+    n = size(A, 2)
+    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
     @inbounds for j in 1:n
-        iszero(A.data[j,j]) && throw(SingularException(j))
-        bj = b[j] = A.data[j,j] \ b[j]
+        ajj = A.data[j,j]
+        iszero(ajj) && throw(SingularException(j))
+        cj = c[j] = _ustrip(ajj) \ c[j]
         for i in j+1:n
-            b[i] -= A.data[i,j] * bj
+            c[i] -= _ustrip(A.data[i,j]) * cj
         end
     end
-    return b
+    return c
 end
-function ldiv!(A::UnitLowerTriangular, b::AbstractVector)
-    require_one_based_indexing(A, b)
+function _ldiv!(c::AbstractVector, A::UnitLowerTriangular, b::AbstractVector)
     n = size(A, 2)
-    if !(n == length(b))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-    end
+    c !== b && _uconvert_copyto!(c, b, oneunit(eltype(A)))
     @inbounds for j in 1:n
-        bj = b[j]
+        cj = c[j]
         for i in j+1:n
-            b[i] -= A.data[i,j] * bj
+            c[i] -= _ustrip(A.data[i,j]) * cj
         end
     end
-    return b
-end
-function ldiv!(A::AbstractTriangular, B::AbstractMatrix)
-    require_one_based_indexing(A, B)
-    nA, mA = size(A)
-    n = size(B, 1)
-    if nA != n
-        throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal"))
-    end
-    for b in eachcol(B)
-        ldiv!(A, b)
-    end
-    B
+    return c
 end
 
+
 # in the following transpose and conjugate transpose naive substitution variants,
 # accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015
-for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
-    @eval begin
-        function ldiv!(xA::UpperTriangular{<:Any,<:$t}, b::AbstractVector)
-            require_one_based_indexing(xA, b)
-            A = parent(parent(xA))
-            n = size(A, 1)
-            if !(n == length(b))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-            end
-            @inbounds for j in n:-1:1
-                z = b[j]
-                for i in n:-1:j+1
-                    z -= $tfun(A[i,j]) * b[i]
-                end
-                iszero(A[j,j]) && throw(SingularException(j))
-                b[j] = $tfun(A[j,j]) \ z
-            end
-            return b
+function _ldiv!(c::AbstractVector, xA::UpperTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
+    tfun = adj_or_trans(parent(xA))
+    A = parent(parent(xA))
+    n = size(A, 2)
+    @inbounds for j in n:-1:1
+        ajj = A[j,j]
+        iszero(ajj) && throw(SingularException(j))
+        bj = b[j]
+        for i in j+1:n
+            bj -= tfun(A[i,j]) * c[i]
         end
-
-        function ldiv!(xA::UnitUpperTriangular{<:Any,<:$t}, b::AbstractVector)
-            require_one_based_indexing(xA, b)
-            A = parent(parent(xA))
-            n = size(A, 1)
-            if !(n == length(b))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-            end
-            @inbounds for j in n:-1:1
-                z = b[j]
-                for i in n:-1:j+1
-                    z -= $tfun(A[i,j]) * b[i]
-                end
-                b[j] = z
-            end
-            return b
+        c[j] = tfun(ajj) \ bj
+    end
+    return c
+end
+function _ldiv!(c::AbstractVector, xA::UnitUpperTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
+    tfun = adj_or_trans(parent(xA))
+    A = parent(parent(xA))
+    oA = oneunit(eltype(A))
+    n = size(A, 2)
+    @inbounds for j in n:-1:1
+        bj = b[j]
+        for i in j+1:n
+            bj -= tfun(A[i,j]) * c[i]
         end
-
-        function ldiv!(xA::LowerTriangular{<:Any,<:$t}, b::AbstractVector)
-            require_one_based_indexing(xA, b)
-            A = parent(parent(xA))
-            n = size(A, 1)
-            if !(n == length(b))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-            end
-            @inbounds for j in 1:n
-                z = b[j]
-                for i in 1:j-1
-                    z -= $tfun(A[i,j]) * b[i]
-                end
-                iszero(A[j,j]) && throw(SingularException(j))
-                b[j] = $tfun(A[j,j]) \ z
-            end
-            return b
+        c[j] = oA \ bj
+    end
+    return c
+end
+function _ldiv!(c::AbstractVector, xA::LowerTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
+    tfun = adj_or_trans(parent(xA))
+    A = parent(parent(xA))
+    n = size(A, 2)
+    @inbounds for j in 1:n
+        ajj = A[j,j]
+        iszero(ajj) && throw(SingularException(j))
+        bj = b[j]
+        for i in 1:j-1
+            bj -= tfun(A[i,j]) * c[i]
         end
-
-        function ldiv!(xA::UnitLowerTriangular{<:Any,<:$t}, b::AbstractVector)
-            require_one_based_indexing(xA, b)
-            A = parent(parent(xA))
-            n = size(A, 1)
-            if !(n == length(b))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
-            end
-            @inbounds for j in 1:n
-                z = b[j]
-                for i in 1:j-1
-                    z -= $tfun(A[i,j]) * b[i]
-                end
-                b[j] = z
-            end
-            return b
+        c[j] = tfun(ajj) \ bj
+    end
+    return c
+end
+function _ldiv!(c::AbstractVector, xA::UnitLowerTriangular{<:Any,<:AdjOrTrans}, b::AbstractVector)
+    tfun = adj_or_trans(parent(xA))
+    A = parent(parent(xA))
+    oA = oneunit(eltype(A))
+    n = size(A, 2)
+    @inbounds for j in 1:n
+        bj = b[j]
+        for i in 1:j-1
+            bj -= tfun(A[i,j]) * c[i]
         end
+        c[j] = oA \ bj
     end
+    return c
 end
 
-function rdiv!(A::AbstractMatrix, B::UpperTriangular)
-    require_one_based_indexing(A, B)
+rdiv!(A::AbstractMatrix, B::AbstractTriangular) = @inline _rdiv!(A, A, B)
+function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperTriangular)
+    require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
         throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
-    @inbounds for i = 1:m
-        for j = 1:n
+    if size(C) != size(A)
+        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
+    end
+    @inbounds for i in 1:m
+        for j in 1:n
             Aij = A[i,j]
-            for k = 1:j - 1
-                Aij -= A[i,k]*B.data[k,j]
+            for k in 1:j - 1
+                Aij -= C[i,k]*B.data[k,j]
             end
             iszero(B.data[j,j]) && throw(SingularException(j))
-            A[i,j] = Aij/B.data[j,j]
+            C[i,j] = Aij / B.data[j,j]
         end
     end
-    A
+    C
 end
-function rdiv!(A::AbstractMatrix, B::UnitUpperTriangular)
-    require_one_based_indexing(B)
+function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UnitUpperTriangular)
+    require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
         throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
-    @inbounds for i = 1:m
-        for j = 1:n
+    if size(C) != size(A)
+        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
+    end
+    @inbounds for i in 1:m
+        for j in 1:n
             Aij = A[i,j]
-            for k = 1:j - 1
-                Aij -= A[i,k]*B.data[k,j]
+            for k in 1:j - 1
+                Aij -= C[i,k]*B.data[k,j]
             end
-            A[i,j] = Aij
+            C[i,j] = Aij / oneunit(eltype(B))
         end
     end
-    A
+    C
 end
-function rdiv!(A::AbstractMatrix, B::LowerTriangular)
-    require_one_based_indexing(A, B)
+function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::LowerTriangular)
+    require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
         throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
-    @inbounds for i = 1:m
-        for j = n:-1:1
+    if size(C) != size(A)
+        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
+    end
+    @inbounds for i in 1:m
+        for j in n:-1:1
             Aij = A[i,j]
-            for k = j + 1:n
-                Aij -= A[i,k]*B.data[k,j]
+            for k in j + 1:n
+                Aij -= C[i,k]*B.data[k,j]
             end
             iszero(B.data[j,j]) && throw(SingularException(j))
-            A[i,j] = Aij/B.data[j,j]
+            C[i,j] = Aij / B.data[j,j]
         end
     end
-    A
+    C
 end
-function rdiv!(A::AbstractMatrix, B::UnitLowerTriangular)
-    require_one_based_indexing(A, B)
+function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UnitLowerTriangular)
+    require_one_based_indexing(C, A, B)
     m, n = size(A)
     if size(B, 1) != n
         throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
     end
-    @inbounds for i = 1:m
-        for j = n:-1:1
+    if size(C) != size(A)
+        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
+    end
+    @inbounds for i in 1:m
+        for j in n:-1:1
             Aij = A[i,j]
-            for k = j + 1:n
-                Aij -= A[i,k]*B.data[k,j]
+            for k in j + 1:n
+                Aij -= C[i,k]*B.data[k,j]
             end
-            A[i,j] = Aij
+            C[i,j] = Aij / oneunit(eltype(B))
         end
     end
-    A
+    C
 end
 
 lmul!(A::UpperTriangular,     B::UpperTriangular) = UpperTriangular(lmul!(A, triu!(B.data)))
@@ -1288,175 +1378,126 @@ rmul!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rmul!(tril!(
 ## the element type doesn't have to be stable under division whereas that is
 ## necessary in the general triangular solve problem.
 
-## Some Triangular-Triangular cases. We might want to write tailored methods
-## for these cases, but I'm not sure it is worth it.
-
-for (f, f2!) in ((:*, :lmul!), (:\, :ldiv!))
-    @eval begin
-        function ($f)(A::LowerTriangular, B::LowerTriangular)
-            TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
-                         ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = copy_similar(B, TAB)
-            return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function $(f)(A::UnitLowerTriangular, B::LowerTriangular)
-            TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                         (*)(zero(eltype(A)), zero(eltype(B))))
-             BB = copy_similar(B, TAB)
-            return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function $(f)(A::LowerTriangular, B::UnitLowerTriangular)
-            TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
-                         ($f)(zero(eltype(A)), zero(eltype(B))))
-             BB = copy_similar(B, TAB)
-            return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function $(f)(A::UnitLowerTriangular, B::UnitLowerTriangular)
-            TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                         (*)(zero(eltype(A)), zero(eltype(B))))
-             BB = copy_similar(B, TAB)
-            return UnitLowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function ($f)(A::UpperTriangular, B::UpperTriangular)
-            TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
-                         ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = copy_similar(B, TAB)
-            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function ($f)(A::UnitUpperTriangular, B::UpperTriangular)
-            TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                         (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = copy_similar(B, TAB)
-            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function ($f)(A::UpperTriangular, B::UnitUpperTriangular)
-            TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
-                         ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = copy_similar(B, TAB)
-            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-
-        function ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular)
-            TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                         (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = copy_similar(B, TAB)
-            return UnitUpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
-        end
-    end
-end
-
-function (/)(A::LowerTriangular, B::LowerTriangular)
-    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
-                 (/)(zero(eltype(A)), one(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::UnitLowerTriangular, B::LowerTriangular)
-    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
-                 (/)(zero(eltype(A)), one(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::LowerTriangular, B::UnitLowerTriangular)
-    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
-                 (/)(zero(eltype(A)), one(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::UnitLowerTriangular, B::UnitLowerTriangular)
-    TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                 (*)(zero(eltype(A)), zero(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return UnitLowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::UpperTriangular, B::UpperTriangular)
-    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
-                 (/)(zero(eltype(A)), one(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::UnitUpperTriangular, B::UpperTriangular)
-    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
-                 (/)(zero(eltype(A)), one(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::UpperTriangular, B::UnitUpperTriangular)
-    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
-                 (/)(zero(eltype(A)), one(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-function (/)(A::UnitUpperTriangular, B::UnitUpperTriangular)
-    TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                 (*)(zero(eltype(A)), zero(eltype(B))))
-    AA = copy_similar(A, TAB)
-    return UnitUpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
-end
-
-_inner_type_promotion(A,B) = promote_type(eltype(A), eltype(B), typeof(zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B))))
+_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA<:Integer,TB<:Integer} =
+    _init_eltype(*, TA, TB)
+_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
+    _init_eltype(op, TA, TB)
 ## The general promotion methods
 function *(A::AbstractTriangular, B::AbstractTriangular)
-    TAB = _inner_type_promotion(A,B)
-    BB = copy_similar(B, TAB)
-    lmul!(convert(AbstractArray{TAB}, A), BB)
+    TAB = _init_eltype(*, eltype(A), eltype(B))
+    if TAB <: BlasFloat
+        lmul!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
+    else
+        mul!(similar(B, TAB, size(B)), A, B)
+    end
 end
 
 for mat in (:AbstractVector, :AbstractMatrix)
     ### Multiplication with triangle to the left and hence rhs cannot be transposed.
     @eval function *(A::AbstractTriangular, B::$mat)
         require_one_based_indexing(B)
-        TAB = _inner_type_promotion(A,B)
-        BB = copy_similar(B, TAB)
-        lmul!(convert(AbstractArray{TAB}, A), BB)
+        TAB = _init_eltype(*, eltype(A), eltype(B))
+        if TAB <: BlasFloat
+            lmul!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
+        else
+            mul!(similar(B, TAB, size(B)), A, B)
+        end
     end
     ### Left division with triangle to the left hence rhs cannot be transposed. No quotients.
     @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
         require_one_based_indexing(B)
-        TAB = _inner_type_promotion(A,B)
-        BB = copy_similar(B, TAB)
-        ldiv!(convert(AbstractArray{TAB}, A), BB)
+        TAB = _inner_type_promotion(\, eltype(A), eltype(B))
+        if TAB <: BlasFloat
+            ldiv!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
+        else
+            ldiv!(similar(B, TAB, size(B)), A, B)
+        end
     end
     ### Left division with triangle to the left hence rhs cannot be transposed. Quotients.
     @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
         require_one_based_indexing(B)
-        TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-        BB = copy_similar(B, TAB)
-        ldiv!(convert(AbstractArray{TAB}, A), BB)
+        TAB = _init_eltype(\, eltype(A), eltype(B))
+        if TAB <: BlasFloat
+            ldiv!(convert(AbstractArray{TAB}, A), copy_similar(B, TAB))
+        else
+            ldiv!(similar(B, TAB, size(B)), A, B)
+        end
     end
     ### Right division with triangle to the right hence lhs cannot be transposed. No quotients.
     @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
         require_one_based_indexing(A)
-        TAB = _inner_type_promotion(A,B)
-        AA = copy_similar(A, TAB)
-        rdiv!(AA, convert(AbstractArray{TAB}, B))
+        TAB = _inner_type_promotion(/, eltype(A), eltype(B))
+        if TAB <: BlasFloat
+            rdiv!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B))
+        else
+            _rdiv!(similar(A, TAB, size(A)), A, B)
+        end
     end
     ### Right division with triangle to the right hence lhs cannot be transposed. Quotients.
     @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
         require_one_based_indexing(A)
-        TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-        AA = copy_similar(A, TAB)
-        rdiv!(AA, convert(AbstractArray{TAB}, B))
+        TAB = _init_eltype(/, eltype(A), eltype(B))
+        if TAB <: BlasFloat
+            rdiv!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B))
+        else
+            _rdiv!(similar(A, TAB, size(A)), A, B)
+        end
     end
 end
 ### Multiplication with triangle to the right and hence lhs cannot be transposed.
 # Only for AbstractMatrix, hence outside the above loop.
 function *(A::AbstractMatrix, B::AbstractTriangular)
     require_one_based_indexing(A)
-    TAB = _inner_type_promotion(A,B)
-    AA = copy_similar(A, TAB)
-    rmul!(AA, convert(AbstractArray{TAB}, B))
+    TAB = _init_eltype(*, eltype(A), eltype(B))
+    if TAB <: BlasFloat
+        rmul!(copy_similar(A, TAB), convert(AbstractArray{TAB}, B))
+    else
+        mul!(similar(A, TAB, size(A)), A, B)
+    end
 end
 # ambiguity resolution with definitions in matmul.jl
 *(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent)
 *(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent)
 
+## Some Triangular-Triangular cases. We might want to write tailored methods
+## for these cases, but I'm not sure it is worth it.
+for f in (:*, :\)
+    @eval begin
+        ($f)(A::LowerTriangular, B::LowerTriangular) =
+            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
+        ($f)(A::LowerTriangular, B::UnitLowerTriangular) =
+            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
+        ($f)(A::UnitLowerTriangular, B::LowerTriangular) =
+            LowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
+        ($f)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
+            UnitLowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
+        ($f)(A::UpperTriangular, B::UpperTriangular) =
+            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
+        ($f)(A::UpperTriangular, B::UnitUpperTriangular) =
+            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
+        ($f)(A::UnitUpperTriangular, B::UpperTriangular) =
+            UpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
+        ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
+            UnitUpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
+    end
+end
+(/)(A::LowerTriangular, B::LowerTriangular) =
+    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
+(/)(A::LowerTriangular, B::UnitLowerTriangular) =
+    LowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
+(/)(A::UnitLowerTriangular, B::LowerTriangular) =
+    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
+(/)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
+    UnitLowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
+(/)(A::UpperTriangular, B::UpperTriangular) =
+    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
+(/)(A::UpperTriangular, B::UnitUpperTriangular) =
+    UpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
+(/)(A::UnitUpperTriangular, B::UpperTriangular) =
+    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
+(/)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
+    UnitUpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
+
 # Complex matrix power for upper triangular factor, see:
 #   Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix",
 #     SIAM J. Matrix Anal. & Appl., 32 (3), (2011) 1056–1078.
@@ -2073,7 +2114,7 @@ function sqrt(A::UnitUpperTriangular{T}) where T
     n = checksquare(B)
     t = typeof(sqrt(zero(T)))
     R = Matrix{t}(I, n, n)
-    tt = typeof(zero(t)*zero(t))
+    tt = typeof(oneunit(t)*oneunit(t))
     half = inv(R[1,1]+R[1,1]) # for general, algebraic cases. PR#20214
     @inbounds for j = 1:n
         for i = j-1:-1:1
@@ -2081,7 +2122,7 @@ function sqrt(A::UnitUpperTriangular{T}) where T
             @simd for k = i+1:j-1
                 r -= R[i,k]*R[k,j]
             end
-            r==0 || (R[i,j] = half*r)
+            iszero(r) || (R[i,j] = half*r)
         end
     end
     return UnitUpperTriangular(R)
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index 428acf469c9b2..21ae8a1bb913a 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -179,7 +179,7 @@ for (t1, t2) in ((:UnitUpperTriangular, :UpperTriangular),
                  (:UnitLowerTriangular, :LowerTriangular))
     @eval begin
         function (+)(UL::$t1, J::UniformScaling)
-            ULnew = copymutable_oftype(UL.data, Base._return_type(+, Tuple{eltype(UL), typeof(J)}))
+            ULnew = copymutable_oftype(UL.data, Base.promote_op(+, eltype(UL), typeof(J)))
             for i in axes(ULnew, 1)
                 ULnew[i,i] = one(ULnew[i,i]) + J
             end
@@ -193,7 +193,7 @@ end
 # However, to preserve type stability, we do not special-case a
 # UniformScaling{<:Complex} that happens to be real.
 function (+)(A::Hermitian, J::UniformScaling{<:Complex})
-    TS = Base._return_type(+, Tuple{eltype(A), typeof(J)})
+    TS = Base.promote_op(+, eltype(A), typeof(J))
     B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true)
     for i in diagind(B)
         B[i] = A[i] + J
@@ -202,7 +202,7 @@ function (+)(A::Hermitian, J::UniformScaling{<:Complex})
 end
 
 function (-)(J::UniformScaling{<:Complex}, A::Hermitian)
-    TS = Base._return_type(+, Tuple{eltype(A), typeof(J)})
+    TS = Base.promote_op(+, eltype(A), typeof(J))
     B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true)
     B .= .-B
     for i in diagind(B)
@@ -213,7 +213,7 @@ end
 
 function (+)(A::AbstractMatrix, J::UniformScaling)
     checksquare(A)
-    B = copymutable_oftype(A, Base._return_type(+, Tuple{eltype(A), typeof(J)}))
+    B = copymutable_oftype(A, Base.promote_op(+, eltype(A), typeof(J)))
     for i in intersect(axes(A,1), axes(A,2))
         @inbounds B[i,i] += J
     end
@@ -222,7 +222,7 @@ end
 
 function (-)(J::UniformScaling, A::AbstractMatrix)
     checksquare(A)
-    B = convert(AbstractMatrix{Base._return_type(+, Tuple{eltype(A), typeof(J)})}, -A)
+    B = convert(AbstractMatrix{Base.promote_op(+, eltype(A), typeof(J))}, -A)
     for i in intersect(axes(A,1), axes(A,2))
         @inbounds B[i,i] += J
     end
@@ -381,6 +381,22 @@ function copyto!(A::AbstractMatrix, J::UniformScaling)
     return A
 end
 
+function copyto!(A::Diagonal, J::UniformScaling)
+    A.diag .= J.λ
+    return A
+end
+function copyto!(A::Union{Bidiagonal, SymTridiagonal}, J::UniformScaling)
+    A.ev .= 0
+    A.dv .= J.λ
+    return A
+end
+function copyto!(A::Tridiagonal, J::UniformScaling)
+    A.dl .= 0
+    A.du .= 0
+    A.d .= J.λ
+    return A
+end
+
 function cond(J::UniformScaling{T}) where T
     onereal = inv(one(real(J.λ)))
     return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf)
@@ -403,10 +419,14 @@ promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} =
     (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...)
 promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix
 
+_us2number(A) = A
+_us2number(J::UniformScaling) = J.λ
+
 for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols"))
     @eval begin
         @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...)
-        @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $_f(A...)
+        # if there's a Number present, J::UniformScaling must be 1x1-dimensional
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $f(map(_us2number, A)...)
         function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A))
             n = -1
             for a in A
@@ -509,10 +529,6 @@ Array{T}(s::UniformScaling, m::Integer, n::Integer) where {T} = Matrix{T}(s, m,
 Array(s::UniformScaling, m::Integer, n::Integer) = Matrix(s, m, n)
 Array(s::UniformScaling, dims::Dims{2}) = Matrix(s, dims)
 
-## Diagonal construction from UniformScaling
-Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
-Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
-
 dot(A::AbstractMatrix, J::UniformScaling) = dot(tr(A), J.λ)
 dot(J::UniformScaling, A::AbstractMatrix) = dot(J.λ, tr(A))
 
@@ -523,8 +539,3 @@ dot(x::AbstractVector, a::Union{Real,Complex}, y::AbstractVector) = a*dot(x, y)
 # muladd
 Base.muladd(A::UniformScaling, B::UniformScaling, z::UniformScaling) =
     UniformScaling(A.λ * B.λ + z.λ)
-Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) =
-    Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z))
-
-_diag_or_value(A::Diagonal) = A.diag
-_diag_or_value(A::UniformScaling) = A.λ
diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl
new file mode 100644
index 0000000000000..e3f48c7b2e3fd
--- /dev/null
+++ b/stdlib/LinearAlgebra/test/abstractq.jl
@@ -0,0 +1,89 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module TestAbstractQ
+
+using Test
+using LinearAlgebra
+using LinearAlgebra: AbstractQ, AdjointQ
+import LinearAlgebra: lmul!, rmul!
+import Base: size, convert
+
+n = 5
+
+@testset "custom AbstractQ type" begin
+    struct MyQ{T,S<:AbstractQ{T}} <: AbstractQ{T}
+        Q::S
+    end
+    MyQ{T}(Q::AbstractQ) where {T} = (P = convert(AbstractQ{T}, Q); MyQ{T,typeof(P)}(P))
+    MyQ(Q::MyQ) = Q
+
+    Base.size(Q::MyQ) = size(Q.Q)
+    LinearAlgebra.lmul!(Q::MyQ, B::AbstractVecOrMat) = lmul!(Q.Q, B)
+    LinearAlgebra.lmul!(adjQ::AdjointQ{<:Any,<:MyQ}, B::AbstractVecOrMat) = lmul!(parent(adjQ).Q', B)
+    LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) = rmul!(A, Q.Q)
+    LinearAlgebra.rmul!(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q')
+    Base.convert(::Type{AbstractQ{T}}, Q::MyQ) where {T} = MyQ{T}(Q.Q)
+    LinearAlgebra.det(Q::MyQ) = det(Q.Q)
+
+    for T in (Float64, ComplexF64)
+        A = rand(T, n, n)
+        F = qr(A)
+        Q = MyQ(F.Q)
+        @test ndims(Q) == 2
+        T <: Real && @test transpose(Q) == adjoint(Q)
+        T <: Complex && @test_throws ErrorException transpose(Q)
+        @test convert(AbstractQ{complex(T)}, Q) isa MyQ{complex(T)}
+        @test convert(AbstractQ{complex(T)}, Q') isa AdjointQ{<:complex(T),<:MyQ{complex(T)}}
+        @test Q*I ≈ Q.Q*I rtol=2eps(real(T))
+        @test Q'*I ≈ Q.Q'*I rtol=2eps(real(T))
+        @test I*Q ≈ Q.Q*I rtol=2eps(real(T))
+        @test I*Q' ≈ I*Q.Q' rtol=2eps(real(T))
+        @test abs(det(Q)) ≈ 1
+        @test logabsdet(Q)[1] ≈ 0 atol=2n*eps(real(T))
+        y = rand(T, n)
+        @test Q * y ≈ Q.Q * y ≈ Q' \ y ≈ ldiv!(Q', copy(y)) ≈ ldiv!(zero(y), Q', y)
+        @test Q'y ≈ Q.Q' * y ≈ Q \ y ≈ ldiv!(Q, copy(y)) ≈ ldiv!(zero(y), Q, y)
+        @test y'Q ≈ y'Q.Q ≈ y' / Q'
+        @test y'Q' ≈ y'Q.Q' ≈ y' / Q
+        y = Matrix(y')
+        @test y*Q ≈ y*Q.Q ≈ y / Q' ≈ rdiv!(copy(y), Q')
+        @test y*Q' ≈ y*Q.Q' ≈ y / Q ≈ rdiv!(copy(y), Q)
+        Y = rand(T, n, n); X = similar(Y)
+        for transQ in (identity, adjoint), transY in (identity, adjoint), Y in (Y, Y')
+            @test mul!(X, transQ(Q), transY(Y)) ≈ transQ(Q) * transY(Y) ≈ transQ(Q.Q) * transY(Y)
+            @test mul!(X, transY(Y), transQ(Q)) ≈ transY(Y) * transQ(Q) ≈ transY(Y) * transQ(Q.Q)
+        end
+        @test convert(Matrix, Q) ≈ Matrix(Q) ≈ Q[:,:] ≈ copyto!(zeros(T, size(Q)), Q) ≈ Q.Q*I
+        @test convert(Matrix, Q') ≈ Matrix(Q') ≈ (Q')[:,:] ≈ copyto!(zeros(T, size(Q)), Q') ≈ Q.Q'*I
+        @test Q[1,:] == Q.Q[1,:] == view(Q, 1, :)
+        @test Q[:,1] == Q.Q[:,1] == view(Q, :, 1)
+        @test Q[1,1] == Q.Q[1,1]
+        @test Q[:] == Q.Q[:]
+        @test Q[:,1:3] == Q.Q[:,1:3] == view(Q, :, 1:3)
+        @test Q[:,1:3] ≈ Matrix(Q)[:,1:3]
+        @test Q[2:3,2:3] == view(Q, 2:3, 2:3) ≈ Matrix(Q)[2:3,2:3]
+        @test_throws BoundsError Q[0,1]
+        @test_throws BoundsError Q[n+1,1]
+        @test_throws BoundsError Q[1,0]
+        @test_throws BoundsError Q[1,n+1]
+        @test_throws BoundsError Q[:,1:n+1]
+        @test_throws BoundsError Q[:,0:n]
+        for perm in ((1, 2), (2, 1))
+            P = PermutedDimsArray(zeros(T, size(Q)), perm)
+            @test copyto!(P, Q) ≈ Matrix(Q)
+        end
+        x = randn(T)
+        @test x * Q ≈ (x*I)*Q ≈ x * Q.Q
+        @test Q * x ≈ Q*(x*I) ≈ Q.Q * x
+        @test x * Q' ≈ (x*I)* Q' ≈ x * Q.Q'
+        @test Q' * x ≈ Q'*(x*I) ≈ Q.Q' * x
+        x = rand(T, 1)
+        Q = MyQ(qr(rand(T, 1, 1)).Q)
+        @test x * Q ≈ x * Q.Q
+        @test x * Q' ≈ x * Q.Q'
+        @test Q * x ≈ Q.Q * x
+        @test Q' * x ≈ Q.Q' * x
+    end
+end
+
+end # module
diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl
index 7479057d9f027..e40beb29787cf 100644
--- a/stdlib/LinearAlgebra/test/adjtrans.jl
+++ b/stdlib/LinearAlgebra/test/adjtrans.jl
@@ -489,13 +489,13 @@ end
     @test B == A .* A'
 end
 
-@testset "test show methods for $t of Factorizations" for t in (Adjoint, Transpose)
-    A = randn(4, 4)
+@testset "test show methods for $t of Factorizations" for t in (adjoint, transpose)
+    A = randn(ComplexF64, 4, 4)
     F = lu(A)
     Fop = t(F)
-    @test "LinearAlgebra."*sprint(show, Fop) ==
+    @test sprint(show, Fop) ==
                   "$t of "*sprint(show, parent(Fop))
-    @test "LinearAlgebra."*sprint((io, t) -> show(io, MIME"text/plain"(), t), Fop) ==
+    @test sprint((io, t) -> show(io, MIME"text/plain"(), t), Fop) ==
                   "$t of "*sprint((io, t) -> show(io, MIME"text/plain"(), t), parent(Fop))
 end
 
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
index 9866fce047dd1..89f2b21a6a973 100644
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ b/stdlib/LinearAlgebra/test/bidiag.jl
@@ -13,6 +13,12 @@ using .Main.Furlongs
 isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
 using .Main.Quaternions
 
+isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl"))
+using .Main.InfiniteArrays
+
+isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
+using .Main.FillArrays
+
 include("testutils.jl") # test_approx_eq_modphase
 
 n = 10 #Size of test matrix
@@ -323,36 +329,35 @@ Random.seed!(1)
                 @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
             end
             @testset "Specialized multiplication/division" begin
+                getval(x) = x
+                getval(x::Furlong) = x.val
                 function _bidiagdivmultest(T,
                         x,
                         typemul=T.uplo == 'U' ? UpperTriangular : Matrix,
                         typediv=T.uplo == 'U' ? UpperTriangular : Matrix,
                         typediv2=T.uplo == 'U' ? UpperTriangular : Matrix)
                     TM = Matrix(T)
-                    @test (T*x)::typemul ≈  TM*x #broken=eltype(x) <: Furlong
-                    @test (x*T)::typemul ≈ x*TM #broken=eltype(x) <: Furlong
-                    @test (x\T)::typediv ≈ x\TM #broken=eltype(T) <: Furlong
-                    @test (T/x)::typediv ≈ TM/x #broken=eltype(T) <: Furlong
+                    @test map(getval, (T*x)::typemul) ≈ map(getval, TM*x)
+                    @test map(getval, (x*T)::typemul) ≈ map(getval, x*TM)
+                    @test map(getval, (x\T)::typediv) ≈ map(getval, x\TM)
+                    @test map(getval, (T/x)::typediv) ≈ map(getval, TM/x)
                     if !isa(x, Number)
-                        @test (T\x)::typediv2 ≈ TM\x #broken=eltype(x) <: Furlong
-                        @test (x/T)::typediv2 ≈ x/TM #broken=eltype(x) <: Furlong
+                        @test map(getval, Array((T\x)::typediv2)) ≈ map(getval, Array(TM\x))
+                        @test map(getval, Array((x/T)::typediv2)) ≈ map(getval, Array(x/TM))
                     end
                     return nothing
                 end
-                A = randn(n,n)
-                d = randn(n)
-                dl = randn(n-1)
-                t = T
-                for t in (T, #=Furlong.(T)=#), (A, d, dl) in ((A, d, dl), #=(Furlong.(A), Furlong.(d), Furlong.(dl))=#)
+                A = Matrix(T)
+                for t in (T, Furlong.(T)), (A, dv, ev) in ((A, dv, ev), (Furlong.(A), Furlong.(dv), Furlong.(ev)))
                     _bidiagdivmultest(t, 5, Bidiagonal, Bidiagonal)
                     _bidiagdivmultest(t, 5I, Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
-                    _bidiagdivmultest(t, Diagonal(d), Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
+                    _bidiagdivmultest(t, Diagonal(dv), Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
                     _bidiagdivmultest(t, UpperTriangular(A))
                     _bidiagdivmultest(t, UnitUpperTriangular(A))
                     _bidiagdivmultest(t, LowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix)
                     _bidiagdivmultest(t, UnitLowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix)
-                    _bidiagdivmultest(t, Bidiagonal(d, dl, :U), Matrix, Matrix, Matrix)
-                    _bidiagdivmultest(t, Bidiagonal(d, dl, :L), Matrix, Matrix, Matrix)
+                    _bidiagdivmultest(t, Bidiagonal(dv, ev, :U), Matrix, Matrix, Matrix)
+                    _bidiagdivmultest(t, Bidiagonal(dv, ev, :L), Matrix, Matrix, Matrix)
                 end
             end
         end
@@ -794,4 +799,19 @@ end
     end
 end
 
+@testset "copyto! with UniformScaling" begin
+    @testset "Fill" begin
+        for len in (4, InfiniteArrays.Infinity())
+            d = FillArrays.Fill(1, len)
+            ud = FillArrays.Fill(0, len-1)
+            B = Bidiagonal(d, ud, :U)
+            @test copyto!(B, I) === B
+        end
+    end
+    B = Bidiagonal(fill(2, 4), fill(3, 3), :U)
+    copyto!(B, I)
+    @test all(isone, diag(B))
+    @test all(iszero, diag(B, 1))
+end
+
 end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl
index cbaf0e4628b9a..4252d9ee7938b 100644
--- a/stdlib/LinearAlgebra/test/blas.jl
+++ b/stdlib/LinearAlgebra/test/blas.jl
@@ -4,6 +4,7 @@ module TestBLAS
 
 using Test, LinearAlgebra, Random
 using LinearAlgebra: BlasReal, BlasComplex
+using Libdl: dlsym, dlopen
 fabs(x::Real) = abs(x)
 fabs(x::Complex) = abs(real(x)) + abs(imag(x))
 
@@ -713,4 +714,11 @@ end
     end
 end
 
+# Make sure we can use `Base.libblas_name`.  Avoid causing
+# https://github.com/JuliaLang/julia/issues/48427 again.
+@testset "libblas_name" begin
+    dot_sym = dlsym(dlopen(Base.libblas_name), "cblas_ddot" * (Sys.WORD_SIZE == 64 ? "64_" : ""))
+    @test 23.0 === @ccall $(dot_sym)(2::Int, [2.0, 3.0]::Ref{Cdouble}, 1::Int, [4.0, 5.0]::Ref{Cdouble}, 1::Int)::Cdouble
+end
+
 end # module TestBLAS
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
index a3008a236df7b..a795eb8d44a03 100644
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ b/stdlib/LinearAlgebra/test/cholesky.jl
@@ -260,11 +260,12 @@ end
     end
 end
 
-@testset "behavior for non-positive definite matrices" for T in (Float64, ComplexF64)
+@testset "behavior for non-positive definite matrices" for T in (Float64, ComplexF64, BigFloat)
     A = T[1 2; 2 1]
     B = T[1 2; 0 1]
+    C = T[2 0; 0 0]
     # check = (true|false)
-    for M in (A, Hermitian(A), B)
+    for M in (A, Hermitian(A), B, C)
         @test_throws PosDefException cholesky(M)
         @test_throws PosDefException cholesky!(copy(M))
         @test_throws PosDefException cholesky(M; check = true)
@@ -272,17 +273,19 @@ end
         @test !LinearAlgebra.issuccess(cholesky(M; check = false))
         @test !LinearAlgebra.issuccess(cholesky!(copy(M); check = false))
     end
-    for M in (A, Hermitian(A), B)
-        @test_throws RankDeficientException cholesky(M, RowMaximum())
-        @test_throws RankDeficientException cholesky!(copy(M), RowMaximum())
-        @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true)
-        @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true)
-        @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false))
-        @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false))
-        C = cholesky(M, RowMaximum(); check = false)
-        @test_throws RankDeficientException chkfullrank(C)
-        C = cholesky!(copy(M), RowMaximum(); check = false)
-        @test_throws RankDeficientException chkfullrank(C)
+    if T !== BigFloat # generic pivoted cholesky is not implemented
+        for M in (A, Hermitian(A), B)
+            @test_throws RankDeficientException cholesky(M, RowMaximum())
+            @test_throws RankDeficientException cholesky!(copy(M), RowMaximum())
+            @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true)
+            @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true)
+            @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false))
+            @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false))
+            C = cholesky(M, RowMaximum(); check = false)
+            @test_throws RankDeficientException chkfullrank(C)
+            C = cholesky!(copy(M), RowMaximum(); check = false)
+            @test_throws RankDeficientException chkfullrank(C)
+        end
     end
     @test !isposdef(A)
     str = sprint((io, x) -> show(io, "text/plain", x), cholesky(A; check = false))
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index 83a2a896e736c..5f169d21ff6fb 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -12,6 +12,12 @@ using .Main.Furlongs
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
+isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl"))
+using .Main.InfiniteArrays
+
+isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
+using .Main.FillArrays
+
 const n=12 # Size of matrix problem to test
 Random.seed!(1)
 
@@ -37,11 +43,14 @@ Random.seed!(1)
         end
         @test eltype(Diagonal{elty}([1,2,3,4])) == elty
         @test isa(Diagonal{elty,Vector{elty}}(GenericArray([1,2,3,4])), Diagonal{elty,Vector{elty}})
+        @test isa(Diagonal{elty}(rand(Int,n,n)), Diagonal{elty,Vector{elty}})
         DI = Diagonal([1,2,3,4])
         @test Diagonal(DI) === DI
         @test isa(Diagonal{elty}(DI), Diagonal{elty})
         # issue #26178
-        @test_throws MethodError convert(Diagonal, [1, 2, 3, 4])
+        @test_throws MethodError convert(Diagonal, [1,2,3,4])
+        @test_throws DimensionMismatch convert(Diagonal, [1 2 3 4])
+        @test_throws InexactError convert(Diagonal, ones(2,2))
     end
 
     @testset "Basic properties" begin
@@ -372,9 +381,17 @@ Random.seed!(1)
 
     @testset "conj and transpose" begin
         @test transpose(D) == D
-        if elty <: BlasComplex
+        if elty <: Real
+            @test transpose(D) === D
+            @test adjoint(D) === D
+        elseif elty <: BlasComplex
             @test Array(conj(D)) ≈ conj(DM)
             @test adjoint(D) == conj(D)
+            local D2 = copy(D)
+            local D2adj = adjoint(D2)
+            D2adj[1,1] = rand(eltype(D2adj))
+            @test D2[1,1] == adjoint(D2adj[1,1])
+            @test D2adj' === D2
         end
         # Translates to Ac/t_mul_B, which is specialized after issue 21286
         @test(D' * vv == conj(D) * vv)
@@ -1133,4 +1150,28 @@ Base.size(::SMatrix1) = (1, 1)
     @test C isa Matrix{SMatrix1{String}}
 end
 
+@testset "copyto! with UniformScaling" begin
+    @testset "Fill" begin
+        for len in (4, InfiniteArrays.Infinity())
+            d = FillArrays.Fill(1, len)
+            D = Diagonal(d)
+            @test copyto!(D, I) === D
+        end
+    end
+    D = Diagonal(fill(2, 2))
+    copyto!(D, I)
+    @test all(isone, diag(D))
+end
+
+@testset "diagonal triple multiplication (#49005)" begin
+    n = 10
+    @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n))) isa Diagonal
+    @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n+1))))
+    @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n+1), Diagonal(ones(n+1))))
+    @test_throws DimensionMismatch (*(Diagonal(ones(n+1)), Diagonal(1:n), Diagonal(ones(n))))
+
+    # currently falls back to two-term *
+    @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal
+end
+
 end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl
index d200eff2f17bf..72233293ff515 100644
--- a/stdlib/LinearAlgebra/test/factorization.jl
+++ b/stdlib/LinearAlgebra/test/factorization.jl
@@ -56,11 +56,24 @@ end
     A = randn(3, 3)
     A = A * A' # ensure A is pos. def. and symmetric
     F = f(A)
-    tF = Transpose(F)
-    aF = Adjoint(F)
     @test size(F) == size(A)
-    @test size(tF) == size(Transpose(A))
-    @test size(aF) == size(Adjoint(A))
+    @test size(F') == size(A')
+end
+
+@testset "size for transpose factorizations - $f" for f in Any[
+    bunchkaufman,
+    cholesky,
+    x -> cholesky(x, RowMaximum()),
+    hessenberg,
+    lq,
+    lu,
+    svd,
+]
+    A = randn(3, 3)
+    A = A * A' # ensure A is pos. def. and symmetric
+    F = f(A)
+    @test size(F) == size(A)
+    @test size(transpose(F)) == size(transpose(A))
 end
 
 @testset "equality of QRCompactWY" begin
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
index a95827867cd18..3ebaf38e84945 100644
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ b/stdlib/LinearAlgebra/test/generic.jl
@@ -12,6 +12,8 @@ using .Main.Quaternions
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
+isdefined(Main, :DualNumbers) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "DualNumbers.jl"))
+using .Main.DualNumbers
 
 Random.seed!(123)
 
@@ -78,30 +80,7 @@ n = 5 # should be odd
     end
 
     @testset "det with nonstandard Number type" begin
-        struct MyDual{T<:Real} <: Real
-            val::T
-            eps::T
-        end
-        Base.:+(x::MyDual, y::MyDual) = MyDual(x.val + y.val, x.eps + y.eps)
-        Base.:*(x::MyDual, y::MyDual) = MyDual(x.val * y.val, x.eps * y.val + y.eps * x.val)
-        Base.:/(x::MyDual, y::MyDual) = x.val / y.val
-        Base.:(==)(x::MyDual, y::MyDual) = x.val == y.val && x.eps == y.eps
-        Base.zero(::MyDual{T}) where {T} = MyDual(zero(T), zero(T))
-        Base.zero(::Type{MyDual{T}}) where {T} = MyDual(zero(T), zero(T))
-        Base.one(::MyDual{T}) where {T} = MyDual(one(T), zero(T))
-        Base.one(::Type{MyDual{T}}) where {T} = MyDual(one(T), zero(T))
-        # the following line is required for BigFloat, IDK why it doesn't work via
-        # promote_rule like for all other types
-        Base.promote_type(::Type{MyDual{BigFloat}}, ::Type{BigFloat}) = MyDual{BigFloat}
-        Base.promote_rule(::Type{MyDual{T}}, ::Type{S}) where {T,S<:Real} =
-            MyDual{promote_type(T, S)}
-        Base.promote_rule(::Type{MyDual{T}}, ::Type{MyDual{S}}) where {T,S} =
-            MyDual{promote_type(T, S)}
-        Base.convert(::Type{MyDual{T}}, x::MyDual) where {T} =
-            MyDual(convert(T, x.val), convert(T, x.eps))
-        if elty <: Real
-            @test det(triu(MyDual.(A, zero(A)))) isa MyDual
-        end
+        elty <: Real && @test det(Dual.(triu(A), zero(A))) isa Dual
     end
 end
 
@@ -269,6 +248,24 @@ end
     @test norm(x, 3) ≈ cbrt(5^3  +sqrt(5)^3)
 end
 
+@testset "norm of transpose/adjoint equals norm of parent #32739" begin
+    for t in (transpose, adjoint), elt in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
+        # Vector/matrix of scalars
+        for sz in ((2,), (2, 3))
+            A = rand(elt, sz...)
+            Aᵀ = t(A)
+            @test norm(Aᵀ) ≈ norm(Matrix(Aᵀ))
+        end
+
+        # Vector/matrix of vectors/matrices
+        for sz_outer in ((2,), (2, 3)), sz_inner in ((3,), (1, 2))
+            A = [rand(elt, sz_inner...) for _ in CartesianIndices(sz_outer)]
+            Aᵀ = t(A)
+            @test norm(Aᵀ) ≈ norm(Matrix(Matrix.(Aᵀ)))
+        end
+    end
+end
+
 @testset "rotate! and reflect!" begin
     x = rand(ComplexF64, 10)
     y = rand(ComplexF64, 10)
@@ -372,6 +369,7 @@ end
         [1.0 2.0 3.0; 4.0 5.0 6.0], # 2-dim
         rand(1,2,3),                # higher dims
         rand(1,2,3,4),
+        Dual.(randn(2,3), randn(2,3)),
         OffsetArray([-1,0], (-2,))  # no index 1
     )
         @test normalize(arr) == normalize!(copy(arr))
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index fd1fefb97cab7..91e4e1b1b3df0 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -97,10 +97,10 @@ let n = 10
             @testset "Multiplication/division" begin
                 for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
                             UpperTriangular(A), UnitUpperTriangular(A))
-                    @test (H*x)::UpperHessenberg == Array(H)*x
-                    @test (x*H)::UpperHessenberg == x*Array(H)
-                    @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa UpperTriangular
-                    @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa UpperTriangular
+                    @test (H*x)::UpperHessenberg ≈ Array(H)*x
+                    @test (x*H)::UpperHessenberg ≈ x*Array(H)
+                    @test H/x ≈ Array(H)/x# broken = eltype(H) <: Furlong && x isa UpperTriangular
+                    @test x\H ≈ x\Array(H)# broken = eltype(H) <: Furlong && x isa UpperTriangular
                     @test H/x isa UpperHessenberg
                     @test x\H isa UpperHessenberg
                 end
@@ -113,13 +113,12 @@ let n = 10
             H = UpperHessenberg(Furlong.(Areal))
             for A in (A, Furlong.(A))
                 @testset "Multiplication/division Furlong" begin
-                    for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U))
-                        @test (H*x)::UpperHessenberg == Array(H)*x
-                        @test (x*H)::UpperHessenberg == x*Array(H)
-                        @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa UpperTriangular
-                        @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa UpperTriangular
-                        @test H/x isa UpperHessenberg
-                        @test x\H isa UpperHessenberg
+                    for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
+                                UpperTriangular(A), UnitUpperTriangular(A))
+                        @test map(x -> x.val, (H*x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)*x)
+                        @test map(x -> x.val, (x*H)::UpperHessenberg) ≈ map(x -> x.val, x*Array(H))
+                        @test map(x -> x.val, (H/x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)/x)
+                        @test map(x -> x.val, (x\H)::UpperHessenberg) ≈ map(x -> x.val, x\Array(H))
                     end
                     x = Bidiagonal(d, dl, :L)
                     @test H*x == Array(H)*x
@@ -149,9 +148,11 @@ let n = 10
         @test_throws ErrorException H.Z
         @test convert(Array, H) ≈ A
         @test (H.Q * H.H) * H.Q' ≈ A ≈ (Matrix(H.Q) * Matrix(H.H)) * Matrix(H.Q)'
-        @test (H.Q' *A) * H.Q ≈ H.H
+        @test (H.Q' * A) * H.Q ≈ H.H
         #getindex for HessenbergQ
         @test H.Q[1,1] ≈ Array(H.Q)[1,1]
+        @test det(H.Q) ≈ det(Matrix(H.Q))
+        @test logabsdet(H.Q)[1] ≈ logabsdet(Matrix(H.Q))[1] atol=2n*eps(float(real(eltya)))
 
         # REPL show
         hessstring = sprint((t, s) -> show(t, "text/plain", s), H)
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
index e0e75f0a88413..2c5d92541af93 100644
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ b/stdlib/LinearAlgebra/test/lapack.jl
@@ -27,6 +27,13 @@ using LinearAlgebra: BlasInt
         @test LAPACK.syevr!('N', 'V', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[vals .< 1.0]
         @test LAPACK.syevr!('N', 'I', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[4:5]
         @test vals ≈ LAPACK.syev!('N', 'U', copy(Asym))
+        @test vals ≈ LAPACK.syevd!('N', 'U', copy(Asym))
+        vals_test, Z_test = LAPACK.syev!('V', 'U', copy(Asym))
+        @test vals_test ≈ vals
+        @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym
+        vals_test, Z_test = LAPACK.syevd!('V', 'U', copy(Asym))
+        @test vals_test ≈ vals
+        @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym
         @test_throws DimensionMismatch LAPACK.sygvd!(1, 'V', 'U', copy(Asym), zeros(elty, 6, 6))
     end
 end
@@ -180,7 +187,7 @@ end
     end
 end
 
-@testset "geevx, ggev errors" begin
+@testset "geevx, ggev, ggev3 errors" begin
     @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
         A = rand(elty,10,10)
         B = rand(elty,10,10)
@@ -191,6 +198,9 @@ end
         @test_throws ArgumentError LAPACK.ggev!('N','B',A,B)
         @test_throws ArgumentError LAPACK.ggev!('B','N',A,B)
         @test_throws DimensionMismatch LAPACK.ggev!('N','N',A,zeros(elty,12,12))
+        @test_throws ArgumentError LAPACK.ggev3!('N','B',A,B)
+        @test_throws ArgumentError LAPACK.ggev3!('B','N',A,B)
+        @test_throws DimensionMismatch LAPACK.ggev3!('N','N',A,zeros(elty,12,12))
     end
 end
 
@@ -590,11 +600,12 @@ end
     end
 end
 
-@testset "gees, gges error throwing" begin
+@testset "gees, gges, gges3 error throwing" begin
     @testset for elty in (Float32, Float64, ComplexF32, ComplexF64)
         A = rand(elty,10,10)
         B = rand(elty,11,11)
         @test_throws DimensionMismatch LAPACK.gges!('V','V',A,B)
+        @test_throws DimensionMismatch LAPACK.gges3!('V','V',A,B)
     end
 end
 
@@ -709,4 +720,13 @@ a = zeros(2,0), zeros(0)
 @test LinearAlgebra.LAPACK.geqrf!(a...) === a
 @test LinearAlgebra.LAPACK.gerqf!(a...) === a
 
+# Issue #49489: https://github.com/JuliaLang/julia/issues/49489
+# Dimension mismatch between A and ipiv causes segfaults
+@testset "issue #49489" begin
+    A = randn(23,23)
+    b = randn(23)
+    ipiv = collect(1:20)
+    @test_throws DimensionMismatch LinearAlgebra.LAPACK.getrs!('N', A, ipiv, b)
+end
+
 end # module TestLAPACK
diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl
index c340317a7cc23..8b4af6a0a5f8d 100644
--- a/stdlib/LinearAlgebra/test/lq.jl
+++ b/stdlib/LinearAlgebra/test/lq.jl
@@ -71,13 +71,11 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                     @test lqa*x ≈ a*x rtol=3000ε
                     @test (sq = size(q.factors, 2); *(Matrix{eltyb}(I, sq, sq), adjoint(q))*squareQ(q)) ≈ Matrix(I, n, n) rtol=5000ε
                     if eltya != Int
-                        @test Matrix{eltyb}(I, n, n)*q ≈ convert(AbstractMatrix{tab},q)
+                        @test Matrix{eltyb}(I, n, n)*q ≈ Matrix(I, n, n) * convert(LinearAlgebra.AbstractQ{tab}, q)
                     end
                     @test q*x ≈ squareQ(q)*x rtol=100ε
-                    @test transpose(q)*x ≈ transpose(squareQ(q))*x rtol=100ε
                     @test q'*x ≈ squareQ(q)'*x rtol=100ε
                     @test a*q ≈ a*squareQ(q) rtol=100ε
-                    @test a*transpose(q) ≈ a*transpose(squareQ(q)) rtol=100ε
                     @test a*q' ≈ a*squareQ(q)' rtol=100ε
                     @test q*a'≈ squareQ(q)*a' rtol=100ε
                     @test q'*a' ≈ squareQ(q)'*a' rtol=100ε
@@ -89,7 +87,6 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                         pad_a = vcat(I, a)
                         pad_x = hcat(I, x)
                         @test pad_a*q ≈ pad_a*squareQ(q) rtol=100ε
-                        @test transpose(q)*pad_x ≈ transpose(squareQ(q))*pad_x rtol=100ε
                         @test q'*pad_x ≈ squareQ(q)'*pad_x rtol=100ε
                     end
                 end
@@ -193,12 +190,12 @@ end
     @testset for n in 1:3, m in 1:3
         @testset "real" begin
             _, Q = lq(randn(n, m))
-            @test det(Q) ≈ det(collect(Q))
+            @test det(Q) ≈ det(Q*I)
             @test abs(det(Q)) ≈ 1
         end
         @testset "complex" begin
             _, Q = lq(randn(ComplexF64, n, m))
-            @test det(Q) ≈ det(collect(Q))
+            @test det(Q) ≈ det(Q*I)
             @test abs(det(Q)) ≈ 1
         end
     end
@@ -217,11 +214,7 @@ L factor:
  0.0  0.0  1.0  0.0
  0.0  0.0  0.0  1.0
 Q factor:
-4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}:
- 1.0  0.0  0.0  0.0
- 0.0  1.0  0.0  0.0
- 0.0  0.0  1.0  0.0
- 0.0  0.0  0.0  1.0"""
+4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}"""
 end
 
 @testset "adjoint of LQ" begin
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 0150c4c2efdc8..2d99856a2667b 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -655,10 +655,10 @@ Transpose(x::RootInt) = x
 
 @testset "#14293" begin
     a = [RootInt(3)]
-    C = [0]
+    C = [0;;]
     mul!(C, a, transpose(a))
     @test C[1] == 9
-    C = [1]
+    C = [1;;]
     mul!(C, a, transpose(a), 2, 3)
     @test C[1] == 21
     a = [RootInt(2), RootInt(10)]
diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl
index c8db95b8c34b6..6e2e9a7b20603 100644
--- a/stdlib/LinearAlgebra/test/qr.jl
+++ b/stdlib/LinearAlgebra/test/qr.jl
@@ -21,8 +21,8 @@ breal = randn(n,2)/2
 bimg  = randn(n,2)/2
 
 # helper functions to unambiguously recover explicit forms of an implicit QR Q
-squareQ(Q::LinearAlgebra.AbstractQ) = (sq = size(Q.factors, 1); lmul!(Q, Matrix{eltype(Q)}(I, sq, sq)))
-rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
+squareQ(Q::LinearAlgebra.AbstractQ) = Q*I
+rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q)
 
 @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int)
     raw_a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
@@ -62,7 +62,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 sq = size(q.factors, 2)
                 @test *(Matrix{eltyb}(I, sq, sq), adjoint(q)) * squareQ(q) ≈ Matrix(I, sq, sq) atol=5000ε
                 if eltya != Int
-                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ convert(AbstractMatrix{tab}, q)
+                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab}, q))
                     ac = copy(a)
                     @test qr!(a[:, 1:5])\b == qr!(view(ac, :, 1:5))\b
                 end
@@ -86,14 +86,14 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test q*b[1:n1] ≈ rectangularQ(q)*b[1:n1] atol=100ε
                 @test q*b ≈ squareQ(q)*b atol=100ε
                 if eltya != Int
-                    @test Array{eltya}(q) ≈ Matrix(q)
+                    @test Array{eltya}(q) ≈ rectangularQ(q)
                 end
                 @test_throws DimensionMismatch q*b[1:n1 + 1]
                 @test_throws DimensionMismatch b[1:n1 + 1]*q'
                 sq = size(q.factors, 2)
                 @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε
                 if eltya != Int
-                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ convert(AbstractMatrix{tab},q)
+                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q))
                 end
                 # iterate
                 q, r = qra
@@ -123,7 +123,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test_throws DimensionMismatch q*b[1:n1+1]
                 @test_throws DimensionMismatch b[1:n1+1]*q'
                 if eltya != Int
-                    @test Matrix{eltyb}(I, n1, n1)*q ≈ convert(AbstractMatrix{tab},q)
+                    @test Matrix{eltyb}(I, n1, n1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q))
                 end
                 # iterate
                 q, r, p = qrpa
@@ -149,7 +149,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 sq = size(q.factors, 2)
                 @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε
                 if eltya != Int
-                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ convert(AbstractMatrix{tab},q)
+                    @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q))
                 end
                 qrstring = sprint((t, s) -> show(t, "text/plain", s), qrpa)
                 rstring  = sprint((t, s) -> show(t, "text/plain", s), r)
@@ -205,15 +205,22 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test mul!(c, b, q) ≈ b*q
                 @test mul!(c, b, q') ≈ b*q'
                 @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b)
+
+                b = similar(a[:,1]); rand!(b)
+                c = similar(a[:,1])
+                d = similar(a[:,1])
+                @test mul!(c, q, b) ≈ q*b
+                @test mul!(c, q', b) ≈ q'*b
+                @test_throws DimensionMismatch mul!(Vector{eltya}(undef, n+1), q, b)
             end
         end
     end
 end
 
 @testset "transpose errors" begin
-    @test_throws MethodError transpose(qr(randn(3,3)))
-    @test_throws MethodError transpose(qr(randn(3,3), NoPivot()))
-    @test_throws MethodError transpose(qr(big.(randn(3,3))))
+    @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3)))
+    @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3), NoPivot()))
+    @test_throws ArgumentError transpose(qr(big.(randn(ComplexF64,3,3))))
 end
 
 @testset "Issue 7304" begin
@@ -228,7 +235,7 @@ end
         for T in (Tr, Complex{Tr})
             v = convert(Vector{T}, vr)
             nv, nm = qr(v)
-            @test norm(nv - [-0.6 -0.8; -0.8 0.6], Inf) < eps(Tr)
+            @test norm(nv*Matrix(I, (2,2)) - [-0.6 -0.8; -0.8 0.6], Inf) < eps(Tr)
             @test nm == fill(-5.0, 1, 1)
         end
     end
@@ -261,7 +268,7 @@ end
 
 @testset "Issue 24589. Promotion of rational matrices" begin
     A = rand(1//1:5//5, 4,3)
-    @test first(qr(A)) == first(qr(float(A)))
+    @test Matrix(first(qr(A))) == Matrix(first(qr(float(A))))
 end
 
 @testset "Issue Test Factorization fallbacks for rectangular problems" begin
@@ -303,7 +310,7 @@ end
             @testset for k in 0:min(n, m, 5)
                 A = cat(Array(I(k)), randn(n - k, m - k); dims=(1, 2))
                 Q, = qr(A, pivot)
-                @test det(Q) ≈ det(collect(Q))
+                @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1)))
                 @test abs(det(Q)) ≈ 1
             end
         end
@@ -311,7 +318,7 @@ end
             @testset for k in 0:min(n, m, 5)
                 A = cat(Array(I(k)), randn(ComplexF64, n - k, m - k); dims=(1, 2))
                 Q, = qr(A, pivot)
-                @test det(Q) ≈ det(collect(Q))
+                @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1)))
                 @test abs(det(Q)) ≈ 1
             end
         end
@@ -322,6 +329,7 @@ end
     for T in (Float64, ComplexF64)
         Q = qr(randn(T,5,5)).Q
         @test inv(Q) === Q'
+        @test inv(Q)' === inv(Q') === Q
     end
 end
 
@@ -329,7 +337,7 @@ end
     for T in (Float32, Float64, ComplexF32, ComplexF64)
         Q1, R1 = qr(randn(T,5,5))
         Q2, R2 = qr(Q1)
-        @test Q1 ≈ Q2
+        @test Matrix(Q1) ≈ Matrix(Q2)
         @test R2 ≈ I
     end
 end
@@ -362,13 +370,13 @@ end
         n = 5
         Q, R = qr(randn(T,n,n))
         Qmat = Matrix(Q)
-        dest1 = similar(Q)
+        dest1 = Matrix{T}(undef, size(Q))
         copyto!(dest1, Q)
         @test dest1 ≈ Qmat
-        dest2 = PermutedDimsArray(similar(Q), (1, 2))
+        dest2 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (1, 2))
         copyto!(dest2, Q)
         @test dest2 ≈ Qmat
-        dest3 = PermutedDimsArray(similar(Q), (2, 1))
+        dest3 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (2, 1))
         copyto!(dest3, Q)
         @test dest3 ≈ Qmat
     end
@@ -419,8 +427,8 @@ end
     A = qr(ones(3, 1))
     B = I(3)
     C = B*A.Q'
-    @test C ≈ A.Q
-    @test A.Q' * B ≈ A.Q
+    @test C ≈ A.Q * Matrix(I, 3, 3)
+    @test A.Q' * B ≈ A.Q * Matrix(I, 3, 3)
 end
 
 @testset "convert between eltypes" begin
@@ -466,4 +474,34 @@ end
     @test MyIdentity{Float64}()[1,:] == [1.0, 0.0]
 end
 
+@testset "issue #48911" begin
+    # testcase in the original issue
+    # test ldiv!(::QRPivoted, ::AbstractVector)
+    A = Complex{BigFloat}[1+im 1-im]
+    b = Complex{BigFloat}[3+im]
+    x = A\b
+    AF = Complex{Float64}[1+im 1-im]
+    bf = Complex{Float64}[3+im]
+    xf = AF\bf
+    @test x ≈ xf
+
+    # test ldiv!(::QRPivoted, ::AbstractVector)
+    A = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
+    b = Complex{BigFloat}[1+im; 0]
+    x = A\b
+    AF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
+    bf = Complex{Float64}[1+im; 0]
+    xf = AF\bf
+    @test x ≈ xf
+
+    # test ldiv!(::QRPivoted, ::AbstractMatrix)
+    C = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
+    D = Complex{BigFloat}[1+im 1-im; 0 0]
+    x = C\D
+    CF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im]
+    DF = Complex{Float64}[1+im 1-im; 0 0]
+    xf = CF\DF
+    @test x ≈ xf
+end
+
 end # module TestQR
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
index df845ba3110da..eaa297e05d957 100644
--- a/stdlib/LinearAlgebra/test/special.jl
+++ b/stdlib/LinearAlgebra/test/special.jl
@@ -228,10 +228,10 @@ end
         b = rand(n,n)
         for pivot in (ColumnNorm(), NoPivot())
             qrb = qr(b, pivot)
-            @test atri * qrb.Q ≈ matri * qrb.Q ≈ rmul!(copy(atri), qrb.Q)
-            @test atri * qrb.Q' ≈ matri * qrb.Q' ≈ rmul!(copy(atri), qrb.Q')
-            @test qrb.Q * atri ≈ qrb.Q * matri ≈ lmul!(qrb.Q, copy(atri))
-            @test qrb.Q' * atri ≈ qrb.Q' * matri ≈ lmul!(qrb.Q', copy(atri))
+            @test atri * qrb.Q ≈ matri * qrb.Q
+            @test atri * qrb.Q' ≈ matri * qrb.Q'
+            @test qrb.Q * atri ≈ qrb.Q * matri
+            @test qrb.Q' * atri ≈ qrb.Q' * matri
         end
     end
 end
diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
index 4855446bc194b..2ca1904b2ff2d 100644
--- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
@@ -100,6 +100,8 @@ end
     @test_throws ArgumentError broadcast!(+, copy(T), T, A) == Tridiagonal(broadcast(*, T, A))
     @test_throws ArgumentError broadcast!(+, copy(◣), ◣, A) == LowerTriangular(broadcast(*, ◣, A))
     @test_throws ArgumentError broadcast!(+, copy(◥), ◥, A) == UpperTriangular(broadcast(*, ◥, A))
+    @test_throws ArgumentError broadcast!(*, copy(◥), ◣, 2)
+    @test_throws ArgumentError broadcast!(*, copy(Bu), Bl, 2)
 end
 
 @testset "map[!] over combinations of structured matrices" begin
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
index 880c9d7c0d747..04621c4b49e86 100644
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ b/stdlib/LinearAlgebra/test/symmetric.jl
@@ -790,4 +790,38 @@ end
     end
 end
 
+@testset "hermitian part" begin
+    for T in [Float32, Complex{Float32}, Int32, Rational{Int32},
+              Complex{Int32}, Complex{Rational{Int32}}]
+        f, f!, t = hermitianpart, hermitianpart!, T <: Real ? transpose : adjoint
+        X = T[1 2 3; 4 5 6; 7 8 9]
+        T <: Complex && (X .+= im .* X)
+        Xc = copy(X)
+        Y = (X + t(X)) / 2
+        U = f(X)
+        L = f(X, :L)
+        @test U isa Hermitian
+        @test L isa Hermitian
+        @test U.uplo == 'U'
+        @test L.uplo == 'L'
+        @test U == L == Y
+        if T <: AbstractFloat || real(T) <: AbstractFloat
+            HU = f!(X)
+            @test HU == Y
+            @test triu(X) == triu(Y)
+            HL = f!(Xc, :L)
+            @test HL == Y
+            @test tril(Xc) == tril(Y)
+        end
+    end
+    @test_throws DimensionMismatch hermitianpart(ones(1,2))
+    for T in (Float64, ComplexF64), uplo in (:U, :L)
+        A = [randn(T, 2, 2) for _ in 1:2, _ in 1:2]
+        Aherm = hermitianpart(A, uplo)
+        @test Aherm == Aherm.data == (A + A')/2
+        @test Aherm isa Hermitian
+        @test Aherm.uplo == LinearAlgebra.char_uplo(uplo)
+    end
+end
+
 end # module TestSymmetric
diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups
index 2648016e453a8..e281203bf3fa3 100644
--- a/stdlib/LinearAlgebra/test/testgroups
+++ b/stdlib/LinearAlgebra/test/testgroups
@@ -1,11 +1,11 @@
-addmul
 triangular
+addmul
+bidiag
 matmul
 dense
 symmetric
 diagonal
 special
-bidiag
 qr
 cholesky
 blas
@@ -25,4 +25,5 @@ bunchkaufman
 givens
 pinv
 factorization
+abstractq
 ldlt
diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl
index 48441e439708f..78fc2d5e0e74c 100644
--- a/stdlib/LinearAlgebra/test/triangular.jl
+++ b/stdlib/LinearAlgebra/test/triangular.jl
@@ -261,11 +261,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
         for eltyb in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
             b1 = convert(Vector{eltyb}, (elty1 <: Complex ? real(A1) : A1)*fill(1., n))
             b2 = convert(Vector{eltyb}, (elty1 <: Complex ? real(A1) : A1)*randn(n))
-            if elty1 in (BigFloat, Complex{BigFloat}) || eltyb in (BigFloat, Complex{BigFloat})
-                @test dot(b1, A1, b2) ≈ dot(A1'b1, b2)  atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n
-            else
-                @test dot(b1, A1, b2) ≈ dot(A1'b1, b2)  atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n
-            end
+            @test dot(b1, A1, b2) ≈ dot(A1'b1, b2)  atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n
         end
 
         # Binary operations
@@ -361,21 +357,29 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
                     if t1 === UnitUpperTriangular && t2 === UnitUpperTriangular
                         @test A1*A2 isa UnitUpperTriangular
                         @test A1/A2 isa UnitUpperTriangular
+                        elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int
                         @test A1\A2 isa UnitUpperTriangular
+                        elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int
                     else
                         @test A1*A2 isa UpperTriangular
                         @test A1/A2 isa UpperTriangular
+                        elty1 == Int && elty2 == Int && t2 === UnitUpperTriangular && @test eltype(A1/A2) == Int
                         @test A1\A2 isa UpperTriangular
+                        elty1 == Int && elty2 == Int && t1 === UnitUpperTriangular && @test eltype(A1\A2) == Int
                     end
                 elseif uplo1 === :L && uplo2 === :L
                     if t1 === UnitLowerTriangular && t2 === UnitLowerTriangular
                         @test A1*A2 isa UnitLowerTriangular
                         @test A1/A2 isa UnitLowerTriangular
+                        elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int
                         @test A1\A2 isa UnitLowerTriangular
+                        elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int
                     else
                         @test A1*A2 isa LowerTriangular
                         @test A1/A2 isa LowerTriangular
+                        elty1 == Int && elty2 == Int && t2 === UnitLowerTriangular && @test eltype(A1/A2) == Int
                         @test A1\A2 isa LowerTriangular
+                        elty1 == Int && elty2 == Int && t1 === UnitLowerTriangular && @test eltype(A1\A2) == Int
                     end
                 end
                 offsizeA = Matrix{Float64}(I, n+1, n+1)
@@ -412,17 +416,15 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
 
             debug && println("elty1: $elty1, A1: $t1, B: $eltyB")
 
-            if !(eltyB in (BigFloat, Complex{BigFloat})) # rand does not support BigFloat and Complex{BigFloat} as of Dec 2015
-                Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-                @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1)
-                Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-                C = Matrix{promote_type(elty1,eltyB)}(undef, n, n)
-                mul!(C, Tri, copy(A1))
-                @test C ≈ Tri*Matrix(A1)
-                Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-                mul!(C, copy(A1), Tri)
-                @test C ≈ Matrix(A1)*Tri
-            end
+            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
+            @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1)
+            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
+            C = Matrix{promote_type(elty1,eltyB)}(undef, n, n)
+            mul!(C, Tri, copy(A1))
+            @test C ≈ Tri*Matrix(A1)
+            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
+            mul!(C, copy(A1), Tri)
+            @test C ≈ Matrix(A1)*Tri
 
             # Triangular-dense Matrix/vector multiplication
             @test A1*B[:,1] ≈ Matrix(A1)*B[:,1]
@@ -699,8 +701,23 @@ isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "te
 using .Main.Furlongs
 LinearAlgebra.sylvester(a::Furlong,b::Furlong,c::Furlong) = -c / (a + b)
 
-let A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)])
-    @test sqrt(A) == Furlong{1//2}.(UpperTriangular([1 2; 0 1]))
+@testset "dimensional correctness" begin
+    A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)])
+    @test sqrt(A)::UpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1]))
+    @test inv(A)::UpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1]))
+    B = UnitUpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)])
+    @test sqrt(B)::UnitUpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1]))
+    @test inv(B)::UnitUpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1]))
+    b = [Furlong(5), Furlong(8)]
+    @test (A \ b)::Vector{<:Furlong{0}} == (B \ b)::Vector{<:Furlong{0}} == Furlong{0}.([-27, 8])
+    C = LowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)])
+    @test sqrt(C)::LowerTriangular == Furlong{1//2}.(LowerTriangular([1 0; 2 1]))
+    @test inv(C)::LowerTriangular == Furlong{-1}.(LowerTriangular([1 0; -4 1]))
+    D = UnitLowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)])
+    @test sqrt(D)::UnitLowerTriangular == Furlong{1//2}.(UnitLowerTriangular([1 0; 2 1]))
+    @test inv(D)::UnitLowerTriangular == Furlong{-1}.(UnitLowerTriangular([1 0; -4 1]))
+    b = [Furlong(5), Furlong(8)]
+    @test (C \ b)::Vector{<:Furlong{0}} == (D \ b)::Vector{<:Furlong{0}} == Furlong{0}.([5, -12])
 end
 
 isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
index 590870d4dad0a..e45fc9a65dba0 100644
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ b/stdlib/LinearAlgebra/test/tridiag.jl
@@ -9,6 +9,12 @@ const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
 using .Main.Quaternions
 
+isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl"))
+using .Main.InfiniteArrays
+
+isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl"))
+using .Main.FillArrays
+
 include("testutils.jl") # test_approx_eq_modphase
 
 #Test equivalence of eigenvectors/singular vectors taking into account possible phase (sign) differences
@@ -738,4 +744,38 @@ using .Main.SizedArrays
         @test S !== Tridiagonal(diag(Sdense, 1), diag(Sdense),  diag(Sdense, 1)) !== S
     end
 end
+
+@testset "copyto! with UniformScaling" begin
+    @testset "Tridiagonal" begin
+        @testset "Fill" begin
+            for len in (4, InfiniteArrays.Infinity())
+                d = FillArrays.Fill(1, len)
+                ud = FillArrays.Fill(0, len-1)
+                T = Tridiagonal(ud, d, ud)
+                @test copyto!(T, I) === T
+            end
+        end
+        T = Tridiagonal(fill(3, 3), fill(2, 4), fill(3, 3))
+        copyto!(T, I)
+        @test all(isone, diag(T))
+        @test all(iszero, diag(T, 1))
+        @test all(iszero, diag(T, -1))
+    end
+    @testset "SymTridiagonal" begin
+        @testset "Fill" begin
+            for len in (4, InfiniteArrays.Infinity())
+                d = FillArrays.Fill(1, len)
+                ud = FillArrays.Fill(0, len-1)
+                ST = SymTridiagonal(d, ud)
+                @test copyto!(ST, I) === ST
+            end
+        end
+        ST = SymTridiagonal(fill(2, 4), fill(3, 3))
+        copyto!(ST, I)
+        @test all(isone, diag(ST))
+        @test all(iszero, diag(ST, 1))
+        @test all(iszero, diag(ST, -1))
+    end
+end
+
 end # module TestTridiagonal
diff --git a/stdlib/MPFR_jll/src/MPFR_jll.jl b/stdlib/MPFR_jll/src/MPFR_jll.jl
index 5b2dbd1e84b24..c184a9801102f 100644
--- a/stdlib/MPFR_jll/src/MPFR_jll.jl
+++ b/stdlib/MPFR_jll/src/MPFR_jll.jl
@@ -13,9 +13,9 @@ export libmpfr
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libmpfr_handle = C_NULL
-libmpfr_path = ""
+artifact_dir::String = ""
+libmpfr_handle::Ptr{Cvoid} = C_NULL
+libmpfr_path::String = ""
 
 if Sys.iswindows()
     const libmpfr = "libmpfr-6.dll"
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 427bf7fe29ec7..e42061d593905 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -46,7 +46,7 @@ STDLIBS = Artifacts Base64 CRC32c Dates Distributed FileWatching \
           SharedArrays Sockets Test TOML Unicode UUIDs \
           $(JLL_NAMES)
 
-STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA
+STDLIBS_EXT = Pkg Statistics LibCURL DelimitedFiles Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA
 
 $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z))))
 
diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl
index 1d9e9a5523184..a031de4d9ad82 100644
--- a/stdlib/Markdown/src/render/terminal/formatting.jl
+++ b/stdlib/Markdown/src/render/terminal/formatting.jl
@@ -17,6 +17,23 @@ function wrapped_line(io::IO, s::AbstractString, width, i)
         word_length == 0 && continue
         if isempty(lines) || i + word_length + 1 > width
             i = word_length
+            if length(lines) > 0
+                last_line = lines[end]
+                maybe_underline = findlast(Base.text_colors[:underline], last_line)
+                if !isnothing(maybe_underline)
+                    # disable underline style at end of line if not already disabled.
+                    maybe_disable_underline = max(
+                        last(something(findlast(Base.disable_text_style[:underline], last_line), -1)),
+                        last(something(findlast(Base.text_colors[:normal], last_line), -1)),
+                    )
+
+                    if maybe_disable_underline < 0 || maybe_disable_underline < last(maybe_underline)
+
+                        lines[end] = last_line * Base.disable_text_style[:underline]
+                        word = Base.text_colors[:underline] * word
+                    end
+                end
+            end
             push!(lines, word)
         else
             i += word_length + 1
diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl
index a7421b13660a0..20b1ef6d041fc 100644
--- a/stdlib/Markdown/src/render/terminal/render.jl
+++ b/stdlib/Markdown/src/render/terminal/render.jl
@@ -81,14 +81,15 @@ end
 function _term_header(io::IO, md, char, columns)
     text = terminline_string(io, md.text)
     with_output_color(:bold, io) do io
-        print(io, ' '^margin)
+        pre = ' '^margin
+        print(io, pre)
         line_no, lastline_width = print_wrapped(io, text,
-                                                width=columns - 4margin; pre=" ")
-        line_width = min(1 + lastline_width, columns)
+                                                width=columns - 4margin; pre)
+        line_width = min(lastline_width, columns)
         if line_no > 1
-            line_width = max(line_width, div(columns, 3))
+            line_width = max(line_width, div(columns, 3)+length(pre))
         end
-        header_width = max(0, line_width-margin)
+        header_width = max(0, line_width-length(pre))
         char != ' ' && header_width > 0 && print(io, '\n', ' '^(margin), char^header_width)
     end
 end
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index 52bcf07ad8942..19d821a0254d7 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1149,7 +1149,7 @@ end
 # issue 20225, check this can print
 @test typeof(sprint(Markdown.term, Markdown.parse(" "))) == String
 
-# different output depending on whether color is requested:	+# issue 20225, check this can print
+# different output depending on whether color is requested: +# issue 20225, check this can print
 let buf = IOBuffer()
     @test typeof(sprint(Markdown.term, Markdown.parse(" "))) == String
     show(buf, "text/plain", md"*emph*")
@@ -1160,6 +1160,38 @@ let buf = IOBuffer()
     @test String(take!(buf)) == "  \e[4memph\e[24m"
 end
 
+let word = "Markdown" # disable underline when wrapping lines
+    buf = IOBuffer()
+    ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], length(word)))
+    long_italic_text = Markdown.parse('_' * join(fill(word, 10), ' ') * '_')
+    show(ctx, MIME("text/plain"), long_italic_text)
+    lines = split(String(take!(buf)), '\n')
+    @test endswith(lines[begin], Base.disable_text_style[:underline])
+    @test startswith(lines[begin+1], ' '^Markdown.margin * Base.text_colors[:underline])
+end
+
+let word = "Markdown" # pre is of size Markdown.margin when wrapping title
+    buf = IOBuffer()
+    ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], length(word)))
+    long_title = Markdown.parse("# " * join(fill(word, 3)))
+    show(ctx, MIME("text/plain"), long_title)
+    lines = split(String(take!(buf)), '\n')
+    @test all(startswith(Base.text_colors[:bold] * ' '^Markdown.margin), lines)
+end
+
+struct Struct49454 end
+Base.show(io::IO, ::Struct49454) =
+    print(io, Base.text_colors[:underline], "Struct 49454()", Base.text_colors[:normal])
+
+let buf = IOBuffer()
+    ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], 10))
+    show(stdout, MIME("text/plain"), md"""
+    text without $(Struct49454()) underline.
+    """)
+    lines = split(String(take!(buf)), '\n')
+    @test !occursin(Base.text_colors[:underline], lines[end])
+end
+
 # table rendering with term #25213
 t = """
     a   |   b
diff --git a/stdlib/MbedTLS_jll/Project.toml b/stdlib/MbedTLS_jll/Project.toml
index 00a6b29426d91..2e8d0d384f88a 100644
--- a/stdlib/MbedTLS_jll/Project.toml
+++ b/stdlib/MbedTLS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "MbedTLS_jll"
 uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-version = "2.28.0+0"
+version = "2.28.2+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
index 338bec9503c07..e46da42a9a638 100644
--- a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
+++ b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
@@ -14,13 +14,13 @@ export libmbedcrypto, libmbedtls, libmbedx509
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libmbedcrypto_handle = C_NULL
-libmbedcrypto_path = ""
-libmbedtls_handle = C_NULL
-libmbedtls_path = ""
-libmbedx509_handle = C_NULL
-libmbedx509_path = ""
+artifact_dir::String = ""
+libmbedcrypto_handle::Ptr{Cvoid} = C_NULL
+libmbedcrypto_path::String = ""
+libmbedtls_handle::Ptr{Cvoid} = C_NULL
+libmbedtls_path::String = ""
+libmbedx509_handle::Ptr{Cvoid} = C_NULL
+libmbedx509_path::String = ""
 
 if Sys.iswindows()
     const libmbedcrypto = "libmbedcrypto.dll"
diff --git a/stdlib/MbedTLS_jll/test/runtests.jl b/stdlib/MbedTLS_jll/test/runtests.jl
index b731d7f833043..2d82fa564cd18 100644
--- a/stdlib/MbedTLS_jll/test/runtests.jl
+++ b/stdlib/MbedTLS_jll/test/runtests.jl
@@ -6,5 +6,5 @@ using Test, Libdl, MbedTLS_jll
     vstr = zeros(UInt8, 32)
     ccall((:mbedtls_version_get_string, libmbedcrypto), Cvoid, (Ref{UInt8},), vstr)
     vn = VersionNumber(unsafe_string(pointer(vstr)))
-    @test vn == v"2.28.0"
+    @test vn == v"2.28.2"
 end
diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml
index db6e85252e17f..cef860fda4acd 100644
--- a/stdlib/MozillaCACerts_jll/Project.toml
+++ b/stdlib/MozillaCACerts_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "MozillaCACerts_jll"
 uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
-version = "2022.10.11"
+version = "2023.01.10"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index f04e3491ad22b..6d953327003be 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.21+0"
+version = "0.3.23+0"
 
 [deps]
 CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
index 2684a6b635cb4..4f1c57a7d06be 100644
--- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
+++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
@@ -13,9 +13,9 @@ export libopenblas
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libopenblas_handle = C_NULL
-libopenblas_path = ""
+artifact_dir::String = ""
+libopenblas_handle::Ptr{Cvoid} = C_NULL
+libopenblas_path::String = ""
 
 if Base.USE_BLAS64
     const libsuffix = "64_"
diff --git a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
index e3536021ad4c9..f2dee45a279cd 100644
--- a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
+++ b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
@@ -13,9 +13,9 @@ export libopenlibm
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libopenlibm_handle = C_NULL
-libopenlibm_path = ""
+artifact_dir::String = ""
+libopenlibm_handle::Ptr{Cvoid} = C_NULL
+libopenlibm_path::String = ""
 
 if Sys.iswindows()
     const libopenlibm = "libopenlibm.dll"
diff --git a/stdlib/PCRE2_jll/src/PCRE2_jll.jl b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
index 81048a45998b5..e7f685820830b 100644
--- a/stdlib/PCRE2_jll/src/PCRE2_jll.jl
+++ b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
@@ -13,9 +13,9 @@ export libpcre2_8
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libpcre2_8_handle = C_NULL
-libpcre2_8_path = ""
+artifact_dir::String = ""
+libpcre2_8_handle::Ptr{Cvoid} = C_NULL
+libpcre2_8_path::String = ""
 
 if Sys.iswindows()
     const libpcre2_8 = "libpcre2-8-0.dll"
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 5660d343487fa..7b5006f2141ff 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,4 +1,4 @@
 PKG_BRANCH = master
-PKG_SHA1 = 957b55a896d5cb496da134ea7bf3ee70de07ef2a
+PKG_SHA1 = daf02a458ae6daa402a5dd6683c40d6910325c4e
 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
 PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index 9f14961aa2acf..62a84d7d36984 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -34,19 +34,29 @@ struct Spec{T} # T => %type => Val{'type'}
     hash::Bool
     width::Int
     precision::Int
+    dynamic_width::Bool
+    dynamic_precision::Bool
 end
 
 # recreate the format specifier string from a typed Spec
 Base.string(f::Spec{T}; modifier::String="") where {T} =
-    string("%", f.leftalign ? "-" : "", f.plus ? "+" : "", f.space ? " " : "",
-        f.zero ? "0" : "", f.hash ? "#" : "", f.width > 0 ? f.width : "",
-        f.precision == 0 ? ".0" : f.precision > 0 ? ".$(f.precision)" : "", modifier, char(T))
+    string("%",
+           f.leftalign ? "-" : "",
+           f.plus ? "+" : "",
+           f.space ? " " : "",
+           f.zero ? "0" : "",
+           f.hash ? "#" : "",
+           f.dynamic_width ? "*" : (f.width > 0 ? f.width : ""),
+           f.dynamic_precision ? ".*" : (f.precision == 0 ? ".0" : (f.precision > 0 ? ".$(f.precision)" : "")),
+           modifier,
+           char(T))
+
 Base.show(io::IO, f::Spec) = print(io, string(f))
 
 floatfmt(s::Spec{T}) where {T} =
-    Spec{Val{'f'}}(s.leftalign, s.plus, s.space, s.zero, s.hash, s.width, 0)
+    Spec{Val{'f'}}(s.leftalign, s.plus, s.space, s.zero, s.hash, s.width, 0, s.dynamic_width, s.dynamic_precision)
 ptrfmt(s::Spec{T}, x) where {T} =
-    Spec{Val{'x'}}(s.leftalign, s.plus, s.space, s.zero, true, s.width, sizeof(x) == 8 ? 16 : 8)
+    Spec{Val{'x'}}(s.leftalign, s.plus, s.space, s.zero, true, s.width, sizeof(x) == 8 ? 16 : 8, s.dynamic_width, s.dynamic_precision)
 
 """
     Printf.Format(format_str)
@@ -75,6 +85,7 @@ struct Format{S, T}
       # and so on, then at the end, str[substringranges[end]]
     substringranges::Vector{UnitRange{Int}}
     formats::T # Tuple of Specs
+    numarguments::Int  # required for dynamic format specifiers
 end
 
 # what number base should be used for a given format specifier?
@@ -115,6 +126,8 @@ function Format(f::AbstractString)
     bytes = codeunits(f)
     len = length(bytes)
     pos = 1
+    numarguments = 0
+
     b = 0x00
     local last_percent_pos
 
@@ -165,26 +178,43 @@ function Format(f::AbstractString)
         end
         # parse width
         width = 0
-        while b - UInt8('0') < 0x0a
-            width = 10 * width + (b - UInt8('0'))
+        dynamic_width = false
+        if b == UInt8('*')
+            dynamic_width = true
+            numarguments += 1
             b = bytes[pos]
             pos += 1
-            pos > len && break
+        else
+            while b - UInt8('0') < 0x0a
+            width = 10 * width + (b - UInt8('0'))
+                b = bytes[pos]
+                pos += 1
+                pos > len && break
+            end
         end
         # parse precision
         precision = 0
         parsedprecdigits = false
+        dynamic_precision = false
         if b == UInt8('.')
             pos > len && throw(InvalidFormatStringError("Precision specifier is missing precision", f, last_percent_pos, pos-1))
             parsedprecdigits = true
             b = bytes[pos]
             pos += 1
             if pos <= len
-                while b - UInt8('0') < 0x0a
-                    precision = 10precision + (b - UInt8('0'))
+                if b == UInt8('*')
+                    dynamic_precision = true
+                    numarguments += 1
                     b = bytes[pos]
                     pos += 1
-                    pos > len && break
+                else
+                    precision = 0
+                    while b - UInt8('0') < 0x0a
+                        precision = 10precision + (b - UInt8('0'))
+                        b = bytes[pos]
+                        pos += 1
+                        pos > len && break
+                    end
                 end
             end
         end
@@ -208,6 +238,8 @@ function Format(f::AbstractString)
         !(b in b"diouxXDOUeEfFgGaAcCsSpn") && throw(InvalidFormatStringError("'$(Char(b))' is not a valid type specifier", f, last_percent_pos, pos-1))
         type = Val{Char(b)}
         if type <: Ints && precision > 0
+            # note - we should also set zero to false if dynamic precison > 0
+            # this is taken care of in fmt() for Ints
             zero = false
         elseif (type <: Strings || type <: Chars) && !parsedprecdigits
             precision = -1
@@ -216,7 +248,8 @@ function Format(f::AbstractString)
         elseif type <: Floats && !parsedprecdigits
             precision = 6
         end
-        push!(fmts, Spec{type}(leftalign, plus, space, zero, hash, width, precision))
+        numarguments += 1
+        push!(fmts, Spec{type}(leftalign, plus, space, zero, hash, width, precision, dynamic_width, dynamic_precision))
         start = pos
         while pos <= len
             b = bytes[pos]
@@ -235,7 +268,7 @@ function Format(f::AbstractString)
         end
         push!(strs, start:pos - 1 - (b == UInt8('%')))
     end
-    return Format(bytes, strs, Tuple(fmts))
+    return Format(bytes, strs, Tuple(fmts), numarguments)
 end
 
 macro format_str(str)
@@ -257,6 +290,28 @@ const HEX = b"0123456789ABCDEF"
     return pos
 end
 
+
+@inline function rmdynamic(spec::Spec{T}, args, argp) where {T}
+    zero, width, precision = spec.zero, spec.width, spec.precision
+    if spec.dynamic_width
+        width = args[argp]
+        argp += 1
+    end
+    if spec.dynamic_precision
+        precision = args[argp]
+        if zero && T <: Ints && precision > 0
+            zero = false
+        end
+        argp += 1
+    end
+    (Spec{T}(spec.leftalign, spec.plus, spec.space, zero, spec.hash, width, precision, false, false), argp)
+end
+
+@inline function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
+    spec, argp = rmdynamic(spec, args, argp)
+    (fmt(buf, pos, args[argp], spec), argp+1)
+end
+
 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
     leftalign, width = spec.leftalign, spec.width
     c = Char(first(arg))
@@ -772,9 +827,10 @@ const UNROLL_UPTO = 16
     # for each format, write out arg and next substring
     # unroll up to 16 formats
     N = length(f.formats)
+    argp = 1
     Base.@nexprs 16 i -> begin
         if N >= i
-            pos = fmt(buf, pos, args[i], f.formats[i])
+            pos, argp = fmt(buf, pos, args, argp, f.formats[i])
             for j in f.substringranges[i + 1]
                 b = f.str[j]
                 if !escapechar
@@ -789,7 +845,7 @@ const UNROLL_UPTO = 16
     end
     if N > 16
         for i = 17:length(f.formats)
-            pos = fmt(buf, pos, args[i], f.formats[i])
+            pos, argp = fmt(buf, pos, args, argp, f.formats[i])
             for j in f.substringranges[i + 1]
                 b = f.str[j]
                 if !escapechar
@@ -805,11 +861,17 @@ const UNROLL_UPTO = 16
     return pos
 end
 
+@inline function plength(f::Spec{T}, args, argp) where {T}
+    f, argp = rmdynamic(f, args, argp)
+    (plength(f, args[argp]), argp+1)
+end
+
 function plength(f::Spec{T}, x) where {T <: Chars}
     c = Char(first(x))
     w = textwidth(c)
     return max(f.width, w) + (ncodeunits(c) - w)
 end
+
 plength(f::Spec{Pointer}, x) = max(f.width, 2 * sizeof(x) + 2)
 
 function plength(f::Spec{T}, x) where {T <: Strings}
@@ -837,14 +899,17 @@ plength(::Spec{PositionCounter}, x) = 0
     len = sum(length, substringranges)
     N = length(formats)
     # unroll up to 16 formats
+    argp = 1
     Base.@nexprs 16 i -> begin
         if N >= i
-            len += plength(formats[i], args[i])
+            l, argp = plength(formats[i], args, argp)
+            len += l
         end
     end
     if N > 16
         for i = 17:length(formats)
-            len += plength(formats[i], args[i])
+            l, argp = plength(formats[i], args, argp)
+            len += l
         end
     end
     return len
@@ -864,7 +929,7 @@ for more details on C `printf` support.
 function format end
 
 function format(io::IO, f::Format, args...) # => Nothing
-    length(f.formats) == length(args) || argmismatch(length(f.formats), length(args))
+    f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
     write(io, resize!(buf, pos - 1))
@@ -872,7 +937,7 @@ function format(io::IO, f::Format, args...) # => Nothing
 end
 
 function format(f::Format, args...) # => String
-    length(f.formats) == length(args) || argmismatch(length(f.formats), length(args))
+    f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
     return String(resize!(buf, pos - 1))
@@ -906,8 +971,10 @@ Padded with zeros to length 6 000123
 
 julia> @printf "Use shorter of decimal or scientific %g %g" 1.23 12300000.0
 Use shorter of decimal or scientific 1.23 1.23e+07
-```
 
+julia> @printf "Use dynamic width and precision  %*.*f" 10 2 0.12345
+Use dynamic width and precision        0.12
+```
 For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/).
 See also [`@sprintf`](@ref) to get the result as a `String` instead of it being printed.
 
@@ -931,6 +998,9 @@ julia> @printf "%.0f %.1f %f" 0.5 0.025 -0.0078125
     using [`textwidth`](@ref), which e.g. ignores zero-width characters
     (such as combining characters for diacritical marks) and treats certain
     "wide" characters (e.g. emoji) as width `2`.
+
+!!! compat "Julia 1.10"
+    Dynamic width specifiers like `%*s` and `%0*.*f` require Julia 1.10.
 """
 macro printf(io_or_fmt, args...)
     if io_or_fmt isa String
diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index 40a6a763e4eac..96d61b61d02e3 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -775,6 +775,7 @@ end
     @test Printf.@sprintf("%40d", typemax(Int128)) == " 170141183460469231731687303715884105727"
 end
 
+
 @testset "%n" begin
     x = Ref{Int}()
     @test (Printf.@sprintf("%d4%n", 123, x); x[] == 4)
@@ -782,6 +783,359 @@ end
     @test (Printf.@sprintf("%s%n", "1234", x); x[] == 4)
 end
 
+@testset "dynamic" begin
+
+    # dynamic width and precision
+    @test Printf.@sprintf("%*d", 10, 12)         == "        12"
+    @test Printf.@sprintf("%.*d",  4, 12)        == "0012"
+    @test Printf.@sprintf("%*.*d", 10, 4, 12)    == "      0012"
+    @test Printf.@sprintf("%+*.*d", 10, 4, 12)   == "     +0012"
+    @test Printf.@sprintf("%0*.*d", 10, 4, 12)   == "      0012"
+
+    @test Printf.@sprintf("%*d%*d%*d", 4, 12, 4, 13, 4, 14)  == "  12  13  14"
+    @test Printf.@sprintf("%*d%*d%*d", 4, 12, 5, 13, 6, 14)  == "  12   13    14"
+
+    # dynamic should return whatever the static width and precision returns
+
+
+    # pointers
+    @test Printf.@sprintf("%*p", 20, 0) == Printf.@sprintf("%20p", 0)
+    @test Printf.@sprintf("%-*p", 20, 0) == Printf.@sprintf("%-20p", 0)
+    @test Printf.@sprintf("%*p", 20, C_NULL) == Printf.@sprintf("%20p", C_NULL)
+    @test Printf.@sprintf("%-*p", 20, C_NULL) ==  Printf.@sprintf("%-20p", C_NULL)
+
+    # hex float
+    @test Printf.@sprintf("%.*a", 0, 3.14) == Printf.@sprintf("%.0a", 3.14)
+    @test Printf.@sprintf("%.*a", 1, 3.14) == Printf.@sprintf("%.1a", 3.14)
+    @test Printf.@sprintf("%.*a", 2, 3.14) == Printf.@sprintf("%.2a", 3.14)
+    @test Printf.@sprintf("%#.*a", 0, 3.14) == Printf.@sprintf("%#.0a", 3.14)
+    @test Printf.@sprintf("%#.*a", 1, 3.14) == Printf.@sprintf("%#.1a", 3.14)
+    @test Printf.@sprintf("%#.*a", 2, 3.14) == Printf.@sprintf("%#.2a", 3.14)
+    @test Printf.@sprintf("%.*a", 6, 1.5) == Printf.@sprintf("%.6a", 1.5)
+
+    # "%g"
+    @test Printf.@sprintf("%*.*g", 10, 5, -123.4 ) == Printf.@sprintf( "%10.5g", -123.4 )
+    @test Printf.@sprintf("%0*.*g", 10, 5, -123.4 ) == Printf.@sprintf( "%010.5g", -123.4 )
+    @test Printf.@sprintf("%.*g", 6, 12340000.0 ) == Printf.@sprintf( "%.6g", 12340000.0 )
+    @test Printf.@sprintf("%#.*g", 6, 12340000.0 ) == Printf.@sprintf( "%#.6g", 12340000.0 )
+    @test Printf.@sprintf("%*.*g", 10, 5, big"-123.4" ) == Printf.@sprintf( "%10.5g", big"-123.4" )
+    @test Printf.@sprintf("%0*.*g", 10, 5, big"-123.4" ) == Printf.@sprintf( "%010.5g", big"-123.4" )
+    @test Printf.@sprintf("%.*g", 6, big"12340000.0" ) == Printf.@sprintf( "%.6g", big"12340000.0" )
+    @test Printf.@sprintf("%#.*g", 6, big"12340000.0") == Printf.@sprintf( "%#.6g", big"12340000.0")
+
+    @test Printf.@sprintf("%.*g", 5, 42) == Printf.@sprintf( "%.5g", 42)
+    @test Printf.@sprintf("%#.*g", 2, 42) == Printf.@sprintf( "%#.2g", 42)
+    @test Printf.@sprintf("%#.*g", 5, 42) == Printf.@sprintf( "%#.5g", 42)
+
+    @test Printf.@sprintf("%.*g", 15, 0) == Printf.@sprintf("%.15g", 0)
+    @test Printf.@sprintf("%#.*g", 15, 0) == Printf.@sprintf("%#.15g", 0)
+
+    # "%f"
+    @test Printf.@sprintf("%.*f", 0, 3e142) ==  Printf.@sprintf( "%.0f", 3e142)
+    @test Printf.@sprintf("%.*f", 2, 1.234) == Printf.@sprintf("%.2f", 1.234)
+    @test Printf.@sprintf("%.*f", 2, 1.235) == Printf.@sprintf("%.2f", 1.235)
+    @test Printf.@sprintf("%.*f", 2, 0.235) == Printf.@sprintf("%.2f", 0.235)
+    @test Printf.@sprintf("%*.*f", 4, 1, 1.234) == Printf.@sprintf("%4.1f", 1.234)
+    @test Printf.@sprintf("%*.*f", 8, 1, 1.234) == Printf.@sprintf("%8.1f", 1.234)
+    @test Printf.@sprintf("%+*.*f", 8, 1, 1.234) == Printf.@sprintf("%+8.1f", 1.234)
+    @test Printf.@sprintf("% *.*f", 8, 1, 1.234) == Printf.@sprintf("% 8.1f", 1.234)
+    @test Printf.@sprintf("% *.*f", 7, 1, 1.234) == Printf.@sprintf("% 7.1f", 1.234)
+    @test Printf.@sprintf("% 0*.*f", 8, 1, 1.234) == Printf.@sprintf("% 08.1f", 1.234)
+    @test Printf.@sprintf("%0*.*f", 8, 1, 1.234) == Printf.@sprintf("%08.1f", 1.234)
+    @test Printf.@sprintf("%-0*.*f", 8, 1, 1.234) == Printf.@sprintf("%-08.1f", 1.234)
+    @test Printf.@sprintf("%-*.*f", 8, 1, 1.234) == Printf.@sprintf("%-8.1f", 1.234)
+    @test Printf.@sprintf("%0*.*f", 8, 1, -1.234) == Printf.@sprintf("%08.1f", -1.234)
+    @test Printf.@sprintf("%0*.*f", 9, 1, -1.234) == Printf.@sprintf("%09.1f", -1.234)
+    @test Printf.@sprintf("%0*.*f", 9, 1, 1.234) == Printf.@sprintf("%09.1f", 1.234)
+    @test Printf.@sprintf("%+0*.*f", 9, 1, 1.234) == Printf.@sprintf("%+09.1f", 1.234)
+    @test Printf.@sprintf("% 0*.*f", 9, 1, 1.234) == Printf.@sprintf("% 09.1f", 1.234)
+    @test Printf.@sprintf("%+ 0*.*f", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1f", 1.234)
+    @test Printf.@sprintf("%+ 0*.*f", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1f", 1.234)
+    @test Printf.@sprintf("%+ 0*.*f", 9, 0, 1.234) == Printf.@sprintf("%+ 09.0f", 1.234)
+    @test Printf.@sprintf("%+ #0*.*f", 9, 0, 1.234) == Printf.@sprintf("%+ #09.0f", 1.234)
+
+    # "%e"
+    @test Printf.@sprintf("%*.*e", 10, 4, Inf) == Printf.@sprintf("%10.4e", Inf)
+    @test Printf.@sprintf("%*.*e", 10, 4, NaN) == Printf.@sprintf("%10.4e", NaN)
+    @test Printf.@sprintf("%*.*e", 10, 4, big"Inf") == Printf.@sprintf("%10.4e", big"Inf")
+    @test Printf.@sprintf("%*.*e", 10, 4, big"NaN") == Printf.@sprintf("%10.4e", big"NaN")
+
+    @test Printf.@sprintf("%.*e", 0, 3e142) == Printf.@sprintf("%.0e",3e142)
+    @test Printf.@sprintf("%#.*e", 0,  3e142) == Printf.@sprintf("%#.0e", 3e142)
+    @test Printf.@sprintf("%.*e", 0,  big"3e142") == Printf.@sprintf("%.0e", big"3e142")
+
+    @test Printf.@sprintf("%#.*e", 0,  big"3e142") == Printf.@sprintf("%#.0e", big"3e142")
+    @test Printf.@sprintf("%.*e", 0, big"3e1042") == Printf.@sprintf("%.0e", big"3e1042")
+
+    @test Printf.@sprintf("%.*e", 2, 1.234) == Printf.@sprintf("%.2e", 1.234)
+    @test Printf.@sprintf("%.*e", 2, 1.235) == Printf.@sprintf("%.2e", 1.235)
+    @test Printf.@sprintf("%.*e", 2, 0.235) == Printf.@sprintf("%.2e", 0.235)
+    @test Printf.@sprintf("%*.*e", 4, 1, 1.234) == Printf.@sprintf("%4.1e", 1.234)
+    @test Printf.@sprintf("%*.*e", 8, 1, 1.234) == Printf.@sprintf("%8.1e", 1.234)
+    @test Printf.@sprintf("%+*.*e", 8, 1, 1.234) == Printf.@sprintf("%+8.1e", 1.234)
+    @test Printf.@sprintf("% *.*e", 8, 1, 1.234) == Printf.@sprintf("% 8.1e", 1.234)
+    @test Printf.@sprintf("% *.*e", 7, 1, 1.234) == Printf.@sprintf("% 7.1e", 1.234)
+    @test Printf.@sprintf("% 0*.*e", 8, 1, 1.234) == Printf.@sprintf("% 08.1e", 1.234)
+    @test Printf.@sprintf("%0*.*e", 8, 1, 1.234) == Printf.@sprintf("%08.1e", 1.234)
+    @test Printf.@sprintf("%-0*.*e", 8, 1, 1.234) == Printf.@sprintf("%-08.1e", 1.234)
+    @test Printf.@sprintf("%-*.*e", 8, 1, 1.234) == Printf.@sprintf("%-8.1e", 1.234)
+    @test Printf.@sprintf("%-*.*e", 8, 1, 1.234) == Printf.@sprintf("%-8.1e", 1.234)
+    @test Printf.@sprintf("%0*.*e", 8, 1, -1.234) == Printf.@sprintf("%08.1e", -1.234)
+    @test Printf.@sprintf("%0*.*e", 9, 1, -1.234) == Printf.@sprintf("%09.1e", -1.234)
+    @test Printf.@sprintf("%0*.*e", 9, 1, 1.234) == Printf.@sprintf("%09.1e", 1.234)
+    @test Printf.@sprintf("%+0*.*e", 9, 1, 1.234) == Printf.@sprintf("%+09.1e", 1.234)
+    @test Printf.@sprintf("% 0*.*e", 9, 1, 1.234) == Printf.@sprintf("% 09.1e", 1.234)
+    @test Printf.@sprintf("%+ 0*.*e", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1e", 1.234)
+    @test Printf.@sprintf("%+ 0*.*e", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1e", 1.234)
+    @test Printf.@sprintf("%+ 0*.*e", 9, 0, 1.234) == Printf.@sprintf("%+ 09.0e", 1.234)
+    @test Printf.@sprintf("%+ #0*.*e", 9, 0, 1.234) == Printf.@sprintf("%+ #09.0e", 1.234)
+
+    # strings
+    @test Printf.@sprintf("%.*s", 1, "foo") == Printf.@sprintf("%.1s", "foo")
+    @test Printf.@sprintf("%*s", 1, "Hallo heimur") == Printf.@sprintf("%1s", "Hallo heimur")
+    @test Printf.@sprintf("%*s", 20, "Hallo") == Printf.@sprintf("%20s", "Hallo")
+    @test Printf.@sprintf("%-*s", 20, "Hallo") == Printf.@sprintf("%-20s", "Hallo")
+    @test Printf.@sprintf("%0-*s", 20, "Hallo") == Printf.@sprintf("%0-20s", "Hallo")
+    @test Printf.@sprintf("%.*s", 20, "Hallo heimur") == Printf.@sprintf("%.20s", "Hallo heimur")
+    @test Printf.@sprintf("%*.*s", 20, 5, "Hallo heimur") == Printf.@sprintf("%20.5s", "Hallo heimur")
+    @test Printf.@sprintf("%.*s", 0, "Hallo heimur") == Printf.@sprintf("%.0s", "Hallo heimur")
+    @test Printf.@sprintf("%*.*s", 20, 0, "Hallo heimur") == Printf.@sprintf("%20.0s", "Hallo heimur")
+    @test Printf.@sprintf("%.s", "Hallo heimur") == Printf.@sprintf("%.s", "Hallo heimur")
+    @test Printf.@sprintf("%*.s", 20, "Hallo heimur") == Printf.@sprintf("%20.s", "Hallo heimur")
+    @test Printf.@sprintf("%*sø", 4, "ø") == Printf.@sprintf("%4sø", "ø")
+    @test Printf.@sprintf("%-*sø", 4, "ø") == Printf.@sprintf("%-4sø", "ø")
+
+    @test Printf.@sprintf("%*s", 8, "test") == Printf.@sprintf("%8s", "test")
+    @test Printf.@sprintf("%-*s", 8, "test") == Printf.@sprintf("%-8s", "test")
+
+    @test Printf.@sprintf("%#*s", 8, :test) == Printf.@sprintf("%#8s", :test)
+    @test Printf.@sprintf("%#-*s", 8, :test) == Printf.@sprintf("%#-8s", :test)
+
+    @test Printf.@sprintf("%*.*s", 8, 3, "test") == Printf.@sprintf("%8.3s", "test")
+    @test Printf.@sprintf("%#*.*s", 8, 3, "test") == Printf.@sprintf("%#8.3s", "test")
+    @test Printf.@sprintf("%-*.*s", 8, 3, "test") == Printf.@sprintf("%-8.3s", "test")
+    @test Printf.@sprintf("%#-*.*s", 8, 3, "test") == Printf.@sprintf("%#-8.3s", "test")
+    @test Printf.@sprintf("%.*s", 3, "test") == Printf.@sprintf("%.3s", "test")
+    @test Printf.@sprintf("%#.*s", 3, "test") == Printf.@sprintf("%#.3s", "test")
+    @test Printf.@sprintf("%-.*s", 3, "test") == Printf.@sprintf("%-.3s", "test")
+    @test Printf.@sprintf("%#-.*s", 3, "test") == Printf.@sprintf("%#-.3s", "test")
+
+    # chars
+    @test Printf.@sprintf("%*c", 3, 'a') == Printf.@sprintf("%3c", 'a')
+    @test Printf.@sprintf("%*c", 1, 'x') == Printf.@sprintf("%1c", 'x')
+    @test Printf.@sprintf("%*c"  , 20, 'x') == Printf.@sprintf("%20c"  , 'x')
+    @test Printf.@sprintf("%-*c" , 20, 'x') == Printf.@sprintf("%-20c" , 'x')
+    @test Printf.@sprintf("%-0*c", 20, 'x') == Printf.@sprintf("%-020c", 'x')
+    @test Printf.@sprintf("%*c", 3, 'A') == Printf.@sprintf("%3c", 'A')
+    @test Printf.@sprintf("%-*c", 3, 'A') == Printf.@sprintf("%-3c", 'A')
+
+    # more than 16 formats/args
+    @test Printf.@sprintf("%*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f", 4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345) ==  Printf.@sprintf("%4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f", 1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345)
+
+    # Check bug with trailing nul printing BigFloat
+    @test (Printf.@sprintf("%.*f", 330, BigFloat(1)))[end] != '\0'
+
+    # Check bug with precision > length of string
+    @test Printf.@sprintf("%*.*s", 4, 2, "a") == Printf.@sprintf("%4.2s", "a")
+
+    # issue #29662
+    @test Printf.@sprintf("%*.*e", 12, 3, pi*1e100) == Printf.@sprintf("%12.3e", pi*1e100)
+    @test Printf.@sprintf("%*d", 2, 3.14) == Printf.@sprintf("%*d", 2, 3.14)
+    @test Printf.@sprintf("%*d", 2, big(3.14)) == Printf.@sprintf("%*d", 2, big(3.14))
+
+    # 37539
+    @test Printf.@sprintf(" %.*e\n", 1, 0.999) == Printf.@sprintf(" %.1e\n", 0.999)
+    @test Printf.@sprintf("   %.*f", 1, 9.999) == Printf.@sprintf("   %.1f", 9.999)
+
+    # integers
+    @test Printf.@sprintf("%*d", 10, 12)         == (Printf.@sprintf("%10d", 12))
+    @test Printf.@sprintf("%.*d",  4, 12)        == (Printf.@sprintf("%.4d", 12))
+    @test Printf.@sprintf("%*.*d", 10, 4, 12)    == (Printf.@sprintf("%10.4d", 12))
+    @test Printf.@sprintf("%+*.*d", 10, 4, 12)   == (Printf.@sprintf("%+10.4d", 12))
+    @test Printf.@sprintf("%0*.*d", 10, 4, 12)   == (Printf.@sprintf("%010.4d", 12))
+
+    @test Printf.@sprintf( "% *d",  5,  42)   == Printf.@sprintf( "% 5d",  42)
+    @test Printf.@sprintf( "% *d",  5, -42)   == Printf.@sprintf( "% 5d", -42)
+    @test Printf.@sprintf( "% *d", 15,  42)   == Printf.@sprintf( "% 15d",  42)
+    @test Printf.@sprintf( "% *d", 15, -42)   == Printf.@sprintf( "% 15d", -42)
+
+    @test Printf.@sprintf("%+*d",  5,  42) == Printf.@sprintf("%+5d",  42)
+    @test Printf.@sprintf("%+*d",  5, -42) == Printf.@sprintf("%+5d", -42)
+    @test Printf.@sprintf("%+*d", 15,  42) == Printf.@sprintf("%+15d",  42)
+    @test Printf.@sprintf("%+*d", 15, -42) == Printf.@sprintf("%+15d", -42)
+    @test Printf.@sprintf( "%*d",  0,  42) == Printf.@sprintf( "%0d",  42)
+    @test Printf.@sprintf( "%*d",  0, -42) == Printf.@sprintf( "%0d", -42)
+
+    @test Printf.@sprintf("%-*d",  5,  42) == Printf.@sprintf("%-5d",  42)
+    @test Printf.@sprintf("%-*d",  5, -42) == Printf.@sprintf("%-5d", -42)
+    @test Printf.@sprintf("%-*d", 15,  42) == Printf.@sprintf("%-15d",  42)
+    @test Printf.@sprintf("%-*d", 15, -42) == Printf.@sprintf("%-15d", -42)
+
+    @test Printf.@sprintf("%+*lld", 8, 100) == Printf.@sprintf("%+8lld", 100)
+    @test Printf.@sprintf("%+.*lld", 8, 100) == Printf.@sprintf("%+.8lld", 100)
+    @test Printf.@sprintf("%+*.*lld", 10, 8, 100) == Printf.@sprintf("%+10.8lld", 100)
+
+    @test Printf.@sprintf("%-*.*lld", 1, 5, -100) == Printf.@sprintf("%-1.5lld", -100)
+    @test Printf.@sprintf("%*lld", 5, 100) == Printf.@sprintf("%5lld", 100)
+    @test Printf.@sprintf("%*lld", 5, -100) == Printf.@sprintf("%5lld", -100)
+    @test Printf.@sprintf("%-*lld", 5, 100) == Printf.@sprintf("%-5lld", 100)
+    @test Printf.@sprintf("%-*lld", 5, -100) == Printf.@sprintf("%-5lld", -100)
+    @test Printf.@sprintf("%-.*lld", 5, 100) == Printf.@sprintf("%-.5lld", 100)
+    @test Printf.@sprintf("%-.*lld", 5, -100) == Printf.@sprintf("%-.5lld", -100)
+    @test Printf.@sprintf("%-*.*lld", 8, 5, 100) == Printf.@sprintf("%-8.5lld", 100)
+    @test Printf.@sprintf("%-*.*lld", 8, 5, -100) == Printf.@sprintf("%-8.5lld", -100)
+    @test Printf.@sprintf("%0*lld", 5, 100) == Printf.@sprintf("%05lld", 100)
+    @test Printf.@sprintf("%0*lld", 5, -100) == Printf.@sprintf("%05lld", -100)
+    @test Printf.@sprintf("% *lld", 5,  100) == Printf.@sprintf("% 5lld", 100)
+    @test Printf.@sprintf("% *lld", 5,  -100) == Printf.@sprintf("% 5lld", -100)
+    @test Printf.@sprintf("% .*lld", 5,  100) == Printf.@sprintf("% .5lld", 100)
+    @test Printf.@sprintf("% .*lld", 5,  -100) == Printf.@sprintf("% .5lld", -100)
+    @test Printf.@sprintf("% *.*lld", 8, 5,  100) == Printf.@sprintf("% 8.5lld", 100)
+    @test Printf.@sprintf("% *.*lld", 8, 5,  -100) == Printf.@sprintf("% 8.5lld", -100)
+    @test Printf.@sprintf("%.*lld", 0, 0) == Printf.@sprintf("%.0lld", 0)
+    @test Printf.@sprintf("%#+*.*llx", 21, 18, -100) == Printf.@sprintf("%#+21.18llx", -100)
+    @test Printf.@sprintf("%#.*llo", 25, -100) == Printf.@sprintf("%#.25llo", -100)
+    @test Printf.@sprintf("%#+*.*llo", 24, 20, -100) == Printf.@sprintf("%#+24.20llo", -100)
+    @test Printf.@sprintf("%#+*.*llX", 18, 21, -100) == Printf.@sprintf("%#+18.21llX", -100)
+    @test Printf.@sprintf("%#+*.*llo", 20, 24, -100) == Printf.@sprintf("%#+20.24llo", -100)
+    @test Printf.@sprintf("%#+*.*llu", 25, 22, -1) == Printf.@sprintf("%#+25.22llu", -1)
+    @test Printf.@sprintf("%#+*.*llu", 30, 25, -1) == Printf.@sprintf("%#+30.25llu", -1)
+    @test Printf.@sprintf("%+#*.*lld", 25, 22, -1) == Printf.@sprintf("%+#25.22lld", -1)
+    @test Printf.@sprintf("%#-*.*llo", 8, 5, 100) == Printf.@sprintf("%#-8.5llo", 100)
+    @test Printf.@sprintf("%#-+ 0*.*lld", 8, 5, 100) == Printf.@sprintf("%#-+ 08.5lld", 100)
+    @test Printf.@sprintf("%#-+ 0*.*lld", 8, 5, 100) == Printf.@sprintf("%#-+ 08.5lld", 100)
+    @test Printf.@sprintf("%.*lld",  40, 1) == Printf.@sprintf("%.40lld",  1)
+    @test Printf.@sprintf("% .*lld",  40, 1) == Printf.@sprintf("% .40lld",  1)
+    @test Printf.@sprintf("% .*d",  40, 1) == Printf.@sprintf("% .40d",  1)
+
+    @test Printf.@sprintf("%#0*x",  12, 1) == Printf.@sprintf("%#012x",  1)
+    @test Printf.@sprintf("%#0*.*x", 4, 8, 1) == Printf.@sprintf("%#04.8x",  1)
+
+    @test Printf.@sprintf("%#-0*.*x", 8, 2,  1) == Printf.@sprintf("%#-08.2x",  1)
+    @test Printf.@sprintf("%#0*o", 8,  1) == Printf.@sprintf("%#08o",  1)
+
+    @test Printf.@sprintf("%*d", 20, 1024) == Printf.@sprintf("%20d",  1024)
+    @test Printf.@sprintf("%*d", 20,-1024) == Printf.@sprintf("%20d", -1024)
+    @test Printf.@sprintf("%*i", 20, 1024) == Printf.@sprintf("%20i",  1024)
+    @test Printf.@sprintf("%*i", 20,-1024) == Printf.@sprintf("%20i", -1024)
+    @test Printf.@sprintf("%*u", 20, 1024) == Printf.@sprintf("%20u",  1024)
+    @test Printf.@sprintf("%*u", 20, UInt(4294966272)) == Printf.@sprintf("%20u",  UInt(4294966272))
+    @test Printf.@sprintf("%*o", 20, 511) == Printf.@sprintf("%20o",  511)
+    @test Printf.@sprintf("%*o", 20, UInt(4294966785)) == Printf.@sprintf("%20o",  UInt(4294966785))
+    @test Printf.@sprintf("%*x", 20, 305441741) == Printf.@sprintf("%20x",  305441741)
+    @test Printf.@sprintf("%*x", 20, UInt(3989525555)) == Printf.@sprintf("%20x",  UInt(3989525555))
+    @test Printf.@sprintf("%*X", 20, 305441741) == Printf.@sprintf("%20X",  305441741)
+    @test Printf.@sprintf("%*X", 20, UInt(3989525555)) == Printf.@sprintf("%20X",  UInt(3989525555))
+    @test Printf.@sprintf("%-*d", 20, 1024) == Printf.@sprintf("%-20d",  1024)
+    @test Printf.@sprintf("%-*d", 20,-1024) == Printf.@sprintf("%-20d", -1024)
+    @test Printf.@sprintf("%-*i", 20, 1024) == Printf.@sprintf("%-20i",  1024)
+    @test Printf.@sprintf("%-*i", 20,-1024) == Printf.@sprintf("%-20i", -1024)
+    @test Printf.@sprintf("%-*u", 20, 1024) == Printf.@sprintf("%-20u",  1024)
+    @test Printf.@sprintf("%-*u", 20, UInt(4294966272)) == Printf.@sprintf("%-20u",  UInt(4294966272))
+    @test Printf.@sprintf("%-*o", 20, 511) == Printf.@sprintf("%-20o",  511)
+    @test Printf.@sprintf("%-*o", 20, UInt(4294966785)) == Printf.@sprintf("%-20o",  UInt(4294966785))
+    @test Printf.@sprintf("%-*x", 20, 305441741) == Printf.@sprintf("%-20x",  305441741)
+    @test Printf.@sprintf("%-*x", 20, UInt(3989525555)) == Printf.@sprintf("%-20x",  UInt(3989525555))
+    @test Printf.@sprintf("%-*X", 20, 305441741) == Printf.@sprintf("%-20X",  305441741)
+    @test Printf.@sprintf("%-*X", 20, UInt(3989525555)) == Printf.@sprintf("%-20X",  UInt(3989525555))
+    @test Printf.@sprintf("%0*d", 20, 1024) == Printf.@sprintf("%020d",  1024)
+    @test Printf.@sprintf("%0*d", 20,-1024) == Printf.@sprintf("%020d", -1024)
+    @test Printf.@sprintf("%0*i", 20, 1024) == Printf.@sprintf("%020i",  1024)
+    @test Printf.@sprintf("%0*i", 20,-1024) == Printf.@sprintf("%020i", -1024)
+    @test Printf.@sprintf("%0*u", 20, 1024) == Printf.@sprintf("%020u",  1024)
+    @test Printf.@sprintf("%0*u", 20, UInt(4294966272)) == Printf.@sprintf("%020u",  UInt(4294966272))
+    @test Printf.@sprintf("%0*o", 20, 511) == Printf.@sprintf("%020o",  511)
+    @test Printf.@sprintf("%0*o", 20, UInt(4294966785)) == Printf.@sprintf("%020o",  UInt(4294966785))
+    @test Printf.@sprintf("%0*x", 20, 305441741) == Printf.@sprintf("%020x",  305441741)
+    @test Printf.@sprintf("%0*x", 20, UInt(3989525555)) == Printf.@sprintf("%020x",  UInt(3989525555))
+    @test Printf.@sprintf("%0*X", 20, 305441741) == Printf.@sprintf("%020X",  305441741)
+    @test Printf.@sprintf("%0*X", 20, UInt(3989525555)) == Printf.@sprintf("%020X",  UInt(3989525555))
+    @test Printf.@sprintf("%#*o", 20, 511) == Printf.@sprintf("%#20o",  511)
+    @test Printf.@sprintf("%#*o", 20, UInt(4294966785)) == Printf.@sprintf("%#20o",  UInt(4294966785))
+    @test Printf.@sprintf("%#*x", 20, 305441741) == Printf.@sprintf("%#20x",  305441741)
+    @test Printf.@sprintf("%#*x", 20, UInt(3989525555)) == Printf.@sprintf("%#20x",  UInt(3989525555))
+    @test Printf.@sprintf("%#*X", 20, 305441741) == Printf.@sprintf("%#20X",  305441741)
+    @test Printf.@sprintf("%#*X", 20, UInt(3989525555)) == Printf.@sprintf("%#20X",  UInt(3989525555))
+    @test Printf.@sprintf("%#0*o", 20, 511) == Printf.@sprintf("%#020o",  511)
+    @test Printf.@sprintf("%#0*o", 20, UInt(4294966785)) == Printf.@sprintf("%#020o",  UInt(4294966785))
+    @test Printf.@sprintf("%#0*x", 20, 305441741) == Printf.@sprintf("%#020x",  305441741)
+    @test Printf.@sprintf("%#0*x", 20, UInt(3989525555)) == Printf.@sprintf("%#020x",  UInt(3989525555))
+    @test Printf.@sprintf("%#0*X", 20, 305441741) == Printf.@sprintf("%#020X",  305441741)
+    @test Printf.@sprintf("%#0*X", 20, UInt(3989525555)) == Printf.@sprintf("%#020X",  UInt(3989525555))
+    @test Printf.@sprintf("%0-*d", 20, 1024) == Printf.@sprintf("%0-20d",  1024)
+    @test Printf.@sprintf("%0-*d", 20,-1024) == Printf.@sprintf("%0-20d", -1024)
+    @test Printf.@sprintf("%0-*i", 20, 1024) == Printf.@sprintf("%0-20i",  1024)
+    @test Printf.@sprintf("%0-*i", 20,-1024) == Printf.@sprintf("%0-20i", -1024)
+    @test Printf.@sprintf("%0-*u", 20, 1024) == Printf.@sprintf("%0-20u",  1024)
+    @test Printf.@sprintf("%0-*u", 20, UInt(4294966272)) == Printf.@sprintf("%0-20u",  UInt(4294966272))
+    @test Printf.@sprintf("%-0*o", 20, 511) == Printf.@sprintf("%-020o",  511)
+    @test Printf.@sprintf("%-0*o", 20, UInt(4294966785)) == Printf.@sprintf("%-020o",  UInt(4294966785))
+    @test Printf.@sprintf("%-0*x", 20, 305441741) == Printf.@sprintf("%-020x",  305441741)
+    @test Printf.@sprintf("%-0*x", 20, UInt(3989525555)) == Printf.@sprintf("%-020x",  UInt(3989525555))
+    @test Printf.@sprintf("%-0*X", 20, 305441741) == Printf.@sprintf("%-020X",  305441741)
+    @test Printf.@sprintf("%-0*X", 20, UInt(3989525555)) == Printf.@sprintf("%-020X",  UInt(3989525555))
+    @test Printf.@sprintf("%.*d", 20, 1024) == Printf.@sprintf("%.20d",  1024)
+    @test Printf.@sprintf("%.*d", 20,-1024) == Printf.@sprintf("%.20d", -1024)
+    @test Printf.@sprintf("%.*i", 20, 1024) == Printf.@sprintf("%.20i",  1024)
+    @test Printf.@sprintf("%.*i", 20,-1024) == Printf.@sprintf("%.20i", -1024)
+    @test Printf.@sprintf("%.*u", 20, 1024) == Printf.@sprintf("%.20u",  1024)
+    @test Printf.@sprintf("%.*u", 20, UInt(4294966272)) == Printf.@sprintf("%.20u",  UInt(4294966272))
+    @test Printf.@sprintf("%.*o", 20, 511) == Printf.@sprintf("%.20o",  511)
+    @test Printf.@sprintf("%.*o", 20, UInt(4294966785)) == Printf.@sprintf("%.20o",  UInt(4294966785))
+    @test Printf.@sprintf("%.*x", 20, 305441741) == Printf.@sprintf("%.20x",  305441741)
+    @test Printf.@sprintf("%.*x", 20, UInt(3989525555)) == Printf.@sprintf("%.20x",  UInt(3989525555))
+    @test Printf.@sprintf("%.*X", 20, 305441741) == Printf.@sprintf("%.20X",  305441741)
+    @test Printf.@sprintf("%.*X", 20, UInt(3989525555)) == Printf.@sprintf("%.20X",  UInt(3989525555))
+    @test Printf.@sprintf("%*.*d", 20, 5, 1024) == Printf.@sprintf("%20.5d",  1024)
+    @test Printf.@sprintf("%*.*d", 20, 5, -1024) == Printf.@sprintf("%20.5d", -1024)
+    @test Printf.@sprintf("%*.*i", 20, 5, 1024) == Printf.@sprintf("%20.5i",  1024)
+    @test Printf.@sprintf("%*.*i", 20, 5,-1024) == Printf.@sprintf("%20.5i", -1024)
+    @test Printf.@sprintf("%*.*u", 20, 5, 1024) == Printf.@sprintf("%20.5u",  1024)
+    @test Printf.@sprintf("%*.*u", 20, 5, UInt(4294966272)) == Printf.@sprintf("%20.5u",  UInt(4294966272))
+    @test Printf.@sprintf("%*.*o", 20, 5, 511) == Printf.@sprintf("%20.5o",  511)
+    @test Printf.@sprintf("%*.*o", 20, 5, UInt(4294966785)) == Printf.@sprintf("%20.5o",  UInt(4294966785))
+    @test Printf.@sprintf("%*.*x", 20, 5, 305441741) == Printf.@sprintf("%20.5x",  305441741)
+    @test Printf.@sprintf("%*.*x", 20, 10, UInt(3989525555)) == Printf.@sprintf("%20.10x",  UInt(3989525555))
+    @test Printf.@sprintf("%*.*X", 20, 5, 305441741) == Printf.@sprintf("%20.5X",  305441741)
+    @test Printf.@sprintf("%*.*X", 20, 10, UInt(3989525555)) == Printf.@sprintf("%20.10X",  UInt(3989525555))
+    @test Printf.@sprintf("%0*.*d", 20, 5, 1024) == Printf.@sprintf("%020.5d",  1024)
+    @test Printf.@sprintf("%0*.*d", 20, 5,-1024) == Printf.@sprintf("%020.5d", -1024)
+    @test Printf.@sprintf("%0*.*i", 20, 5, 1024) == Printf.@sprintf("%020.5i",  1024)
+    @test Printf.@sprintf("%0*.*i", 20, 5,-1024) == Printf.@sprintf("%020.5i", -1024)
+    @test Printf.@sprintf("%0*.*u", 20, 5, 1024) == Printf.@sprintf("%020.5u",  1024)
+    @test Printf.@sprintf("%0*.*u", 20, 5, UInt(4294966272)) == Printf.@sprintf("%020.5u",  UInt(4294966272))
+    @test Printf.@sprintf("%0*.*o", 20, 5, 511) == Printf.@sprintf("%020.5o",  511)
+    @test Printf.@sprintf("%0*.*o", 20, 5, UInt(4294966785)) == Printf.@sprintf("%020.5o",  UInt(4294966785))
+    @test Printf.@sprintf("%0*.*x", 20, 5, 305441741) == Printf.@sprintf("%020.5x",  305441741)
+    @test Printf.@sprintf("%0*.*x", 20, 10, UInt(3989525555)) == Printf.@sprintf("%020.10x",  UInt(3989525555))
+    @test Printf.@sprintf("%0*.*X", 20, 5, 305441741) == Printf.@sprintf("%020.5X",  305441741)
+    @test Printf.@sprintf("%0*.*X", 20, 10, UInt(3989525555)) == Printf.@sprintf("%020.10X",  UInt(3989525555))
+    @test Printf.@sprintf("%*.0d", 20, 1024) == Printf.@sprintf("%20.0d",  1024)
+    @test Printf.@sprintf("%*.d", 20,-1024) == Printf.@sprintf("%20.d", -1024)
+    @test Printf.@sprintf("%*.d", 20, 0) == Printf.@sprintf("%20.d",  0)
+    @test Printf.@sprintf("%*.0i", 20, 1024) == Printf.@sprintf("%20.0i",  1024)
+    @test Printf.@sprintf("%*.i", 20,-1024) == Printf.@sprintf("%20.i", -1024)
+    @test Printf.@sprintf("%*.i", 20, 0) == Printf.@sprintf("%20.i",  0)
+    @test Printf.@sprintf("%*.u", 20, 1024) == Printf.@sprintf("%20.u",  1024)
+    @test Printf.@sprintf("%*.0u", 20, UInt(4294966272)) == Printf.@sprintf("%20.0u",  UInt(4294966272))
+    @test Printf.@sprintf("%*.u", 20, UInt(0)) == Printf.@sprintf("%20.u",  UInt(0))
+    @test Printf.@sprintf("%*.o", 20, 511) == Printf.@sprintf("%20.o",  511)
+    @test Printf.@sprintf("%*.0o", 20, UInt(4294966785)) == Printf.@sprintf("%20.0o",  UInt(4294966785))
+    @test Printf.@sprintf("%*.o", 20, UInt(0)) == Printf.@sprintf("%20.o",  UInt(0))
+    @test Printf.@sprintf("%*.x", 20, 305441741) == Printf.@sprintf("%20.x",  305441741)
+    @test Printf.@sprintf("%*.0x", 20, UInt(3989525555)) == Printf.@sprintf("%20.0x",  UInt(3989525555))
+    @test Printf.@sprintf("%*.x", 20, UInt(0)) == Printf.@sprintf("%20.x",  UInt(0))
+    @test Printf.@sprintf("%*.X", 20, 305441741) == Printf.@sprintf("%20.X",  305441741)
+    @test Printf.@sprintf("%*.0X", 20, UInt(3989525555)) == Printf.@sprintf("%20.0X",  UInt(3989525555))
+    @test Printf.@sprintf("%*.X", 20, UInt(0)) == Printf.@sprintf("%20.X",  UInt(0))
+
+    x = Ref{Int}()
+    y = Ref{Int}()
+    @test (Printf.@sprintf("%10s%n", "😉", x); Printf.@sprintf("%*s%n", 10, "😉", y); x[] == y[])
+    @test (Printf.@sprintf("%10s%n", "1234", x); Printf.@sprintf("%*s%n", 10, "1234", y); x[] == y[])
+
+end
+
 @testset "length modifiers" begin
     @test_throws Printf.InvalidFormatStringError Printf.Format("%h")
     @test_throws Printf.InvalidFormatStringError Printf.Format("%hh")
diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
index 1a52c1ec782de..e45f4dca9607f 100644
--- a/stdlib/Profile/src/Allocs.jl
+++ b/stdlib/Profile/src/Allocs.jl
@@ -30,7 +30,7 @@ struct RawResults
 end
 
 """
-    Profile.Allocs.@profile [sample_rate=0.0001] expr
+    Profile.Allocs.@profile [sample_rate=0.1] expr
 
 Profile allocations that happen during `expr`, returning
 both the result and and AllocResults struct.
@@ -67,7 +67,7 @@ macro profile(opts, ex)
     _prof_expr(ex, opts)
 end
 macro profile(ex)
-    _prof_expr(ex, :(sample_rate=0.0001))
+    _prof_expr(ex, :(sample_rate=0.1))
 end
 
 function _prof_expr(expr, opts)
@@ -216,9 +216,4 @@ function stacktrace_memoized(
     return stack
 end
 
-# Precompile once for the package cache.
-@assert precompile(start, ())
-@assert precompile(stop, ())
-@assert precompile(fetch, ())
-
 end
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index 518dc54c7f757..4bce0c4fecd88 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -31,26 +31,6 @@ macro profile(ex)
     end
 end
 
-# triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request
-const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}()
-function profile_printing_listener()
-    try
-        while true
-            wait(PROFILE_PRINT_COND[])
-            peek_report[]()
-            if Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true
-                println(stderr, "Saving heap snapshot...")
-                fname = take_heap_snapshot()
-                println(stderr, "Heap snapshot saved to `$(fname)`")
-            end
-        end
-    catch ex
-        if !isa(ex, InterruptException)
-            @error "Profile printing listener crashed" exception=ex,catch_backtrace()
-        end
-    end
-end
-
 # An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
 function _peek_report()
     iob = IOBuffer()
@@ -74,12 +54,7 @@ Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO
 """
 set_peek_duration(t::Float64) = ccall(:jl_set_profile_peek_duration, Cvoid, (Float64,), t)
 
-precompile_script = """
-import Profile
-Profile.@profile while Profile.len_data() < 1000; rand(10,10) * rand(10,10); end
-Profile.peek_report[]()
-Profile.clear()
-"""
+
 
 ####
 #### User-level functions
@@ -150,20 +125,6 @@ function check_init()
     end
 end
 
-function __init__()
-    # Note: The profile buffer is no longer initialized during __init__ because Profile is in the sysimage,
-    # thus __init__ is called every startup. The buffer is lazily initialized the first time `@profile` is
-    # used, if not manually initialized before that.
-    @static if !Sys.iswindows()
-        # triggering a profile via signals is not implemented on windows
-        cond = Base.AsyncCondition()
-        Base.uv_unref(cond.handle)
-        PROFILE_PRINT_COND[] = cond
-        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle)
-        errormonitor(Threads.@spawn(profile_printing_listener()))
-    end
-end
-
 """
     clear()
 
@@ -1267,5 +1228,6 @@ end
 
 
 include("Allocs.jl")
+include("precompile.jl")
 
 end # module
diff --git a/stdlib/Profile/src/precompile.jl b/stdlib/Profile/src/precompile.jl
new file mode 100644
index 0000000000000..2d947429861a9
--- /dev/null
+++ b/stdlib/Profile/src/precompile.jl
@@ -0,0 +1,11 @@
+if ccall(:jl_generating_output, Cint, ()) == 1
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
+    precompile(Tuple{typeof(Profile._peek_report)})
+    precompile(Tuple{typeof(Profile.Allocs.start)})
+    precompile(Tuple{typeof(Profile.Allocs.stop)})
+    precompile(Tuple{typeof(Profile.Allocs.fetch)})
+end
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index a23b8f224a6cb..ce594d55863bc 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -43,14 +43,14 @@ julia> ans
 "12"
 ```
 
-In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting
-text that starts with `julia> ` into the REPL. In that case, only expressions starting with
-`julia> ` are parsed, others are removed. This makes it possible to paste a chunk of code
-that has been copied from a REPL session without having to scrub away prompts and outputs. This
-feature is enabled by default but can be disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`.
-If it is enabled, you can try it out by pasting the code block above this paragraph straight into
-the REPL. This feature does not work on the standard Windows command prompt due to its limitation
-at detecting when a paste occurs.
+In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting text
+that starts with `julia> ` into the REPL. In that case, only expressions starting with `julia> ` (as
+well as the other REPL mode prompts: `shell> `, `help?> `, `pkg>` ) are parsed, but others are
+removed. This makes it possible to paste a chunk of text that has been copied from a REPL session
+without having to scrub away prompts and outputs. This feature is enabled by default but can be
+disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. If it is enabled, you can try it
+out by pasting the code block above this paragraph straight into the REPL. This feature does not
+work on the standard Windows command prompt due to its limitation at detecting when a paste occurs.
 
 Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref).
 In particular, the `:limit` attribute is set to `true`.
@@ -617,19 +617,21 @@ julia> REPL.activate(CustomMod)
   var       8 bytes Int64
 ```
 
-## IPython mode
+## Numbered prompt
+
+It is possible to get an interface which is similar to the IPython REPL and the Mathematica notebook with numbered input prompts and output prefixes. This is done by calling `REPL.numbered_prompt!()`. If you want to have this enabled on startup, add
 
-It is possible to get an interface which is similar to the IPython REPL with numbered input prompts and output prefixes. This is done by calling `REPL.ipython_mode!()`. If you want to have this enabled on startup, add
 ```julia
 atreplinit() do repl
+    @eval import REPL
     if !isdefined(repl, :interface)
         repl.interface = REPL.setup_interface(repl)
     end
-    REPL.ipython_mode!(repl)
+    REPL.numbered_prompt!(repl)
 end
 ```
 
-to your `startup.jl` file. In `IPython` mode the variable `Out[n]` (where `n` is an integer) can be used to refer to earlier results:
+to your `startup.jl` file. In numbered prompt the variable `Out[n]` (where `n` is an integer) can be used to refer to earlier results:
 
 ```julia-repl
 In [1]: 5 + 3
@@ -644,6 +646,13 @@ Out[3]: Dict{Int64, Any} with 2 entries:
   1 => 8
 ```
 
+!!! note
+    Since all outputs from previous REPL evaluations are saved in the `Out` variable, one should be careful if they are returning many
+    large in-memory objects like arrays, since they will be protected from garbage collection so long as a reference to them remains in
+    `Out`. If you need to remove references to objects in `Out`, you can clear the entire history it stores with `empty!(Out)`, or clear
+    an individual entry with `Out[n] = nothing`.
+
+
 ## TerminalMenus
 
 TerminalMenus is a submodule of the Julia REPL and enables small, low-profile interactive menus in the terminal.
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index 0dffcc6c1e276..ff67e849fcc5a 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -323,6 +323,11 @@ function common_prefix(completions::Vector{String})
     end
 end
 
+# This is the maximum number of completions that will be displayed in a single
+# column, anything above that and multiple columns will be used. Note that this
+# does not restrict column length when multiple columns are used.
+const MULTICOLUMN_THRESHOLD = 5
+
 # Show available completions
 function show_completions(s::PromptState, completions::Vector{String})
     # skip any lines of input after the cursor
@@ -331,9 +336,12 @@ function show_completions(s::PromptState, completions::Vector{String})
     if any(Base.Fix1(occursin, '\n'), completions)
         foreach(Base.Fix1(println, terminal(s)), completions)
     else
-        colmax = 2 + maximum(length, completions; init=1) # n.b. length >= textwidth
-        num_cols = max(div(width(terminal(s)), colmax), 1)
         n = length(completions)
+        colmax = 2 + maximum(length, completions; init=1) # n.b. length >= textwidth
+
+        num_cols = min(cld(n, MULTICOLUMN_THRESHOLD),
+                       max(div(width(terminal(s)), colmax), 1))
+
         entries_per_col = cld(n, num_cols)
         idx = 0
         for _ in 1:entries_per_col
@@ -761,10 +769,11 @@ function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigi
     elseif buf.mark >= B
         buf.mark += sizeof(ins) - B + A
     end
+    ensureroom(buf, B) # handle !buf.reinit from take!
     ret = splice!(buf.data, A+1:B, codeunits(String(ins))) # position(), etc, are 0-indexed
     buf.size = buf.size + sizeof(ins) - B + A
     adjust_pos && seek(buf, position(buf) + sizeof(ins))
-    return String(ret)
+    return String(copy(ret))
 end
 
 edit_splice!(s::MIState, ins::AbstractString) = edit_splice!(s, region(s), ins)
@@ -1281,7 +1290,7 @@ end
 # compute the number of spaces from b till the next non-space on the right
 # (which can also be "end of line" or "end of buffer")
 function leadingspaces(buf::IOBuffer, b::Int)
-    ls = something(findnext(_notspace, buf.data, b+1), 0)-1
+    @views ls = something(findnext(_notspace, buf.data[1:buf.size], b+1), 0)-1
     ls == -1 && (ls = buf.size)
     ls -= b
     return ls
@@ -1357,19 +1366,22 @@ function edit_input(s, f = (filename, line, column) -> InteractiveUtils.edit(fil
         col += 1
     end
 
+    # Write current input to temp file, edit, read back
     write(filename, str)
     f(filename, line, col)
     str_mod = readchomp(filename)
     rm(filename)
-    if str != str_mod # something was changed, run the input
-        write(buf, str_mod)
-        commit_line(s)
-        :done
-    else # no change, the edit session probably unsuccessful
-        write(buf, str)
-        seek(buf, pos) # restore state from before edit
-        refresh_line(s)
+
+    # Write updated content
+    write(buf, str_mod)
+    if str == str_mod
+        # If input was not modified: reset cursor
+        seek(buf, pos)
+    else
+        # If input was modified: move cursor to end
+        move_input_end(s)
     end
+    refresh_line(s)
 end
 
 # return the identifier under the cursor, possibly with other words concatenated
@@ -2235,7 +2247,7 @@ end
 
 function move_line_end(buf::IOBuffer)
     eof(buf) && return
-    pos = findnext(isequal(UInt8('\n')), buf.data, position(buf)+1)
+    @views pos = findnext(isequal(UInt8('\n')), buf.data[1:buf.size], position(buf)+1)
     if pos === nothing
         move_input_end(buf)
         return
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index b2eb8cf63c8da..f8bb442ad6ec4 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -3,13 +3,14 @@
 """
 Run Evaluate Print Loop (REPL)
 
-    Example minimal code
-    ```
-    import REPL
-    term = REPL.Terminals.TTYTerminal("dumb", stdin, stdout, stderr)
-    repl = REPL.LineEditREPL(term, true)
-    REPL.run_repl(repl)
-    ```
+Example minimal code
+
+```julia
+import REPL
+term = REPL.Terminals.TTYTerminal("dumb", stdin, stdout, stderr)
+repl = REPL.LineEditREPL(term, true)
+REPL.run_repl(repl)
+```
 """
 module REPL
 
@@ -71,10 +72,6 @@ include("docview.jl")
 
 @nospecialize # use only declared type signatures
 
-function __init__()
-    Base.REPL_MODULE_REF[] = REPL
-end
-
 answer_color(::AbstractREPL) = ""
 
 const JULIA_PROMPT = "julia> "
@@ -152,7 +149,7 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module)
                 end
                 value = Core.eval(mod, ast)
                 backend.in_eval = false
-                setglobal!(mod, :ans, value)
+                setglobal!(Base.MainInclude, :ans, value)
                 put!(backend.response_channel, Pair{Any, Bool}(value, false))
             end
             break
@@ -264,7 +261,9 @@ function display(d::REPLDisplay, mime::MIME"text/plain", x)
         if d.repl isa LineEditREPL
             mistate = d.repl.mistate
             mode = LineEdit.mode(mistate)
-            LineEdit.write_output_prefix(io, mode, get(io, :color, false)::Bool)
+            if mode isa LineEdit.Prompt
+                LineEdit.write_output_prefix(io, mode, get(io, :color, false)::Bool)
+            end
         end
         get(io, :color, false)::Bool && write(io, answer_color(d.repl))
         if isdefined(d.repl, :options) && isdefined(d.repl.options, :iocontext)
@@ -294,7 +293,7 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool,
             Base.sigatomic_end()
             if iserr
                 val = Base.scrub_repl_backtrace(val)
-                Base.istrivialerror(val) || setglobal!(Main, :err, val)
+                Base.istrivialerror(val) || setglobal!(Base.MainInclude, :err, val)
                 Base.invokelatest(Base.display_error, errio, val)
             else
                 if val !== nothing && show_value
@@ -317,7 +316,7 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool,
                 println(errio, "SYSTEM (REPL): showing an error caused an error")
                 try
                     excs = Base.scrub_repl_backtrace(current_exceptions())
-                    setglobal!(Main, :err, excs)
+                    setglobal!(Base.MainInclude, :err, excs)
                     Base.invokelatest(Base.display_error, errio, excs)
                 catch e
                     # at this point, only print the name of the type as a Symbol to
@@ -1399,7 +1398,7 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
     nothing
 end
 
-module IPython
+module Numbered
 
 using ..REPL
 
@@ -1410,18 +1409,32 @@ function repl_eval_counter(hp)
 end
 
 function out_transform(@nospecialize(x), n::Ref{Int})
-    return quote
+    return Expr(:toplevel, get_usings!([], x)..., quote
         let __temp_val_a72df459 = $x
             $capture_result($n, __temp_val_a72df459)
             __temp_val_a72df459
         end
+    end)
+end
+
+function get_usings!(usings, ex)
+    # get all `using` and `import` statements which are at the top level
+    for (i, arg) in enumerate(ex.args)
+        if Base.isexpr(arg, :toplevel)
+            get_usings!(usings, arg)
+        elseif Base.isexpr(arg, [:using, :import])
+            push!(usings, popat!(ex.args, i))
+        end
     end
+    return usings
 end
 
 function capture_result(n::Ref{Int}, @nospecialize(x))
     n = n[]
-    mod = REPL.active_module()
+    mod = Base.MainInclude
     if !isdefined(mod, :Out)
+        @eval mod global Out
+        @eval mod export Out
         setglobal!(mod, :Out, Dict{Int, Any}())
     end
     if x !== getglobal(mod, :Out) && x !== nothing # remove this?
@@ -1457,15 +1470,26 @@ function __current_ast_transforms(backend)
 end
 
 
-function ipython_mode!(repl::LineEditREPL=Base.active_repl, backend=nothing)
+function numbered_prompt!(repl::LineEditREPL=Base.active_repl, backend=nothing)
     n = Ref{Int}(0)
     set_prompt(repl, n)
     set_output_prefix(repl, n)
     push!(__current_ast_transforms(backend), @nospecialize(ast) -> out_transform(ast, n))
     return
 end
+
+"""
+    Out[n]
+
+A variable referring to all previously computed values, automatically imported to the interactive prompt.
+Only defined and exists while using [Numbered prompt](@ref Numbered-prompt).
+
+See also [`ans`](@ref).
+"""
+Base.MainInclude.Out
+
 end
 
-import .IPython.ipython_mode!
+import .Numbered.numbered_prompt!
 
 end # module
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index 34ce7ad9928fb..e09e3b2aa9e6b 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -4,6 +4,8 @@ module REPLCompletions
 
 export completions, shell_completions, bslash_completions, completion_text
 
+using Core: CodeInfo, MethodInstance, CodeInstance, Const
+const CC = Core.Compiler
 using Base.Meta
 using Base: propertynames, something
 
@@ -117,7 +119,8 @@ function completes_global(x, name)
 end
 
 function appendmacro!(syms, macros, needle, endchar)
-    for s in macros
+    for macsym in macros
+        s = String(macsym)
         if endswith(s, needle)
             from = nextind(s, firstindex(s))
             to = prevind(s, sizeof(s)-sizeof(needle)+1)
@@ -129,43 +132,36 @@ end
 function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString, all::Bool = false, imported::Bool = false)
     ssyms = names(mod, all = all, imported = imported)
     filter!(ffunc, ssyms)
-    syms = String[string(s) for s in ssyms]
-    macros =  filter(x -> startswith(x, "@" * name), syms)
+    macros = filter(x -> startswith(String(x), "@" * name), ssyms)
+    syms = String[sprint((io,s)->Base.show_sym(io, s; allow_macroname=true), s) for s in ssyms if completes_global(String(s), name)]
     appendmacro!(syms, macros, "_str", "\"")
     appendmacro!(syms, macros, "_cmd", "`")
-    filter!(x->completes_global(x, name), syms)
     return [ModuleCompletion(mod, sym) for sym in syms]
 end
 
 # REPL Symbol Completions
-function complete_symbol(sym::String, @nospecialize(ffunc), context_module::Module=Main)
+function complete_symbol(@nospecialize(ex), name::String, @nospecialize(ffunc), context_module::Module=Main)
     mod = context_module
-    name = sym
 
     lookup_module = true
     t = Union{}
     val = nothing
-    if something(findlast(in(non_identifier_chars), sym), 0) < something(findlast(isequal('.'), sym), 0)
-        # Find module
-        lookup_name, name = rsplit(sym, ".", limit=2)
-
-        ex = Meta.parse(lookup_name, raise=false, depwarn=false)
-
-        b, found = get_value(ex, context_module)
-        if found
-            val = b
-            if isa(b, Module)
-                mod = b
+    if ex !== nothing
+        res = repl_eval_ex(ex, context_module)
+        res === nothing && return Completion[]
+        if res isa Const
+            val = res.val
+            if isa(val, Module)
+                mod = val
                 lookup_module = true
             else
                 lookup_module = false
-                t = typeof(b)
+                t = typeof(val)
             end
-        else # If the value is not found using get_value, the expression contain an advanced expression
+        else
             lookup_module = false
-            t, found = get_type(ex, context_module)
+            t = CC.widenconst(res)
         end
-        found || return Completion[]
     end
 
     suggestions = Completion[]
@@ -404,133 +400,184 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')')
     return (startind:lastindex(s), method_name_end)
 end
 
-# Returns the value in a expression if sym is defined in current namespace fn.
-# This method is used to iterate to the value of a expression like:
-# :(REPL.REPLCompletions.whitespace_chars) a `dump` of this expression
-# will show it consist of Expr, QuoteNode's and Symbol's which all needs to
-# be handled differently to iterate down to get the value of whitespace_chars.
-function get_value(sym::Expr, fn)
-    if sym.head === :quote || sym.head === :inert
-        return sym.args[1], true
-    end
-    sym.head !== :. && return (nothing, false)
-    for ex in sym.args
-        ex, found = get_value(ex, fn)::Tuple{Any, Bool}
-        !found && return (nothing, false)
-        fn, found = get_value(ex, fn)::Tuple{Any, Bool}
-        !found && return (nothing, false)
-    end
-    return (fn, true)
+struct REPLInterpreterCache
+    dict::IdDict{MethodInstance,CodeInstance}
 end
-get_value(sym::Symbol, fn) = isdefined(fn, sym) ? (getfield(fn, sym), true) : (nothing, false)
-get_value(sym::QuoteNode, fn) = (sym.value, true)
-get_value(sym::GlobalRef, fn) = get_value(sym.name, sym.mod)
-get_value(sym, fn) = (sym, true)
-
-# Return the type of a getfield call expression
-function get_type_getfield(ex::Expr, fn::Module)
-    length(ex.args) == 3 || return Any, false # should never happen, but just for safety
-    fld, found = get_value(ex.args[3], fn)
-    fld isa Symbol || return Any, false
-    obj = ex.args[2]
-    objt, found = get_type(obj, fn)
-    found || return Any, false
-    objt isa DataType || return Any, false
-    hasfield(objt, fld) || return Any, false
-    return fieldtype(objt, fld), true
+REPLInterpreterCache() = REPLInterpreterCache(IdDict{MethodInstance,CodeInstance}())
+const REPL_INTERPRETER_CACHE = REPLInterpreterCache()
+
+function get_code_cache()
+    # XXX Avoid storing analysis results into the cache that persists across precompilation,
+    #     as [sys|pkg]image currently doesn't support serializing externally created `CodeInstance`.
+    #     Otherwise, `CodeInstance`s created by `REPLInterpreter``, that are much less optimized
+    #     that those produced by `NativeInterpreter`, will leak into the native code cache,
+    #     potentially causing runtime slowdown.
+    #     (see https://github.com/JuliaLang/julia/issues/48453).
+    if (@ccall jl_generating_output()::Cint) == 1
+        return REPLInterpreterCache()
+    else
+        return REPL_INTERPRETER_CACHE
+    end
 end
 
-# Determines the return type with the Compiler of a function call using the type information of the arguments.
-function get_type_call(expr::Expr, fn::Module)
-    f_name = expr.args[1]
-    f, found = get_type(f_name, fn)
-    found || return (Any, false) # If the function f is not found return Any.
-    args = Any[]
-    for i in 2:length(expr.args) # Find the type of the function arguments
-        typ, found = get_type(expr.args[i], fn)
-        found ? push!(args, typ) : push!(args, Any)
+struct REPLInterpreter <: CC.AbstractInterpreter
+    repl_frame::CC.InferenceResult
+    world::UInt
+    inf_params::CC.InferenceParams
+    opt_params::CC.OptimizationParams
+    inf_cache::Vector{CC.InferenceResult}
+    code_cache::REPLInterpreterCache
+    function REPLInterpreter(repl_frame::CC.InferenceResult;
+                             world::UInt = Base.get_world_counter(),
+                             inf_params::CC.InferenceParams = CC.InferenceParams(),
+                             opt_params::CC.OptimizationParams = CC.OptimizationParams(),
+                             inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[],
+                             code_cache::REPLInterpreterCache = get_code_cache())
+        return new(repl_frame, world, inf_params, opt_params, inf_cache, code_cache)
     end
-    world = Base.get_world_counter()
-    return_type = Core.Compiler.return_type(Tuple{f, args...}, world)
-    return (return_type, true)
 end
-
-# Returns the return type. example: get_type(:(Base.strip("", ' ')), Main) returns (SubString{String}, true)
-function try_get_type(sym::Expr, fn::Module)
-    val, found = get_value(sym, fn)
-    found && return Core.Typeof(val), found
-    if sym.head === :call
-        # getfield call is special cased as the evaluation of getfield provides good type information,
-        # is inexpensive and it is also performed in the complete_symbol function.
-        a1 = sym.args[1]
-        if a1 === :getfield || a1 === GlobalRef(Core, :getfield)
-            return get_type_getfield(sym, fn)
+CC.InferenceParams(interp::REPLInterpreter) = interp.inf_params
+CC.OptimizationParams(interp::REPLInterpreter) = interp.opt_params
+CC.get_world_counter(interp::REPLInterpreter) = interp.world
+CC.get_inference_cache(interp::REPLInterpreter) = interp.inf_cache
+CC.code_cache(interp::REPLInterpreter) = CC.WorldView(interp.code_cache, CC.WorldRange(interp.world))
+CC.get(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
+CC.getindex(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
+CC.haskey(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
+CC.setindex!(wvc::CC.WorldView{REPLInterpreterCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
+
+# REPLInterpreter is only used for type analysis, so it should disable optimization entirely
+CC.may_optimize(::REPLInterpreter) = false
+
+# REPLInterpreter analyzes a top-level frame, so better to not bail out from it
+CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.InferenceState) = false
+
+# `REPLInterpreter` aggressively resolves global bindings to enable reasonable completions
+# for lines like `Mod.a.|` (where `|` is the cursor position).
+# Aggressive binding resolution poses challenges for the inference cache validation
+# (until https://github.com/JuliaLang/julia/issues/40399 is implemented).
+# To avoid the cache validation issues, `REPLInterpreter` only allows aggressive binding
+# resolution for top-level frame representing REPL input code (`repl_frame`) and for child
+# `getproperty` frames that are constant propagated from the `repl_frame`. This works, since
+# a.) these frames are never cached, and
+# b.) their results are only observed by the non-cached `repl_frame`.
+#
+# `REPLInterpreter` also aggressively concrete evaluate `:inconsistent` calls within
+# `repl_frame` to provide reasonable completions for lines like `Ref(Some(42))[].|`.
+# Aggressive concrete evaluation allows us to get accurate type information about complex
+# expressions that otherwise can not be constant folded, in a safe way, i.e. it still
+# doesn't evaluate effectful expressions like `pop!(xs)`.
+# Similarly to the aggressive binding resolution, aggressive concrete evaluation doesn't
+# present any cache validation issues because `repl_frame` is never cached.
+
+is_repl_frame(interp::REPLInterpreter, sv::CC.InferenceState) = interp.repl_frame === sv.result
+
+# aggressive global binding resolution within `repl_frame`
+function CC.abstract_eval_globalref(interp::REPLInterpreter, g::GlobalRef,
+                                    sv::CC.InferenceState)
+    if is_repl_frame(interp, sv)
+        if CC.isdefined_globalref(g)
+            return Const(ccall(:jl_get_globalref_value, Any, (Any,), g))
         end
-        return get_type_call(sym, fn)
-    elseif sym.head === :thunk
-        thk = sym.args[1]
-        rt = ccall(:jl_infer_thunk, Any, (Any, Any), thk::Core.CodeInfo, fn)
-        rt !== Any && return (rt, true)
-    elseif sym.head === :ref
-        # some simple cases of `expand`
-        return try_get_type(Expr(:call, GlobalRef(Base, :getindex), sym.args...), fn)
-    elseif sym.head === :. && sym.args[2] isa QuoteNode # second check catches broadcasting
-        return try_get_type(Expr(:call, GlobalRef(Core, :getfield), sym.args...), fn)
-    elseif sym.head === :toplevel || sym.head === :block
-        isempty(sym.args) && return (nothing, true)
-        return try_get_type(sym.args[end], fn)
-    elseif sym.head === :escape || sym.head === :var"hygienic-scope"
-        return try_get_type(sym.args[1], fn)
+        return Union{}
     end
-    return (Any, false)
+    return @invoke CC.abstract_eval_globalref(interp::CC.AbstractInterpreter, g::GlobalRef,
+                                              sv::CC.InferenceState)
 end
 
-try_get_type(other, fn::Module) = get_type(other, fn)
+function is_repl_frame_getproperty(interp::REPLInterpreter, sv::CC.InferenceState)
+    def = sv.linfo.def
+    def isa Method || return false
+    def.name === :getproperty || return false
+    sv.cached && return false
+    return is_repl_frame(interp, sv.parent)
+end
 
-function get_type(sym::Expr, fn::Module)
-    # try to analyze nests of calls. if this fails, try using the expanded form.
-    val, found = try_get_type(sym, fn)
-    found && return val, found
-    # https://github.com/JuliaLang/julia/issues/27184
-    if isexpr(sym, :macrocall)
-        _, found = get_type(first(sym.args), fn)
-        found || return Any, false
-    end
-    newsym = try
-        macroexpand(fn, sym; recursive=false)
-    catch e
-        # user code failed in macroexpand (ignore it)
-        return Any, false
-    end
-    val, found = try_get_type(newsym, fn)
-    if !found
-        newsym = try
-            Meta.lower(fn, sym)
-        catch e
-            # user code failed in lowering (ignore it)
-            return Any, false
+# aggressive global binding resolution for `getproperty(::Module, ::Symbol)` calls within `repl_frame`
+function CC.builtin_tfunction(interp::REPLInterpreter, @nospecialize(f),
+                              argtypes::Vector{Any}, sv::CC.InferenceState)
+    if f === Core.getglobal && is_repl_frame_getproperty(interp, sv)
+        if length(argtypes) == 2
+            a1, a2 = argtypes
+            if isa(a1, Const) && isa(a2, Const)
+                a1val, a2val = a1.val, a2.val
+                if isa(a1val, Module) && isa(a2val, Symbol)
+                    g = GlobalRef(a1val, a2val)
+                    if CC.isdefined_globalref(g)
+                        return Const(ccall(:jl_get_globalref_value, Any, (Any,), g))
+                    end
+                    return Union{}
+                end
+            end
         end
-        val, found = try_get_type(newsym, fn)
     end
-    return val, found
+    return @invoke CC.builtin_tfunction(interp::CC.AbstractInterpreter, f::Any,
+                                        argtypes::Vector{Any}, sv::CC.InferenceState)
+end
+
+# aggressive concrete evaluation for `:inconsistent` frames within `repl_frame`
+function CC.concrete_eval_eligible(interp::REPLInterpreter, @nospecialize(f),
+                                   result::CC.MethodCallResult, arginfo::CC.ArgInfo,
+                                   sv::CC.InferenceState)
+    if is_repl_frame(interp, sv)
+        neweffects = CC.Effects(result.effects; consistent=CC.ALWAYS_TRUE)
+        result = CC.MethodCallResult(result.rt, result.edgecycle, result.edgelimited,
+                                     result.edge, neweffects)
+    end
+return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any,
+                                         result::CC.MethodCallResult, arginfo::CC.ArgInfo,
+                                         sv::CC.InferenceState)
 end
 
-function get_type(sym, fn::Module)
-    val, found = get_value(sym, fn)
-    return found ? Core.Typeof(val) : Any, found
+function resolve_toplevel_symbols!(mod::Module, src::Core.CodeInfo)
+    newsrc = copy(src)
+    @ccall jl_resolve_globals_in_ir(
+        #=jl_array_t *stmts=# newsrc.code::Any,
+        #=jl_module_t *m=# mod::Any,
+        #=jl_svec_t *sparam_vals=# Core.svec()::Any,
+        #=int binding_effects=# 0::Int)::Cvoid
+    return newsrc
 end
 
-function get_type(T, found::Bool, default_any::Bool)
-    return found ? T :
-           default_any ? Any : throw(ArgumentError("argument not found"))
+# lower `ex` and run type inference on the resulting top-level expression
+function repl_eval_ex(@nospecialize(ex), context_module::Module)
+    lwr = try
+        Meta.lower(context_module, ex)
+    catch # macro expansion failed, etc.
+        return nothing
+    end
+    if lwr isa Symbol
+        return isdefined(context_module, lwr) ? Const(getfield(context_module, lwr)) : nothing
+    end
+    lwr isa Expr || return Const(lwr) # `ex` is literal
+    isexpr(lwr, :thunk) || return nothing # lowered to `Expr(:error, ...)` or similar
+    src = lwr.args[1]::Core.CodeInfo
+
+    # construct top-level `MethodInstance`
+    mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
+    mi.specTypes = Tuple{}
+
+    mi.def = context_module
+    src = resolve_toplevel_symbols!(context_module, src)
+    @atomic mi.uninferred = src
+
+    result = CC.InferenceResult(mi)
+    interp = REPLInterpreter(result)
+    frame = CC.InferenceState(result, src, #=cache=#:no, interp)::CC.InferenceState
+
+    CC.typeinf(interp, frame)
+
+    result = frame.result.result
+    result === Union{} && return nothing # for whatever reason, callers expect this as the Bottom and/or Top type instead
+    return result
 end
 
 # Method completion on function call expression that look like :(max(1))
 MAX_METHOD_COMPLETIONS::Int = 40
 function _complete_methods(ex_org::Expr, context_module::Module, shift::Bool)
-    funct, found = get_type(ex_org.args[1], context_module)::Tuple{Any,Bool}
-    !found && return 2, funct, [], Set{Symbol}()
-
+    funct = repl_eval_ex(ex_org.args[1], context_module)
+    funct === nothing && return 2, nothing, [], Set{Symbol}()
+    funct = CC.widenconst(funct)
     args_ex, kwargs_ex, kwargs_flag = complete_methods_args(ex_org, context_module, true, true)
     return kwargs_flag, funct, args_ex, kwargs_ex
 end
@@ -635,7 +682,14 @@ function detect_args_kwargs(funargs::Vector{Any}, context_module::Module, defaul
                 # argument types
                 push!(args_ex, Any)
             else
-                push!(args_ex, get_type(get_type(ex, context_module)..., default_any))
+                argt = repl_eval_ex(ex, context_module)
+                if argt !== nothing
+                    push!(args_ex, CC.widenconst(argt))
+                elseif default_any
+                    push!(args_ex, Any)
+                else
+                    throw(ArgumentError("argument not found"))
+                end
             end
         end
     end
@@ -709,7 +763,6 @@ function close_path_completion(str, startpos, r, paths, pos)
     return lastindex(str) <= pos || str[nextind(str, pos)] != '"'
 end
 
-
 function bslash_completions(string::String, pos::Int)
     slashpos = something(findprev(isequal('\\'), string, pos), 0)
     if (something(findprev(in(bslash_separators), string, pos), 0) < slashpos &&
@@ -841,7 +894,7 @@ function complete_keyword_argument(partial, last_idx, context_module)
     end
 
     suggestions = Completion[KeywordArgumentCompletion(kwarg) for kwarg in kwargs]
-    append!(suggestions, complete_symbol(last_word, Returns(true), context_module))
+    append!(suggestions, complete_symbol(nothing, last_word, Returns(true), context_module))
 
     return sort!(suggestions, by=completion_text), wordrange
 end
@@ -862,6 +915,55 @@ function project_deps_get_completion_candidates(pkgstarts::String, project_file:
     return Completion[PackageCompletion(name) for name in loading_candidates]
 end
 
+function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ffunc::Function), context_module::Module, string::String, name::String, pos::Int, dotpos::Int, startpos::Int, comp_keywords=false)
+    ex = nothing
+    comp_keywords && append!(suggestions, complete_keyword(name))
+    if dotpos > 1 && string[dotpos] == '.'
+        s = string[1:dotpos-1]
+        # First see if the whole string up to `pos` is a valid expression. If so, use it.
+        ex = Meta.parse(s, raise=false, depwarn=false)
+        if isexpr(ex, :incomplete)
+            s = string[startpos:pos]
+            # Heuristic to find the start of the expression. TODO: This would be better
+            # done with a proper error-recovering parser.
+            if 0 < startpos <= lastindex(string) && string[startpos] == '.'
+                i = prevind(string, startpos)
+                while 0 < i
+                    c = string[i]
+                    if c in (')', ']')
+                        if c == ')'
+                            c_start = '('
+                            c_end = ')'
+                        elseif c == ']'
+                            c_start = '['
+                            c_end = ']'
+                        end
+                        frange, end_of_identifier = find_start_brace(string[1:prevind(string, i)], c_start=c_start, c_end=c_end)
+                        isempty(frange) && break # unbalanced parens
+                        startpos = first(frange)
+                        i = prevind(string, startpos)
+                    elseif c in ('\'', '\"', '\`')
+                        s = "$c$c"*string[startpos:pos]
+                        break
+                    else
+                        break
+                    end
+                    s = string[startpos:pos]
+                end
+            end
+            if something(findlast(in(non_identifier_chars), s), 0) < something(findlast(isequal('.'), s), 0)
+                lookup_name, name = rsplit(s, ".", limit=2)
+                name = String(name)
+
+                ex = Meta.parse(lookup_name, raise=false, depwarn=false)
+            end
+            isexpr(ex, :incomplete) && (ex = nothing)
+        end
+    end
+    append!(suggestions, complete_symbol(ex, name, ffunc, context_module))
+    return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true
+end
+
 function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true)
     # First parse everything up to the current position
     partial = string[1:pos]
@@ -905,8 +1007,25 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
         length(matches)>0 && return Completion[DictCompletion(identifier, match) for match in sort!(matches)], loc::Int:pos, true
     end
 
+    ffunc = Returns(true)
+    suggestions = Completion[]
+
+    # Check if this is a var"" string macro that should be completed like
+    # an identifier rather than a string.
+    # TODO: It would be nice for the parser to give us more information here
+    # so that we can lookup the macro by identity rather than pattern matching
+    # its invocation.
+    varrange = findprev("var\"", string, pos)
+
+    if varrange !== nothing
+        ok, ret = bslash_completions(string, pos)
+        ok && return ret
+        startpos = first(varrange) + 4
+        dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0)
+        return complete_identifiers!(Completion[], ffunc, context_module, string,
+            string[startpos:pos], pos, dotpos, startpos)
     # otherwise...
-    if inc_tag in [:cmd, :string]
+    elseif inc_tag in [:cmd, :string]
         m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial))
         startpos = nextind(partial, reverseind(partial, m.offset))
         r = startpos:pos
@@ -953,9 +1072,8 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
         startpos += length(m.match)
     end
 
-    ffunc = Returns(true)
-    suggestions = Completion[]
-    comp_keywords = true
+    name = string[max(startpos, dotpos+1):pos]
+    comp_keywords = !isempty(name) && startpos > dotpos
     if afterusing(string, startpos)
         # We're right after using or import. Let's look only for packages
         # and modules we can reach from here
@@ -997,38 +1115,11 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif
         ffunc = (mod,x)->(Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getfield(mod, x), Module))
         comp_keywords = false
     end
+
     startpos == 0 && (pos = -1)
     dotpos < startpos && (dotpos = startpos - 1)
-    s = string[startpos:pos]
-    comp_keywords && append!(suggestions, complete_keyword(s))
-    # if the start of the string is a `.`, try to consume more input to get back to the beginning of the last expression
-    if 0 < startpos <= lastindex(string) && string[startpos] == '.'
-        i = prevind(string, startpos)
-        while 0 < i
-            c = string[i]
-            if c in (')', ']')
-                if c == ')'
-                    c_start = '('
-                    c_end = ')'
-                elseif c == ']'
-                    c_start = '['
-                    c_end = ']'
-                end
-                frange, end_of_identifier = find_start_brace(string[1:prevind(string, i)], c_start=c_start, c_end=c_end)
-                isempty(frange) && break # unbalanced parens
-                startpos = first(frange)
-                i = prevind(string, startpos)
-            elseif c in ('\'', '\"', '\`')
-                s = "$c$c"*string[startpos:pos]
-                break
-            else
-                break
-            end
-            s = string[startpos:pos]
-        end
-    end
-    append!(suggestions, complete_symbol(s, ffunc, context_module))
-    return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true
+    return complete_identifiers!(suggestions, ffunc, context_module, string,
+        name, pos, dotpos, startpos, comp_keywords)
 end
 
 function shell_completions(string, pos)
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index ea663fa16007f..db28c84b07cb6 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -255,7 +255,11 @@ function summarize(binding::Binding, sig)
     else
         println(io, "No documentation found.\n")
         quot = any(isspace, sprint(print, binding)) ? "'" : ""
-        println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.")
+        if Base.isbindingresolved(binding.mod, binding.var)
+            println(io, "Binding ", quot, "`", binding, "`", quot, " exists, but has not been assigned a value.")
+        else
+            println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.")
+        end
     end
     md = Markdown.parse(seekstart(io))
     # Save metadata in the generated markdown.
@@ -475,7 +479,8 @@ function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main)
     quote
         repl_latex($io, $str)
         repl_search($io, $str, $mod)
-        $(if !isdefined(mod, s) && !haskey(keywords, s) && !Base.isoperator(s)
+        $(if !isdefined(mod, s) && !Base.isbindingresolved(mod, s) && !haskey(keywords, s) && !Base.isoperator(s)
+               # n.b. we call isdefined for the side-effect of resolving the binding, if possible
                :(repl_corrections($io, $str, $mod))
           end)
         $(_repl(s, brief))
diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl
index 87a3c289661d9..3c2be918d6bd2 100644
--- a/stdlib/REPL/src/latex_symbols.jl
+++ b/stdlib/REPL/src/latex_symbols.jl
@@ -1289,6 +1289,7 @@ const latex_symbols = Dict(
     "\\bsolhsub" => "\u27c8",  # reverse solidus preceding subset
     "\\suphsol" => "\u27c9",  # superset preceding solidus
     "\\wedgedot" => "⟑",  # and with dot
+    "\\veedot" => "⟇",  # or with dot
     "\\upin" => "⟒",  # element of opening upwards
     "\\bigbot" => "⟘",  # large up tack
     "\\bigtop" => "⟙",  # large down tack
@@ -1569,7 +1570,9 @@ const latex_symbols = Dict(
     "\\bsimilarleftarrow" => "\u2b41",  # reverse tilde operator above leftwards arrow
     "\\leftarrowbackapprox" => "\u2b42",  # leftwards arrow above reverse almost equal to
     "\\rightarrowgtr" => "\u2b43",  # rightwards arrow through greater-than
-    "\\rightarrowsupset" => "\u2b44",  # rightwards arrow through subset
+    "\\leftarrowless" => "\u2977",  # leftwards arrow through less-than
+    "\\rightarrowsupset" => "\u2b44",  # rightwards arrow through superset
+    "\\leftarrowsubset" => "\u297a",  # leftwards arrow through subset
     "\\LLeftarrow" => "\u2b45",  # leftwards quadruple arrow
     "\\RRightarrow" => "\u2b46",  # rightwards quadruple arrow
     "\\bsimilarrightarrow" => "\u2b47",  # reverse tilde operator above rightwards arrow
diff --git a/stdlib/REPL/test/lineedit.jl b/stdlib/REPL/test/lineedit.jl
index 649e294f7c07d..cf87e811508a0 100644
--- a/stdlib/REPL/test/lineedit.jl
+++ b/stdlib/REPL/test/lineedit.jl
@@ -306,21 +306,21 @@ seek(buf,0)
 
 ## edit_delete_prev_word ##
 
-buf = IOBuffer("type X\n ")
+buf = IOBuffer(Vector{UInt8}("type X\n "), read=true, write=true)
 seekend(buf)
 @test !isempty(@inferred(LineEdit.edit_delete_prev_word(buf)))
 @test position(buf) == 5
 @test buf.size == 5
 @test content(buf) == "type "
 
-buf = IOBuffer("4 +aaa+ x")
+buf = IOBuffer(Vector{UInt8}("4 +aaa+ x"), read=true, write=true)
 seek(buf,8)
 @test !isempty(LineEdit.edit_delete_prev_word(buf))
 @test position(buf) == 3
 @test buf.size == 4
 @test content(buf) == "4 +x"
 
-buf = IOBuffer("x = func(arg1,arg2 , arg3)")
+buf = IOBuffer(Vector{UInt8}("x = func(arg1,arg2 , arg3)"), read=true, write=true)
 seekend(buf)
 LineEdit.char_move_word_left(buf)
 @test position(buf) == 21
@@ -917,16 +917,26 @@ end
     @test get_last_word("a[]") == "a[]"
 end
 
-@testset "issue #45836" begin
+@testset "show_completions" begin
     term = FakeTerminal(IOBuffer(), IOBuffer(), IOBuffer())
-    promptstate = REPL.LineEdit.init_state(term, REPL.LineEdit.mode(new_state()))
+
+    function getcompletion(completions)
+        promptstate = REPL.LineEdit.init_state(term, REPL.LineEdit.mode(new_state()))
+        REPL.LineEdit.show_completions(promptstate, completions)
+        return String(take!(term.out_stream))
+    end
+
+    # When the number of completions is less than
+    # LineEdit.MULTICOLUMN_THRESHOLD, they should be in a single column.
     strings = ["abcdef", "123456", "ijklmn"]
-    REPL.LineEdit.show_completions(promptstate, strings)
-    completion = String(take!(term.out_stream))
-    @test completion == "\033[0B\n\rabcdef\r\033[8C123456\r\033[16Cijklmn\n"
-    strings2 = ["abcdef", "123456\nijklmn"]
-    promptstate = REPL.LineEdit.init_state(term, REPL.LineEdit.mode(new_state()))
-    REPL.LineEdit.show_completions(promptstate, strings2)
-    completion2 = String(take!(term.out_stream))
-    @test completion2 == "\033[0B\nabcdef\n123456\nijklmn\n"
+    @assert length(strings) < LineEdit.MULTICOLUMN_THRESHOLD
+    @test getcompletion(strings) == "\033[0B\n\rabcdef\n\r123456\n\rijklmn\n"
+
+    # But with more than the threshold there should be multiple columns
+    strings2 = repeat(["foo"], LineEdit.MULTICOLUMN_THRESHOLD + 1)
+    @test getcompletion(strings2) == "\033[0B\n\rfoo\r\e[5Cfoo\n\rfoo\r\e[5Cfoo\n\rfoo\r\e[5Cfoo\n"
+
+    # Check that newlines in completions are handled correctly (issue #45836)
+    strings3 = ["abcdef", "123456\nijklmn"]
+    @test getcompletion(strings3) == "\033[0B\nabcdef\n123456\nijklmn\n"
 end
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index edcb91defc9ab..8a6c6a3445e0a 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -1164,7 +1164,7 @@ fake_repl() do stdin_write, stdout_read, repl
     Base.wait(repltask)
 end
 
-help_result(line, mod::Module=Base) = mod.eval(REPL._helpmode(IOBuffer(), line))
+help_result(line, mod::Module=Base) = Core.eval(mod, REPL._helpmode(IOBuffer(), line))
 
 # Docs.helpmode tests: we test whether the correct expressions are being generated here,
 # rather than complete integration with Julia's REPL mode system.
@@ -1203,7 +1203,9 @@ end
 @test occursin("broadcast", sprint(show, help_result(".<=")))
 
 # Issue 39427
-@test occursin("does not exist", sprint(show, help_result(":=")))
+@test occursin("does not exist.", sprint(show, help_result(":=")))
+global some_undef_global
+@test occursin("exists,", sprint(show, help_result("some_undef_global", @__MODULE__)))
 
 # Issue #40563
 @test occursin("does not exist", sprint(show, help_result("..")))
@@ -1481,7 +1483,7 @@ fake_repl() do stdin_write, stdout_read, repl
     end
     # initialize `err` to `nothing`
     t = @async (readline(stdout_read); readuntil(stdout_read, "\e[0m\n"))
-    write(stdin_write, "global err = nothing\n")
+    write(stdin_write, "setglobal!(Base.MainInclude, :err, nothing)\n")
     wait(t)
     readuntil(stdout_read, "julia> ", keep=true)
     # generate top-level error
@@ -1600,7 +1602,7 @@ fake_repl() do stdin_write, stdout_read, repl
     @test buffercontents(LineEdit.buffer(s)) == "1234αβ56γ"
 end
 
-# Non standard output_prefix, tested via `ipython_mode!`
+# Non standard output_prefix, tested via `numbered_prompt!`
 fake_repl() do stdin_write, stdout_read, repl
     repl.interface = REPL.setup_interface(repl)
 
@@ -1609,7 +1611,7 @@ fake_repl() do stdin_write, stdout_read, repl
         REPL.run_repl(repl; backend)
     end
 
-    REPL.ipython_mode!(repl, backend)
+    REPL.numbered_prompt!(repl, backend)
 
     global c = Base.Event(true)
     function sendrepl2(cmd, txt)
@@ -1645,6 +1647,11 @@ fake_repl() do stdin_write, stdout_read, repl
     s = sendrepl2("x_47878 = range(-1; stop = 1)\n", "-1:1")
     @test contains(s, "Out[11]: -1:1")
 
+    # Test for https://github.com/JuliaLang/julia/issues/49041
+    s = sendrepl2("using Test; @test true", "In [14]")
+    @test !contains(s, "ERROR")
+    @test contains(s, "Test Passed")
+
     write(stdin_write, '\x04')
     Base.wait(repltask)
 end
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index 547e5c5659d3f..b0d1ff4b5237a 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -121,7 +121,7 @@ let ex = quote
 
         const tuple = (1, 2)
 
-        test_y_array=[CompletionFoo.Test_y(rand()) for i in 1:10]
+        test_y_array=[(@__MODULE__).Test_y(rand()) for i in 1:10]
         test_dict = Dict("abc"=>1, "abcd"=>10, :bar=>2, :bar2=>9, Base=>3,
                          occursin=>4, `ls`=>5, 66=>7, 67=>8, ("q",3)=>11,
                          "α"=>12, :α=>13)
@@ -132,7 +132,12 @@ let ex = quote
         macro testcmd_cmd(s) end
         macro tϵsτcmδ_cmd(s) end
 
-        end
+        var"complicated symbol with spaces" = 5
+
+        struct WeirdNames end
+        Base.propertynames(::WeirdNames) = (Symbol("oh no!"), Symbol("oh yes!"))
+
+        end # module CompletionFoo
         test_repl_comp_dict = CompletionFoo.test_dict
         test_repl_comp_customdict = CompletionFoo.test_customdict
         test_dict_ℂ = Dict(1=>2)
@@ -153,6 +158,9 @@ test_complete_context(s, m) =  map_completion_text(@inferred(completions(s,lasti
 test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo)
 test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false)))
 
+test_methods_list(@nospecialize(f), tt) = map(x -> string(x.method), Base._methods_by_ftype(Base.signature_type(f, tt), 10, Base.get_world_counter()))
+
+
 module M32377 end
 test_complete_32377(s) = map_completion_text(completions(s,lastindex(s), M32377))
 
@@ -418,8 +426,9 @@ end
 let s = "CompletionFoo.test(1, 1, "
     c, r, res = test_complete(s)
     @test !res
-    @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int})))
-    @test c[2] == string(first(methods(Main.CompletionFoo.test, Tuple{}))) # corresponding to the vararg
+    m = test_methods_list(Main.CompletionFoo.test, Tuple{Int, Int, Vararg})
+    @test c[1] == m[1]
+    @test c[2] == m[2]
     @test length(c) == 2
     # In particular, this checks that test(x::Real, y::Real) is not a valid completion
     # since it is strictly less specific than test(x::T, y::T) where T
@@ -430,7 +439,7 @@ end
 let s = "CompletionFoo.test(CompletionFoo.array,"
     c, r, res = test_complete(s)
     @test !res
-    @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Array{Int, 1}, Any})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test, Tuple{Array{Int, 1}, Any, Vararg}))
     @test length(c) == 2
     @test r == 1:18
     @test s[r] == "CompletionFoo.test"
@@ -439,7 +448,7 @@ end
 let s = "CompletionFoo.test(1,1,1,"
     c, r, res = test_complete(s)
     @test !res
-    @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Any, Any, Any})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test, Tuple{Any, Any, Any, Vararg}))
     @test length(c) == 1
     @test r == 1:18
     @test s[r] == "CompletionFoo.test"
@@ -463,7 +472,7 @@ end
 
 let s = "prevind(\"θ\",1,"
     c, r, res = test_complete(s)
-    @test c[1] == string(first(methods(prevind, Tuple{String, Int})))
+    @test c[1] == first(test_methods_list(prevind, Tuple{String, Int, Vararg}))
     @test r == 1:7
     @test s[r] == "prevind"
 end
@@ -472,7 +481,7 @@ for (T, arg) in [(String,"\")\""),(Char, "')'")]
     s = "(1, CompletionFoo.test2($arg,"
     c, r, res = test_complete(s)
     @test length(c) == 1
-    @test c[1] == string(first(methods(Main.CompletionFoo.test2, Tuple{T,})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test2, Tuple{T, Vararg}))
     @test r == 5:23
     @test s[r] == "CompletionFoo.test2"
 end
@@ -480,19 +489,19 @@ end
 let s = "(1, CompletionFoo.test2(`')'`,"
     c, r, res = test_complete(s)
     @test length(c) == 1
-    @test c[1] == string(first(methods(Main.CompletionFoo.test2, Tuple{Cmd})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test2, Tuple{Cmd, Vararg}))
 end
 
 let s = "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat,"
     c, r, res = test_complete(s)
     @test !res
-    @test_broken only(c) == string(first(methods(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64})))
+    @test_broken only(c) == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg}))
 end
 
 let s = "CompletionFoo.test3([1.,2.], 1.,"
     c, r, res = test_complete(s)
     @test !res
-    @test c[1] == string(first(methods(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg}))
     @test r == 1:19
     @test length(c) == 1
     @test s[r] == "CompletionFoo.test3"
@@ -501,7 +510,7 @@ end
 let s = "CompletionFoo.test4(\"e\",r\" \","
     c, r, res = test_complete(s)
     @test !res
-    @test c[1] == string(first(methods(Main.CompletionFoo.test4, Tuple{String, Regex})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test4, Tuple{String, Regex, Vararg}))
     @test r == 1:19
     @test length(c) == 1
     @test s[r] == "CompletionFoo.test4"
@@ -512,7 +521,7 @@ end
 let s = "CompletionFoo.test5(broadcast((x,y)->x==y, push!(Base.split(\"\",' '),\"\",\"\"), \"\"),"
     c, r, res = test_complete(s)
     @test !res
-    @test_broken only(c) == string(first(methods(Main.CompletionFoo.test5, Tuple{BitArray{1}})))
+    @test_broken only(c) == first(test_methods_list(Main.CompletionFoo.test5, Tuple{BitArray{1}, Vararg}))
 end
 
 # test partial expression expansion
@@ -520,14 +529,14 @@ let s = "CompletionFoo.test5(Bool[x==1 for x=1:4],"
     c, r, res = test_complete(s)
     @test !res
     @test length(c) == 1
-    @test c[1] == string(first(methods(Main.CompletionFoo.test5, Tuple{Array{Bool,1}})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test5, Tuple{Array{Bool,1}, Vararg}))
 end
 
 let s = "CompletionFoo.test4(CompletionFoo.test_y_array[1]()[1], CompletionFoo.test_y_array[1]()[2], "
     c, r, res = test_complete(s)
     @test !res
     @test length(c) == 1
-    @test c[1] == string(first(methods(Main.CompletionFoo.test4, Tuple{String, String})))
+    @test c[1] == first(test_methods_list(Main.CompletionFoo.test4, Tuple{String, String, Vararg}))
 end
 
 # Test that string escaping is handled correct
@@ -548,22 +557,17 @@ let s = "convert("
     @test length(c2) > REPL.REPLCompletions.MAX_METHOD_COMPLETIONS
 end
 
-########## Test where the current inference logic fails ########
-# Fails due to inference fails to determine a concrete type for arg 1
-# But it returns AbstractArray{T,N} and hence is able to remove test5(x::Float64) from the suggestions
-let s = "CompletionFoo.test5(AbstractArray[[]][1],"
+let s = "CompletionFoo.test5(AbstractArray[Bool[]][1],"
     c, r, res = test_complete(s)
     @test !res
-    @test length(c) == 2
+    @test length(c) == 1
 end
 
-# equivalent to above but due to the time macro the completion fails to find the concrete type
-let s = "CompletionFoo.test3(@time([1, 2] + CompletionFoo.varfloat),"
+let s = "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat),"
     c, r, res = test_complete(s)
     @test !res
     @test length(c) == 2
 end
-#################################################################
 
 # method completions with kwargs
 let s = "CompletionFoo.kwtest( "
@@ -780,7 +784,7 @@ end
 let s = "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], "
     c, r, res = test_complete(s)
     @test !res
-    @test length(c) == 4
+    @test length(c) == 2
     @test all(startswith("test10("), c)
     @test allunique(c)
     @test !any(str->occursin("test10(a::Integer, b::Integer, c)", str), c)
@@ -790,7 +794,7 @@ end
 let s = "CompletionFoo.test11(Integer[false][1], Integer[14][1], "
     c, r, res = test_complete(s)
     @test !res
-    @test length(c) == 4
+    @test length(c) == 3
     @test all(startswith("test11("), c)
     @test allunique(c)
 end
@@ -798,16 +802,16 @@ end
 let s = "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6,"
     c, r, res = test_complete(s)
     @test !res
-    @test length(c) == 3
+    @test length(c) == 2
     @test any(str->occursin("test11(a::Integer, b, c)", str), c)
     @test any(str->occursin("test11(u, v::Integer, w)", str), c)
-    @test any(str->occursin("test11(x::$Int, y::$Int, z)", str), c)
+    @test !any(str->occursin("test11(x::$Int, y::$Int, z)", str), c)
 end
 
 let s = "CompletionFoo.test11(3, 4,"
     c, r, res = test_complete(s)
     @test !res
-    @test length(c) == 4
+    @test length(c) == 2
     @test any(str->occursin("test11(x::$Int, y::$Int, z)", str), c)
     @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c)
 end
@@ -1606,11 +1610,17 @@ let s = ":(function foo(::Int) end).args[1].args[2]."
     @test c == Any[]
 end
 
-let s = "log(log.(x),"
+let s = "log(log.(varfloat),"
     c, r = test_complete_foo(s)
     @test !isempty(c)
 end
 
+# TODO: this is a bad test
+#let s = "log(log.(noexist),"
+#    c, r = test_complete_foo(s)
+#    @test isempty(c)
+#end
+
 let s = "Base.return_types(getin"
     c, r = test_complete_foo(s)
     @test "getindex" in c
@@ -1626,9 +1636,10 @@ end
 let s = "test(1,1, "
     c, r, res = test_complete_foo(s)
     @test !res
-    @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int})))
-    @test c[2] == string(first(methods(Main.CompletionFoo.test, Tuple{})))  # corresponding to the vararg
-    @test length(c) == 2
+    m = test_methods_list(Main.CompletionFoo.test, Tuple{Int, Int, Vararg})
+    @test length(m) == 2 == length(c)
+    @test c[1] == m[1]
+    @test c[2] == m[2]
     # In particular, this checks that test(x::Real, y::Real) is not a valid completion
     # since it is strictly less specific than test(x::T, y::T) where T
     @test r == 1:4
@@ -1647,7 +1658,7 @@ end
 
 let s = "prevind(\"θ\",1,"
     c, r, res = test_complete_foo(s)
-    @test c[1] == string(first(methods(prevind, Tuple{String, Int})))
+    @test c[1] == first(test_methods_list(prevind, Tuple{String, Int, Vararg}))
     @test r == 1:7
     @test s[r] == "prevind"
 end
@@ -1698,8 +1709,7 @@ end
 @testset "https://github.com/JuliaLang/julia/issues/40247" begin
     # getfield type completion can work for complicated expression
 
-    let
-        m = Module()
+    let m = Module()
         @eval m begin
             struct Rs
                 rs::Vector{Regex}
@@ -1716,8 +1726,7 @@ end
         @test length(c) == fieldcount(Regex)
     end
 
-    let
-        m = Module()
+    let m = Module()
         @eval m begin
             struct R
                 r::Regex
@@ -1739,10 +1748,8 @@ end
     end
 end
 
-
 @testset "https://github.com/JuliaLang/julia/issues/47593" begin
-    let
-        m = Module()
+    let m = Module()
         @eval m begin
             struct TEST_47594
                 var"("::Int
@@ -1754,3 +1761,78 @@ end
         @test c == Any["var\"(\""]
     end
 end
+
+# https://github.com/JuliaLang/julia/issues/36437
+struct Issue36437{T}
+    v::T
+end
+Base.propertynames(::Issue36437) = (:a, :b, :c)
+function Base.getproperty(v::Issue36437, s::Symbol)
+    if s === :a
+        return 1
+    elseif s === :b
+        return 2
+    elseif s === :c
+        return getfield(v, :v)
+    else
+        throw(ArgumentError(lazy"`(v::Issue36437).$s` is not supported"))
+    end
+end
+
+let s = "Issue36437(42)."
+    c, r, res = test_complete_context(s, @__MODULE__)
+    @test res
+    for n in ("a", "b", "c")
+        @test n in c
+    end
+end
+
+let s = "Some(Issue36437(42)).value."
+    c, r, res = test_complete_context(s, @__MODULE__)
+    @test res
+    for n in ("a", "b", "c")
+        @test n in c
+    end
+end
+
+# aggressive concrete evaluation on mutable allocation in `repl_frame`
+let s = "Ref(Issue36437(42))[]."
+    c, r, res = test_complete_context(s, @__MODULE__)
+    @test res
+    for n in ("a", "b", "c")
+        @test n in c
+    end
+    @test "v" ∉ c
+end
+
+const global_xs = [Some(42)]
+let s = "pop!(global_xs)."
+    c, r, res = test_complete_context(s, @__MODULE__)
+    @test res
+    @test "value" in c
+end
+@test length(global_xs) == 1 # the completion above shouldn't evaluate `pop!` call
+
+# Test completion of var"" identifiers (#49280)
+let s = "var\"complicated "
+    c, r = test_complete_foo(s)
+    @test c == Any["var\"complicated symbol with spaces\""]
+end
+
+for s in ("WeirdNames().var\"oh ", "WeirdNames().var\"")
+    c, r = test_complete_foo(s)
+    @test c == Any["var\"oh no!\"", "var\"oh yes!\""]
+end
+
+# Test completion of non-Expr literals
+let s = "\"abc\"."
+    c, r = test_complete(s)
+    # (no completion, but shouldn't error)
+    @test isempty(c)
+end
+
+let s = "`abc`.e"
+    c, r = test_complete(s)
+    # (completions for the fields of `Cmd`)
+    @test c == Any["env", "exec"]
+end
diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
index 199dcab940c86..f32fc3e2a4f84 100644
--- a/stdlib/Random/Project.toml
+++ b/stdlib/Random/Project.toml
@@ -2,7 +2,6 @@ name = "Random"
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [deps]
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
 [extras]
diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl
index 5b8aa4644d140..3be276ad23754 100644
--- a/stdlib/Random/src/Xoshiro.jl
+++ b/stdlib/Random/src/Xoshiro.jl
@@ -113,12 +113,17 @@ struct TaskLocalRNG <: AbstractRNG end
 TaskLocalRNG(::Nothing) = TaskLocalRNG()
 rng_native_52(::TaskLocalRNG) = UInt64
 
-function setstate!(x::TaskLocalRNG, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64)
+function setstate!(
+    x::TaskLocalRNG,
+    s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64, # xoshiro256 state
+    s4::UInt64 = 1s0 + 3s1 + 5s2 + 7s3, # internal splitmix state
+)
     t = current_task()
     t.rngState0 = s0
     t.rngState1 = s1
     t.rngState2 = s2
     t.rngState3 = s3
+    t.rngState4 = s4
     x
 end
 
@@ -128,11 +133,11 @@ end
     tmp = s0 + s3
     res = ((tmp << 23) | (tmp >> 41)) + s0
     t = s1 << 17
-    s2 = xor(s2, s0)
-    s3 = xor(s3, s1)
-    s1 = xor(s1, s2)
-    s0 = xor(s0, s3)
-    s2 = xor(s2, t)
+    s2 ⊻= s0
+    s3 ⊻= s1
+    s1 ⊻= s2
+    s0 ⊻= s3
+    s2 ⊻= t
     s3 = s3 << 45 | s3 >> 19
     task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
     res
@@ -159,7 +164,7 @@ seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, make_seed(s
 @inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128})
     first = rand(rng, UInt64)
     second = rand(rng,UInt64)
-    second + UInt128(first)<<64
+    second + UInt128(first) << 64
 end
 
 @inline rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{Int128}) = rand(rng, UInt128) % Int128
@@ -178,14 +183,14 @@ end
 
 function copy!(dst::TaskLocalRNG, src::Xoshiro)
     t = current_task()
-    t.rngState0, t.rngState1, t.rngState2, t.rngState3 = src.s0, src.s1, src.s2, src.s3
-    dst
+    setstate!(dst, src.s0, src.s1, src.s2, src.s3)
+    return dst
 end
 
 function copy!(dst::Xoshiro, src::TaskLocalRNG)
     t = current_task()
-    dst.s0, dst.s1, dst.s2, dst.s3 = t.rngState0, t.rngState1, t.rngState2, t.rngState3
-    dst
+    setstate!(dst, t.rngState0, t.rngState1, t.rngState2, t.rngState3)
+    return dst
 end
 
 function ==(a::Xoshiro, b::TaskLocalRNG)
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
index 4e8d3b4ffb39a..3f570d862b743 100644
--- a/stdlib/Random/test/runtests.jl
+++ b/stdlib/Random/test/runtests.jl
@@ -1018,3 +1018,50 @@ guardseed() do
         @test f42752(true) === val
     end
 end
+
+@testset "TaskLocalRNG: stream collision smoke test" begin
+    # spawn a trinary tree of tasks:
+    # - spawn three recursive child tasks in each
+    # - generate a random UInt64 in each before, after and between
+    # - collect and count all the generated random values
+    # these should all be distinct across all tasks
+    function gen(d)
+        r = rand(UInt64)
+        vals = [r]
+        if d ≥ 0
+            append!(vals, gent(d - 1))
+            isodd(r) && append!(vals, gent(d - 1))
+            push!(vals, rand(UInt64))
+            iseven(r) && append!(vals, gent(d - 1))
+        end
+        push!(vals, rand(UInt64))
+    end
+    gent(d) = fetch(@async gen(d))
+    seeds = rand(RandomDevice(), UInt64, 5)
+    for seed in seeds
+        Random.seed!(seed)
+        vals = gen(6)
+        @test allunique(vals)
+    end
+end
+
+@testset "TaskLocalRNG: child doesn't affect parent" begin
+    seeds = rand(RandomDevice(), UInt64, 5)
+    for seed in seeds
+        Random.seed!(seed)
+        x = rand(UInt64)
+        y = rand(UInt64)
+        n = 3
+        for i = 1:n
+            Random.seed!(seed)
+            @sync for j = 0:i
+                @async rand(UInt64)
+            end
+            @test x == rand(UInt64)
+            @sync for j = 0:(n-i)
+                @async rand(UInt64)
+            end
+            @test y == rand(UInt64)
+        end
+    end
+end
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index a45523bc94d7d..dd901d6910abf 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -39,7 +39,7 @@ const TAGS = Any[
     Float16, Float32, Float64, Char, DataType, Union, UnionAll, Core.TypeName, Tuple,
     Array, Expr, LineNumberNode, :__LabelNode__, GotoNode, QuoteNode, CodeInfo, TypeVar,
     Core.Box, Core.MethodInstance, Module, Task, String, SimpleVector, Method,
-    GlobalRef, SlotNumber, TypedSlot, NewvarNode, SSAValue,
+    GlobalRef, SlotNumber, Const, NewvarNode, SSAValue,
 
     # dummy entries for tags that don't correspond directly to types
     Symbol, # UNDEFREF_TAG
@@ -77,15 +77,14 @@ const TAGS = Any[
     (Int64(0):Int64(n_int_literals-1))...
 ]
 
-@assert length(TAGS) == 255
+const NTAGS = length(TAGS)
+@assert NTAGS == 255
 
-const ser_version = 22 # do not make changes without bumping the version #!
+const ser_version = 23 # do not make changes without bumping the version #!
 
 format_version(::AbstractSerializer) = ser_version
 format_version(s::Serializer) = s.version
 
-const NTAGS = length(TAGS)
-
 function sertag(@nospecialize(v))
     # NOTE: we use jl_value_ptr directly since we know at least one of the arguments
     # in the comparison below is a singleton.
@@ -194,7 +193,7 @@ serialize(s::AbstractSerializer, ::Tuple{}) = writetag(s.io, EMPTYTUPLE_TAG)
 
 function serialize(s::AbstractSerializer, t::Tuple)
     l = length(t)
-    if l <= 255
+    if l <= NTAGS
         writetag(s.io, TUPLE_TAG)
         write(s.io, UInt8(l))
     else
@@ -224,7 +223,7 @@ function serialize(s::AbstractSerializer, x::Symbol)
     if len > 7
         serialize_cycle(s, x) && return
     end
-    if len <= 255
+    if len <= NTAGS
         writetag(s.io, SYMBOL_TAG)
         write(s.io, UInt8(len))
     else
@@ -295,7 +294,7 @@ function serialize(s::AbstractSerializer, ss::String)
         serialize_cycle(s, ss) && return
         writetag(s.io, SHARED_REF_TAG)
     end
-    if len <= 255
+    if len <= NTAGS
         writetag(s.io, STRING_TAG)
         write(s.io, UInt8(len))
     else
@@ -327,7 +326,7 @@ end
 function serialize(s::AbstractSerializer, ex::Expr)
     serialize_cycle(s, ex) && return
     l = length(ex.args)
-    if l <= 255
+    if l <= NTAGS
         writetag(s.io, EXPR_TAG)
         write(s.io, UInt8(l))
     else
@@ -1060,7 +1059,6 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         if template !== nothing
             # TODO: compress template
             meth.source = template::CodeInfo
-            meth.pure = template.pure
             if !@isdefined(slot_syms)
                 slot_syms = ccall(:jl_compress_argnames, Ref{String}, (Any,), meth.source.slotnames)
             end
@@ -1191,7 +1189,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
         end
     end
     ci.propagate_inbounds = deserialize(s)
-    ci.pure = deserialize(s)
+    if format_version(s) < 23
+        deserialize(s) # `pure` field has been removed
+    end
     if format_version(s) >= 20
         ci.has_fcall = deserialize(s)
     end
@@ -1368,7 +1368,7 @@ function deserialize_typename(s::AbstractSerializer, number)
                     end
                 else
                     # old object format -- try to forward from old to new
-                    @eval Core.kwcall(kwargs, f::$ty, args...) = $kws(kwargs, f, args...)
+                    @eval Core.kwcall(kwargs::NamedTuple, f::$ty, args...) = $kws(kwargs, f, args...)
                 end
             end
         end
diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version
index 347369ea45bc4..d4a548daef5d7 100644
--- a/stdlib/SparseArrays.version
+++ b/stdlib/SparseArrays.version
@@ -1,4 +1,4 @@
 SPARSEARRAYS_BRANCH = main
-SPARSEARRAYS_SHA1 = 4eaa4582569a76c3199849d8194582d948b7a70f
+SPARSEARRAYS_SHA1 = 8affe9e499379616e33fc60a24bb31500e8423d7
 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git
 SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1
diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version
index 22857e138655a..27197b12be54c 100644
--- a/stdlib/Statistics.version
+++ b/stdlib/Statistics.version
@@ -1,4 +1,4 @@
 STATISTICS_BRANCH = master
-STATISTICS_SHA1 = e9ac70b760dcf87b77affe6c068548a3325d6e2b
+STATISTICS_SHA1 = a3feba2bb63f06b7f40024185e9fa5f6385e2510
 STATISTICS_GIT_URL := https://github.com/JuliaStats/Statistics.jl.git
 STATISTICS_TAR_URL = https://api.github.com/repos/JuliaStats/Statistics.jl/tarball/$1
diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
index 2940970ceff9f..6b87d417fc2a8 100644
--- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
+++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
@@ -14,31 +14,31 @@ export libamd, libbtf, libcamd, libccolamd, libcholmod, libcolamd, libklu, libld
 # Man I can't wait until these are automatically handled by an in-Base JLLWrappers clone.
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libamd_handle = C_NULL
-libamd_path = ""
-libbtf_handle = C_NULL
-libbtf_path = ""
-libcamd_handle = C_NULL
-libcamd_path = ""
-libccolamd_handle = C_NULL
-libccolamd_path = ""
-libcholmod_handle = C_NULL
-libcholmod_path = ""
-libcolamd_handle = C_NULL
-libcolamd_path = ""
-libklu_handle = C_NULL
-libklu_path = ""
-libldl_handle = C_NULL
-libldl_path = ""
-librbio_handle = C_NULL
-librbio_path = ""
-libspqr_handle = C_NULL
-libspqr_path = ""
-libsuitesparseconfig_handle = C_NULL
-libsuitesparseconfig_path = ""
-libumfpack_handle = C_NULL
-libumfpack_path = ""
+artifact_dir::String = ""
+libamd_handle::Ptr{Cvoid} = C_NULL
+libamd_path::String = ""
+libbtf_handle::Ptr{Cvoid} = C_NULL
+libbtf_path::String = ""
+libcamd_handle::Ptr{Cvoid} = C_NULL
+libcamd_path::String = ""
+libccolamd_handle::Ptr{Cvoid} = C_NULL
+libccolamd_path::String = ""
+libcholmod_handle::Ptr{Cvoid} = C_NULL
+libcholmod_path::String = ""
+libcolamd_handle::Ptr{Cvoid} = C_NULL
+libcolamd_path::String = ""
+libklu_handle::Ptr{Cvoid} = C_NULL
+libklu_path::String = ""
+libldl_handle::Ptr{Cvoid} = C_NULL
+libldl_path::String = ""
+librbio_handle::Ptr{Cvoid} = C_NULL
+librbio_path::String = ""
+libspqr_handle::Ptr{Cvoid} = C_NULL
+libspqr_path::String = ""
+libsuitesparseconfig_handle::Ptr{Cvoid} = C_NULL
+libsuitesparseconfig_path::String = ""
+libumfpack_handle::Ptr{Cvoid} = C_NULL
+libumfpack_path::String = ""
 
 if Sys.iswindows()
     const libamd = "libamd.dll"
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index f5bef8344f64f..1fa9f97405504 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -171,7 +171,8 @@ function print_table(f::MbyFunc, io::IO, a::AbstractDict,
         end
         if is_table(value)
             push!(ks, String(key))
-            header = isempty(value) || !all(is_tabular(v) for v in values(value))::Bool
+            _values = @invokelatest values(value)
+            header = isempty(value) || !all(is_tabular(v) for v in _values)::Bool
             if header
                 # print table
                 first_block || println(io)
diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md
index 1db2f1ab967f1..1c9a55480d2c9 100644
--- a/stdlib/Test/docs/src/index.md
+++ b/stdlib/Test/docs/src/index.md
@@ -55,6 +55,7 @@ julia> @test foo("f") == 20
 Test Failed at none:1
   Expression: foo("f") == 20
    Evaluated: 1 == 20
+
 ERROR: There was an error during testing
 ```
 
@@ -224,6 +225,7 @@ julia> @test 1 ≈ 0.999999
 Test Failed at none:1
   Expression: 1 ≈ 0.999999
    Evaluated: 1 ≈ 0.999999
+
 ERROR: There was an error during testing
 ```
 You can specify relative and absolute tolerances by setting the `rtol` and `atol` keyword arguments of `isapprox`, respectively,
@@ -258,6 +260,16 @@ in the test set reporting. The test will not run but gives a `Broken` `Result`.
 Test.@test_skip
 ```
 
+## Test result types
+
+```@docs
+Test.Result
+Test.Pass
+Test.Fail
+Test.Error
+Test.Broken
+```
+
 ## Creating Custom `AbstractTestSet` Types
 
 Packages can create their own `AbstractTestSet` subtypes by implementing the `record` and `finish`
@@ -393,13 +405,13 @@ using Test
 
 @testset "Example tests" begin
 
-	@testset "Math tests" begin
-		include("math_tests.jl")
-	end
+    @testset "Math tests" begin
+        include("math_tests.jl")
+    end
 
-	@testset "Greeting tests" begin
-		include("greeting_tests.jl")
-	end
+    @testset "Greeting tests" begin
+        include("greeting_tests.jl")
+    end
 end
 ```
 
@@ -414,16 +426,16 @@ Using our knowledge of `Test.jl`, here are some example tests we could add to `m
 
 ```julia
 @testset "Testset 1" begin
-	@test 2 == simple_add(1, 1)
-	@test 3.5 == simple_add(1, 2.5)
+    @test 2 == simple_add(1, 1)
+    @test 3.5 == simple_add(1, 2.5)
         @test_throws MethodError simple_add(1, "A")
         @test_throws MethodError simple_add(1, 2, 3)
 end
 
 @testset "Testset 2" begin
-	@test 1.0 == type_multiply(1.0, 1.0)
+    @test 1.0 == type_multiply(1.0, 1.0)
         @test isa(type_multiply(2.0, 2.0), Float64)
-	@test_throws MethodError type_multiply(1, 2.5)
+    @test_throws MethodError type_multiply(1, 2.5)
 end
 ```
 
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index f1216371d0b27..392b736c09837 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -47,30 +47,64 @@ const FAIL_FAST = Ref{Bool}(false)
 
 # Backtrace utility functions
 function ip_has_file_and_func(ip, file, funcs)
-    return any(fr -> (string(fr.file) == file && fr.func in funcs), StackTraces.lookup(ip))
+    return any(fr -> (in_file(fr, file) && fr.func in funcs), StackTraces.lookup(ip))
 end
+in_file(frame, file) = string(frame.file) == file
 
-function scrub_backtrace(bt)
+function test_location(bt, file_ts, file_t)
+    if (isnothing(file_ts) || isnothing(file_t))
+        return macrocall_location(bt, something(file_ts, @__FILE__))
+    else
+        return test_callsite(bt, file_ts, file_t)
+    end
+end
+
+function test_callsite(bt, file_ts, file_t)
+    # We avoid duplicate calls to `StackTraces.lookup`, as it is an expensive call.
+    # For that, we retrieve locations from lower to higher stack elements
+    # and only traverse parts of the backtrace which we haven't traversed before.
+    # The order will always be <internal functions> -> `@test` -> `@testset`.
+    internal = @something(macrocall_location(bt, @__FILE__), return nothing)
+    test = internal - 1 + @something(findfirst(ip -> any(frame -> in_file(frame, file_t), StackTraces.lookup(ip)), @view bt[internal:end]), return nothing)
+    testset = test - 1 + @something(macrocall_location(@view(bt[test:end]), file_ts), return nothing)
+
+    # If stacktrace locations differ, include frames until the `@testset` appears.
+    test != testset && return testset
+    # `@test` and `@testset` occurred at the same stacktrace location.
+    # This may happen if `@test` occurred directly in scope of the testset,
+    # or if `@test` occurred in a function that has been inlined in the testset.
+    frames = StackTraces.lookup(bt[testset])
+    outer_frame = findfirst(frame -> in_file(frame, file_ts) && frame.func == Symbol("macro expansion"), frames)
+    isnothing(outer_frame) && return nothing
+    # The `@test` call occurred directly in scope of a `@testset`.
+    # The __source__ from `@test` will be printed in the test message upon failure.
+    # There is no need to include more frames, but always include at least the internal macrocall location in the stacktrace.
+    in_file(frames[outer_frame], file_t) && return internal
+    # The `@test` call was inlined, so we still need to include the callsite.
+    return testset
+end
+
+macrocall_location(bt, file) = findfirst(ip -> ip_has_file_and_func(ip, file, (Symbol("macro expansion"),)), bt)
+
+function scrub_backtrace(bt, file_ts, file_t)
     do_test_ind = findfirst(ip -> ip_has_file_and_func(ip, @__FILE__, (:do_test, :do_test_throws)), bt)
     if do_test_ind !== nothing && length(bt) > do_test_ind
         bt = bt[do_test_ind + 1:end]
     end
-    name_ind = findfirst(ip -> ip_has_file_and_func(ip, @__FILE__, (Symbol("macro expansion"),)), bt)
-    if name_ind !== nothing && length(bt) != 0
-        bt = bt[1:name_ind]
-    end
+    stop_at = test_location(bt, file_ts, file_t)
+    !isnothing(stop_at) && !isempty(bt) && return bt[1:stop_at]
     return bt
 end
 
-function scrub_exc_stack(stack)
-    return Any[ (x[1], scrub_backtrace(x[2]::Vector{Union{Ptr{Nothing},Base.InterpreterIP}})) for x in stack ]
+function scrub_exc_stack(stack, file_ts, file_t)
+    return Any[ (x[1], scrub_backtrace(x[2]::Vector{Union{Ptr{Nothing},Base.InterpreterIP}}, file_ts, file_t)) for x in stack ]
 end
 
 # define most of the test infrastructure without type specialization
 @nospecialize
 
 """
-    Result
+    Test.Result
 
 All tests produce a result object. This object may or may not be
 stored, depending on whether the test is part of a test set.
@@ -78,7 +112,7 @@ stored, depending on whether the test is part of a test set.
 abstract type Result end
 
 """
-    Pass
+    Test.Pass <: Test.Result
 
 The test condition was true, i.e. the expression evaluated to true or
 the correct exception was thrown.
@@ -108,7 +142,7 @@ function Base.show(io::IO, t::Pass)
 end
 
 """
-    Fail
+    Test.Fail <: Test.Result
 
 The test condition was false, i.e. the expression evaluated to false or
 the correct exception was not thrown.
@@ -165,10 +199,11 @@ function Base.show(io::IO, t::Fail)
             print(io, "\n     Context: ", t.context)
         end
     end
+    println(io) # add some visual space to separate sequential failures
 end
 
 """
-    Error
+    Test.Error <: Test.Result
 
 The test condition couldn't be evaluated due to an exception, or
 it evaluated to something other than a [`Bool`](@ref).
@@ -184,7 +219,7 @@ struct Error <: Result
 
     function Error(test_type::Symbol, orig_expr, value, bt, source::LineNumberNode)
         if test_type === :test_error
-            bt = scrub_exc_stack(bt)
+            bt = scrub_exc_stack(bt, nothing, extract_file(source))
         end
         if test_type === :test_error || test_type === :nontest_error
             bt_str = try # try the latest world for this, since we might have eval'd new code for show
@@ -249,7 +284,7 @@ function Base.show(io::IO, t::Error)
 end
 
 """
-    Broken
+    Test.Broken <: Test.Result
 
 The test condition is the expected (failed) result of a broken test,
 or was explicitly skipped with `@test_skip`.
@@ -713,7 +748,7 @@ Test Passed
 
 julia> @test_throws "Try sqrt(Complex" sqrt(-1)
 Test Passed
-     Message: "DomainError with -1.0:\\nsqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x))."
+     Message: "DomainError with -1.0:\\nsqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x))."
 ```
 
 In the final example, instead of matching a single string it could alternatively have been performed with:
@@ -1012,8 +1047,9 @@ mutable struct DefaultTestSet <: AbstractTestSet
     time_start::Float64
     time_end::Union{Float64,Nothing}
     failfast::Bool
+    file::Union{String,Nothing}
 end
-function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing)
+function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing, source = nothing)
     if isnothing(failfast)
         # pass failfast state into child testsets
         parent_ts = get_testset()
@@ -1023,8 +1059,11 @@ function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming:
             failfast = false
         end
     end
-    return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast)
+    return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast, extract_file(source))
 end
+extract_file(source::LineNumberNode) = extract_file(source.file)
+extract_file(file::Symbol) = string(file)
+extract_file(::Nothing) = nothing
 
 struct FailFastError <: Exception end
 
@@ -1035,14 +1074,14 @@ record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
 
 # For the other result types, immediately print the error message
 # but do not terminate. Print a backtrace.
-function record(ts::DefaultTestSet, t::Union{Fail, Error})
-    if TESTSET_PRINT_ENABLE[]
+function record(ts::DefaultTestSet, t::Union{Fail, Error}; print_result::Bool=TESTSET_PRINT_ENABLE[])
+    if print_result
         print(ts.description, ": ")
         # don't print for interrupted tests
         if !(t isa Error) || t.test_type !== :test_interrupted
             print(t)
             if !isa(t, Error) # if not gets printed in the show method
-                Base.show_backtrace(stdout, scrub_backtrace(backtrace()))
+                Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source)))
             end
             println()
         end
@@ -1127,7 +1166,7 @@ const TESTSET_PRINT_ENABLE = Ref(true)
 
 # Called at the end of a @testset, behaviour depends on whether
 # this is a child of another testset, or the "root" testset
-function finish(ts::DefaultTestSet)
+function finish(ts::DefaultTestSet; print_results::Bool=TESTSET_PRINT_ENABLE[])
     ts.time_end = time()
     # If we are a nested test set, do not print a full summary
     # now - let the parent test set do the printing
@@ -1144,7 +1183,7 @@ function finish(ts::DefaultTestSet)
     total_broken = broken + c_broken
     total = total_pass + total_fail + total_error + total_broken
 
-    if TESTSET_PRINT_ENABLE[]
+    if print_results
         print_test_results(ts)
     end
 
@@ -1405,6 +1444,7 @@ julia> @testset let logi = log(im)
 Test Failed at none:3
   Expression: !(iszero(real(logi)))
      Context: logi = 0.0 + 1.5707963267948966im
+
 ERROR: There was an error during testing
 ```
 """
@@ -1487,7 +1527,11 @@ function testset_beginend_call(args, tests, source)
     ex = quote
         _check_testset($testsettype, $(QuoteNode(testsettype.args[1])))
         local ret
-        local ts = $(testsettype)($desc; $options...)
+        local ts = if ($testsettype === $DefaultTestSet) && $(isa(source, LineNumberNode))
+            $(testsettype)($desc; source=$(QuoteNode(source.file)), $options...)
+        else
+            $(testsettype)($desc; $options...)
+        end
         push_testset(ts)
         # we reproduce the logic of guardseed, but this function
         # cannot be used as it changes slightly the semantic of @testset,
@@ -1583,7 +1627,11 @@ function testset_forloop(args, testloop, source)
             copy!(RNG, tmprng)
 
         end
-        ts = $(testsettype)($desc; $options...)
+        ts = if ($testsettype === $DefaultTestSet) && $(isa(source, LineNumberNode))
+            $(testsettype)($desc; source=$(QuoteNode(source.file)), $options...)
+        else
+            $(testsettype)($desc; $options...)
+        end
         push_testset(ts)
         first_iteration = false
         try
@@ -1970,54 +2018,11 @@ function detect_unbound_args(mods...;
     return collect(ambs)
 end
 
-# find if var will be constrained to have a definite value
-# in any concrete leaftype subtype of typ
-function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool)
-    typ === var && return true
-    while typ isa UnionAll
-        covariant && constrains_param(var, typ.var.ub, covariant) && return true
-        # typ.var.lb doesn't constrain var
-        typ = typ.body
-    end
-    if typ isa Union
-        # for unions, verify that both options would constrain var
-        ba = constrains_param(var, typ.a, covariant)
-        bb = constrains_param(var, typ.b, covariant)
-        (ba && bb) && return true
-    elseif typ isa DataType
-        # return true if any param constrains var
-        fc = length(typ.parameters)
-        if fc > 0
-            if typ.name === Tuple.name
-                # vararg tuple needs special handling
-                for i in 1:(fc - 1)
-                    p = typ.parameters[i]
-                    constrains_param(var, p, covariant) && return true
-                end
-                lastp = typ.parameters[fc]
-                vararg = Base.unwrap_unionall(lastp)
-                if vararg isa Core.TypeofVararg && isdefined(vararg, :N)
-                    constrains_param(var, vararg.N, covariant) && return true
-                    # T = vararg.parameters[1] doesn't constrain var
-                else
-                    constrains_param(var, lastp, covariant) && return true
-                end
-            else
-                for i in 1:fc
-                    p = typ.parameters[i]
-                    constrains_param(var, p, false) && return true
-                end
-            end
-        end
-    end
-    return false
-end
-
 function has_unbound_vars(@nospecialize sig)
     while sig isa UnionAll
         var = sig.var
         sig = sig.body
-        if !constrains_param(var, sig, true)
+        if !Core.Compiler.constrains_param(var, sig, #=covariant=#true, #=type_constrains=#true)
             return true
         end
     end
@@ -2137,5 +2142,6 @@ function _check_bitarray_consistency(B::BitArray{N}) where N
 end
 
 include("logging.jl")
+include("precompile.jl")
 
 end # module
diff --git a/stdlib/Test/src/precompile.jl b/stdlib/Test/src/precompile.jl
new file mode 100644
index 0000000000000..2cb2fb7f3f0c6
--- /dev/null
+++ b/stdlib/Test/src/precompile.jl
@@ -0,0 +1,9 @@
+redirect_stdout(devnull) do
+    @testset "example" begin
+        @test 1 == 1
+        @test_throws ErrorException error()
+        @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2"
+        @test_broken 1 == 2
+        @test 1 ≈ 1.0000000000000001
+    end
+end
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index ac643e0ccfca2..0388e2107e098 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -722,6 +722,115 @@ end
     rm(f; force=true)
 end
 
+@testset "provide informative location in backtrace for test failures" begin
+    win2unix(filename) = replace(filename, "\\" => '/')
+    utils = win2unix(tempname())
+    write(utils,
+    """
+    function test_properties2(value)
+        @test isodd(value)
+    end
+    """)
+
+    included = win2unix(tempname())
+    write(included,
+    """
+    @testset "Other tests" begin
+        @test 1 + 1 == 3
+        test_properties2(2)
+    end
+    test_properties2(8)
+
+    # Test calls to `@test` and `@testset` with no file/lineno information (__source__ == nothing).
+    eval(Expr(:macrocall, Symbol("@test"), nothing, :false))
+    eval(Expr(:macrocall, Symbol("@testset"), nothing, "Testset without source", quote
+        @test false
+        @test error("failed")
+    end))
+    """)
+
+    runtests = win2unix(tempname())
+    write(runtests,
+    """
+    using Test
+
+    include("$utils")
+
+    function test_properties(value)
+        @test isodd(value)
+    end
+
+    @testset "Tests" begin
+        test_properties(8)
+        @noinline test_properties(8)
+        test_properties2(8)
+
+        include("$included")
+    end
+    """)
+    msg = read(pipeline(ignorestatus(`$(Base.julia_cmd()) --startup-file=no --color=no $runtests`), stderr=devnull), String)
+    msg = win2unix(msg)
+    regex = r"((?:Tests|Other tests|Testset without source): Test Failed (?:.|\n)*?)\n\nStacktrace:(?:.|\n)*?(?=\n(?:Tests|Other tests))"
+    failures = map(eachmatch(regex, msg)) do m
+        m = match(r"(Tests|Other tests|Testset without source): .*? at (.*?)\n  Expression: (.*)(?:.|\n)*\n+Stacktrace:\n((?:.|\n)*)", m.match)
+        (; testset = m[1], source = m[2], ex = m[3], stacktrace = m[4])
+    end
+    @test length(failures) == 8 # 8 failed tests
+    @test count(contains("Error During Test"), split(msg, '\n')) == 1 # 1 error
+    test_properties_macro_source = runtests * ":6"
+    test_properties2_macro_source = utils * ":2"
+
+    fail = failures[1]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 6
+    @test fail.testset == "Tests" && fail.source == test_properties_macro_source && fail.ex == "isodd(value)"
+    @test count(contains(runtests * ":10"), lines) == 2 # @testset + test
+
+    fail = failures[2]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 6
+    @test fail.testset == "Tests" && fail.source == test_properties_macro_source && fail.ex == "isodd(value)"
+    @test count(contains(runtests * ":10"), lines) == 1 # @testset
+    @test count(contains(runtests * ":11"), lines) == 1 # test
+
+    fail = failures[3]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 6
+    @test fail.testset == "Tests" && fail.source == test_properties2_macro_source && fail.ex == "isodd(value)"
+    @test count(contains(runtests * ":10"), lines) == 1 # @testset
+    @test count(contains(runtests * ":12"), lines) == 1 # test
+
+    fail = failures[4]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 5
+    @test fail.testset == "Other tests" && fail.source == included * ":2" && fail.ex == "1 + 1 == 3"
+    @test count(contains(included * ":2"), lines) == 2 # @testset + test
+    @test count(contains(runtests * ":10"), lines) == 0 # @testset (stop at the innermost testset)
+
+    fail = failures[5]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 6
+    @test fail.testset == "Other tests" && fail.source == test_properties2_macro_source && fail.ex == "isodd(value)"
+    @test count(contains(included * ":2"), lines) == 1 # @testset
+    @test count(contains(included * ":3"), lines) == 1 # test
+    @test count(contains(runtests * ":10"), lines) == 0 # @testset (stop at the innermost testset)
+
+    fail = failures[6]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 8
+    @test fail.testset == "Tests" && fail.source == test_properties2_macro_source && fail.ex == "isodd(value)"
+    @test count(contains(runtests * ":10"), lines) == 1 # @testset
+    @test count(contains(runtests * ":14"), lines) == 1 # include
+    @test count(contains(included * ":5"), lines) == 1 # test
+
+    fail = failures[7]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 9
+    @test fail.testset == "Tests" && fail.source == "none:0" && fail.ex == "false"
+    @test count(contains(runtests * ":10"), lines) == 1 # @testset
+    @test count(contains(runtests * ":14"), lines) == 1 # include
+    @test count(contains(included * ":8"), lines) == 1 # test
+
+    fail = failures[8]; lines = split(fail.stacktrace, '\n')
+    @test length(lines)/2 ≤ 5
+    @test fail.testset == "Testset without source" && fail.source == included * ":10" && fail.ex == "false"
+    @test count(contains(included * ":10"), lines) == 2 # @testset + test
+    @test count(contains(runtests * ":10"), lines) == 0 # @testset (stop at the innermost testset)
+end
+
 let io = IOBuffer()
     exc = Test.TestSetException(1,2,3,4,Vector{Union{Test.Error, Test.Fail}}())
     Base.showerror(io, exc, backtrace())
diff --git a/stdlib/Zlib_jll/src/Zlib_jll.jl b/stdlib/Zlib_jll/src/Zlib_jll.jl
index c05e26c4c6993..ea381b8b0683c 100644
--- a/stdlib/Zlib_jll/src/Zlib_jll.jl
+++ b/stdlib/Zlib_jll/src/Zlib_jll.jl
@@ -13,9 +13,9 @@ export libz
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libz_handle = C_NULL
-libz_path = ""
+artifact_dir::String = ""
+libz_handle::Ptr{Cvoid} = C_NULL
+libz_path::String = ""
 
 if Sys.iswindows()
     const libz = "libz.dll"
diff --git a/stdlib/dSFMT_jll/src/dSFMT_jll.jl b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
index f1d6d019faf59..35ada23778a94 100644
--- a/stdlib/dSFMT_jll/src/dSFMT_jll.jl
+++ b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
@@ -14,9 +14,9 @@ export libdSFMT
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libdSFMT_handle = C_NULL
-libdSFMT_path = ""
+artifact_dir::String = ""
+libdSFMT_handle::Ptr{Cvoid} = C_NULL
+libdSFMT_path::String = ""
 
 if Sys.iswindows()
     const libdSFMT = "libdSFMT.dll"
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
index 8332d68102f8e..87519e5a824b0 100644
--- a/stdlib/libLLVM_jll/Project.toml
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "libLLVM_jll"
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "14.0.6+0"
+version = "15.0.7+5"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
index 331600eab6523..3140dc3989a72 100644
--- a/stdlib/libLLVM_jll/src/libLLVM_jll.jl
+++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
@@ -14,16 +14,16 @@ export libLLVM
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libLLVM_handle = C_NULL
-libLLVM_path = ""
+artifact_dir::String = ""
+libLLVM_handle::Ptr{Cvoid} = C_NULL
+libLLVM_path::String = ""
 
 if Sys.iswindows()
-    const libLLVM = "libLLVM-14jl.dll"
+    const libLLVM = "$(Base.libllvm_name).dll"
 elseif Sys.isapple()
     const libLLVM = "@rpath/libLLVM.dylib"
 else
-    const libLLVM = "libLLVM-14jl.so"
+    const libLLVM = "$(Base.libllvm_name).so"
 end
 
 function __init__()
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
index 08ba2e266512d..4699baa7dad23 100644
--- a/stdlib/libblastrampoline_jll/Project.toml
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -1,13 +1,13 @@
 name = "libblastrampoline_jll"
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.4.0+0"
+version = "5.8.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.8"
+julia = "1.10"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
index 108b7d6558079..49e7932a6b701 100644
--- a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
+++ b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
@@ -14,10 +14,11 @@ export libblastrampoline
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libblastrampoline_handle = C_NULL
-libblastrampoline_path = ""
+artifact_dir::String = ""
+libblastrampoline_handle::Ptr{Cvoid} = C_NULL
+libblastrampoline_path::String = ""
 
+# NOTE: keep in sync with `Base.libblas_name` and `Base.liblapack_name`.
 const libblastrampoline = if Sys.iswindows()
     "libblastrampoline-5.dll"
 elseif Sys.isapple()
diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml
index 32ea7d0f34134..b8a9394c50e37 100644
--- a/stdlib/nghttp2_jll/Project.toml
+++ b/stdlib/nghttp2_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "nghttp2_jll"
 uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-version = "1.48.0+0"
+version = "1.52.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/nghttp2_jll/src/nghttp2_jll.jl b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
index 09af350636943..76e8d3582c402 100644
--- a/stdlib/nghttp2_jll/src/nghttp2_jll.jl
+++ b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
@@ -13,9 +13,9 @@ export libnghttp2
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-libnghttp2_handle = C_NULL
-libnghttp2_path = ""
+artifact_dir::String = ""
+libnghttp2_handle::Ptr{Cvoid} = C_NULL
+libnghttp2_path::String = ""
 
 if Sys.iswindows()
     const libnghttp2 = "libnghttp2-14.dll"
diff --git a/stdlib/nghttp2_jll/test/runtests.jl b/stdlib/nghttp2_jll/test/runtests.jl
index d752251f98ebc..2f9af6d6a3338 100644
--- a/stdlib/nghttp2_jll/test/runtests.jl
+++ b/stdlib/nghttp2_jll/test/runtests.jl
@@ -11,5 +11,5 @@ end
 
 @testset "nghttp2_jll" begin
     info = unsafe_load(ccall((:nghttp2_version,libnghttp2), Ptr{nghttp2_info}, (Cint,), 0))
-    @test VersionNumber(unsafe_string(info.version_str)) == v"1.48.0"
+    @test VersionNumber(unsafe_string(info.version_str)) == v"1.52.0"
 end
diff --git a/stdlib/p7zip_jll/src/p7zip_jll.jl b/stdlib/p7zip_jll/src/p7zip_jll.jl
index 4320003b282f7..01f26de936e78 100644
--- a/stdlib/p7zip_jll/src/p7zip_jll.jl
+++ b/stdlib/p7zip_jll/src/p7zip_jll.jl
@@ -13,8 +13,8 @@ export p7zip
 # These get calculated in __init__()
 const PATH = Ref("")
 const LIBPATH = Ref("")
-artifact_dir = ""
-p7zip_path = ""
+artifact_dir::String = ""
+p7zip_path::String = ""
 if Sys.iswindows()
     const p7zip_exe = "7z.exe"
 else
@@ -69,8 +69,8 @@ end
 
 function init_p7zip_path()
     # Prefer our own bundled p7zip, but if we don't have one, pick it up off of the PATH
-    # If this is an in-tree build, `7z` will live in `bin`.  Otherwise, it'll be in `libexec`
-    for bundled_p7zip_path in (joinpath(Sys.BINDIR, Base.LIBEXECDIR, p7zip_exe),
+    # If this is an in-tree build, `7z` will live in `bindir`.  Otherwise, it'll be in `private_libexecdir`
+    for bundled_p7zip_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, p7zip_exe),
                                joinpath(Sys.BINDIR, p7zip_exe))
         if isfile(bundled_p7zip_path)
             global p7zip_path = abspath(bundled_p7zip_path)
diff --git a/sysimage.mk b/sysimage.mk
index b426a74454b1d..7ed61d471a153 100644
--- a/sysimage.mk
+++ b/sysimage.mk
@@ -74,7 +74,7 @@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAH
 	@mv $@.tmp $@
 
 define sysimg_builder
-$$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji
+$$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji $$(JULIAHOME)/contrib/generate_precompile.jl
 	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
 	if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
 			JULIA_NUM_THREADS=1 \
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index 4928f35f5fad0..c5ff97deb6777 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -520,9 +520,6 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T
     @test convert(Matrix, Y) == Y
     @test convert(Matrix, view(Y, 1:2, 1:2)) == Y
     @test_throws MethodError convert(Matrix, X)
-
-    # convert(::Type{Union{}}, A::AbstractMatrix)
-    @test_throws MethodError convert(Union{}, X)
 end
 
 mutable struct TestThrowNoGetindex{T} <: AbstractVector{T} end
@@ -1160,8 +1157,9 @@ Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S
             Ps = Strider{Int, 3}(vec(A), 1, strides(A)[collect(perm)], sz[collect(perm)])
             @test pointer(Ap) == pointer(Sp) == pointer(Ps)
             for i in 1:length(Ap)
-                # This is intentionally disabled due to ambiguity
-                @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i)
+                # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)`.
+                # But only evaluate one iteration as broken to reduce test report noise
+                i == 1 && @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i)
                 @test P[i] == Ap[i] == Sp[i] == Ps[i]
             end
             Pv = view(P, idxs[collect(perm)]...)
@@ -1180,8 +1178,9 @@ Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S
             Svp = Base.PermutedDimsArray(Sv, perm)
             @test pointer(Avp) == pointer(Svp)
             for i in 1:length(Avp)
-                # This is intentionally disabled due to ambiguity
-                @test_broken pointer(Avp, i) == pointer(Svp, i)
+                # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)`
+                # But only evaluate one iteration as broken to reduce test report noise
+                i == 1 && @test_broken pointer(Avp, i) == pointer(Svp, i)
                 @test Ip[i] == Vp[i] == Avp[i] == Svp[i]
             end
         end
@@ -1220,8 +1219,9 @@ end
         Ps = Strider{Int, 2}(vec(A), 1, strides(A)[collect(perm)], sz[collect(perm)])
         @test pointer(Ap) == pointer(Sp) == pointer(Ps) == pointer(At) == pointer(Aa)
         for i in 1:length(Ap)
-            # This is intentionally disabled due to ambiguity
-            @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i) == pointer(At, i) == pointer(Aa, i) == pointer(St, i) == pointer(Sa, i)
+            # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)`
+            # But only evaluate one iteration as broken to reduce test report noise
+            i == 1 && @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i) == pointer(At, i) == pointer(Aa, i) == pointer(St, i) == pointer(Sa, i)
             @test pointer(Ps, i) == pointer(At, i) == pointer(Aa, i) == pointer(St, i) == pointer(Sa, i)
             @test P[i] == Ap[i] == Sp[i] == Ps[i] == At[i] == Aa[i] == St[i] == Sa[i]
         end
@@ -1247,8 +1247,9 @@ end
         Svp = Base.PermutedDimsArray(Sv, perm)
         @test pointer(Avp) == pointer(Svp) == pointer(Avt) == pointer(Ava)
         for i in 1:length(Avp)
-            # This is intentionally disabled due to ambiguity
-            @test_broken pointer(Avp, i) == pointer(Svp, i) == pointer(Avt, i) == pointer(Ava, i) == pointer(Svt, i) == pointer(Sva, i)
+            # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)`
+            # But only evaluate one iteration as broken to reduce test report noise
+            i == 1 && @test_broken pointer(Avp, i) == pointer(Svp, i) == pointer(Avt, i) == pointer(Ava, i) == pointer(Svt, i) == pointer(Sva, i)
             @test pointer(Avt, i) == pointer(Ava, i) == pointer(Svt, i) == pointer(Sva, i)
             @test Vp[i] == Avp[i] == Svp[i] == Avt[i] == Ava[i] == Svt[i] == Sva[i]
         end
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index e96954299b702..5056fc626e84a 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -46,8 +46,8 @@ let err = try
     @test occursin("Possible fix, define\n  ambig(::Integer, ::Integer)", errstr)
 end
 
-ambig_with_bounds(x, ::Int, ::T) where {T<:Integer,S} = 0
-ambig_with_bounds(::Int, x, ::T) where {T<:Integer,S} = 1
+@test_warn "declares type variable S but does not use it" @eval ambig_with_bounds(x, ::Int, ::T) where {T<:Integer,S} = 0
+@test_warn "declares type variable S but does not use it" @eval ambig_with_bounds(::Int, x, ::T) where {T<:Integer,S} = 1
 let err = try
               ambig_with_bounds(1, 2, 3)
           catch _e_
@@ -177,12 +177,10 @@ ambs = detect_ambiguities(Ambig48312)
         @test good
     end
 
-    # some ambiguities involving Union{} type parameters are expected, but not required
+    # some ambiguities involving Union{} type parameters may be expected, but not required
     let ambig = Set(detect_ambiguities(Core; recursive=true, ambiguous_bottom=true))
-        m1 = which(Core.Compiler.convert, Tuple{Type{<:Core.IntrinsicFunction}, Any})
-        m2 = which(Core.Compiler.convert, Tuple{Type{<:Nothing}, Any})
-        pop!(ambig, (m1, m2))
         @test !isempty(ambig)
+        @test length(ambig) < 30
     end
 
     STDLIB_DIR = Sys.STDLIB
@@ -357,7 +355,7 @@ f35983(::Type, ::Type) = 2
 @test length(Base.methods(f35983, (Any, Any))) == 2
 @test first(Base.methods(f35983, (Any, Any))).sig == Tuple{typeof(f35983), Type, Type}
 let ambig = Ref{Int32}(0)
-    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
     @test ms isa Vector
     @test length(ms) == 1
     @test ambig[] == 0
@@ -366,7 +364,7 @@ f35983(::Type{Int16}, ::Any) = 3
 @test length(Base.methods_including_ambiguous(f35983, (Type, Type))) == 2
 @test length(Base.methods(f35983, (Type, Type))) == 1
 let ambig = Ref{Int32}(0)
-    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
     @test ms isa Vector
     @test length(ms) == 2
     @test ambig[] == 1
@@ -374,18 +372,29 @@ end
 
 struct B38280 <: Real; val; end
 let ambig = Ref{Int32}(0)
-    ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, nothing, 1, typemax(UInt), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, nothing, 1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
     @test ms isa Vector
     @test length(ms) == 1
     @test ambig[] == 1
 end
 
+fnoambig(::Int,::Int) = 1
+fnoambig(::Int,::Any) = 2
+fnoambig(::Any,::Int) = 3
+fnoambig(::Any,::Any) = 4
+let has_ambig = Ref(Int32(0))
+    ms = Base._methods_by_ftype(Tuple{typeof(fnoambig), Any, Any}, nothing, 4, Base.get_world_counter(), false, Ref(typemin(UInt)), Ref(typemax(UInt)), has_ambig)
+    @test ms isa Vector
+    @test length(ms) == 4
+    @test has_ambig[] == 0
+end
+
 # issue #11407
 f11407(::Dict{K,V}, ::Dict{Any,V}) where {K,V} = 1
 f11407(::Dict{K,V}, ::Dict{K,Any}) where {K,V} = 2
 @test_throws MethodError f11407(Dict{Any,Any}(), Dict{Any,Any}()) # ambiguous
 @test f11407(Dict{Any,Int}(), Dict{Any,Int}()) == 1
-f11407(::Dict{Any,Any}, ::Dict{Any,Any}) where {K,V} = 3
+@test_warn "declares type variable V but does not use it" @eval f11407(::Dict{Any,Any}, ::Dict{Any,Any}) where {K,V} = 3
 @test f11407(Dict{Any,Any}(), Dict{Any,Any}()) == 3
 
 # issue #12814
@@ -399,8 +408,9 @@ end
 
 # issue #43040
 module M43040
+   using Test
    struct C end
-   stripType(::Type{C}) where {T} = C # where {T} is intentionally incorrect
+   @test_warn "declares type variable T but does not use it" @eval M43040 stripType(::Type{C}) where {T} = C # where {T} is intentionally incorrect
 end
 
 @test isempty(detect_ambiguities(M43040; recursive=true))
diff --git a/test/arrayops.jl b/test/arrayops.jl
index e7ac6a1132568..770cec3705038 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -1705,6 +1705,39 @@ end
     @test istriu([1 2 0; 0 4 1])
 end
 
+#issue 49021
+@testset "reverse cartesian indices" begin
+    @test reverse(CartesianIndices((2, 3))) === CartesianIndices((2:-1:1, 3:-1:1))
+    @test reverse(CartesianIndices((2:5, 3:7))) === CartesianIndices((5:-1:2, 7:-1:3))
+    @test reverse(CartesianIndices((5:-1:2, 7:-1:3))) === CartesianIndices((2:1:5, 3:1:7))
+end
+
+@testset "reverse cartesian indices dim" begin
+    A = CartesianIndices((2, 3, 5:-1:1))
+    @test reverse(A, dims=1) === CartesianIndices((2:-1:1, 3, 5:-1:1))
+    @test reverse(A, dims=3) === CartesianIndices((2, 3, 1:1:5))
+    @test_throws ArgumentError reverse(A, dims=0)
+    @test_throws ArgumentError reverse(A, dims=4)
+end
+
+@testset "reverse cartesian indices multiple dims" begin
+    A = CartesianIndices((2, 3, 5:-1:1))
+    @test reverse(A, dims=(1, 3)) === CartesianIndices((2:-1:1, 3, 1:1:5))
+    @test reverse(A, dims=(3, 1)) === CartesianIndices((2:-1:1, 3, 1:1:5))
+    @test_throws ArgumentError reverse(A, dims=(1, 2, 4))
+    @test_throws ArgumentError reverse(A, dims=(0, 1, 2))
+    @test_throws ArgumentError reverse(A, dims=(1, 1))
+end
+
+@testset "stability of const propagation" begin
+    A = CartesianIndices((2, 3, 5:-1:1))
+    f1(x) = reverse(x; dims=1)
+    f2(x) = reverse(x; dims=(1, 3))
+    @test @inferred(f1(A)) === CartesianIndices((2:-1:1, 3, 5:-1:1))
+    @test @inferred(f2(A)) === CartesianIndices((2:-1:1, 3, 1:1:5))
+    @test @inferred(reverse(A; dims=())) === A
+end
+
 # issue 4228
 let A = [[i i; i i] for i=1:2]
     @test cumsum(A) == Any[[1 1; 1 1], [3 3; 3 3]]
diff --git a/test/binaryplatforms.jl b/test/binaryplatforms.jl
index 793a9b1f06a41..8de522e9c6c8b 100644
--- a/test/binaryplatforms.jl
+++ b/test/binaryplatforms.jl
@@ -315,8 +315,9 @@ end
         P("x86_64", "linux"; libgfortran_version=v"5") => "linux8",
 
         # Ambiguity test
-        P("aarch64", "linux"; libgfortran_version=v"3") => "linux4",
+        P("aarch64", "linux"; libgfortran_version=v"3") => "linux3",
         P("aarch64", "linux"; libgfortran_version=v"3", libstdcxx_version=v"3.4.18") => "linux5",
+        P("aarch64", "linux"; libgfortran_version=v"3", libstdcxx_version=v"3.4.18", foo="bar") => "linux9",
 
         # OS test
         P("x86_64", "macos"; libgfortran_version=v"3") => "mac4",
@@ -327,8 +328,9 @@ end
     @test select_platform(platforms, P("x86_64", "linux"; libgfortran_version=v"4")) == "linux7"
 
     # Ambiguity test
-    @test select_platform(platforms, P("aarch64", "linux")) == "linux5"
-    @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"3")) == "linux5"
+    @test select_platform(platforms, P("aarch64", "linux")) == "linux3"
+    @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"3")) == "linux3"
+    @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"3", libstdcxx_version=v"3.4.18")) === "linux5"
     @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"4")) === nothing
 
     @test select_platform(platforms, P("x86_64", "macos")) == "mac4"
@@ -339,6 +341,22 @@ end
 
     # Sorry, Alex. ;)
     @test select_platform(platforms, P("x86_64", "freebsd")) === nothing
+
+    # The new "most complete match" algorithm deals with ambiguities as follows:
+    platforms = Dict(
+        P("x86_64", "linux") => "normal",
+        P("x86_64", "linux"; sanitize="memory") => "sanitized",
+    )
+    @test select_platform(platforms, P("x86_64", "linux")) == "normal"
+    @test select_platform(platforms, P("x86_64", "linux"; sanitize="memory")) == "sanitized"
+
+    # Ties are broken by reverse-sorting by triplet:
+    platforms = Dict(
+        P("x86_64", "linux"; libgfortran_version=v"3") => "libgfortran3",
+        P("x86_64", "linux"; libgfortran_version=v"4") => "libgfortran4",
+    )
+    @test select_platform(platforms, P("x86_64", "linux")) == "libgfortran4"
+    @test select_platform(platforms, P("x86_64", "linux"; libgfortran_version=v"3")) == "libgfortran3"
 end
 
 @testset "Custom comparators" begin
diff --git a/test/bitarray.jl b/test/bitarray.jl
index dd1d0d7d6c5a4..5d0bff62ab6e1 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -1652,7 +1652,7 @@ timesofar("cat")
     @test ((svdb1, svdb1A) = (svd(b1), svd(Array(b1)));
             svdb1.U == svdb1A.U && svdb1.S == svdb1A.S && svdb1.V == svdb1A.V)
     @test ((qrb1, qrb1A) = (qr(b1), qr(Array(b1)));
-            qrb1.Q == qrb1A.Q && qrb1.R == qrb1A.R)
+            Matrix(qrb1.Q) == Matrix(qrb1A.Q) && qrb1.R == qrb1A.R)
 
     b1 = bitrand(v1)
     @check_bit_operation diagm(0 => b1) BitMatrix
diff --git a/test/bitset.jl b/test/bitset.jl
index ca8e06adc1ec4..f8c5d3fffd7d2 100644
--- a/test/bitset.jl
+++ b/test/bitset.jl
@@ -38,9 +38,12 @@ end
     @test !in(1,s)
     @test in(2,s)
     @test !in(10002,s)
-    @test in(10000,s)
+    @test in(UInt128(10000),s)
+    @test in(Int32(10000),s)
     @test in(10000.0,s)
     @test !in(10002.0,s)
+    @test !in(typemax(UInt), s)
+    @test !in(typemin(Int)-Int128(14), s)
     @test_throws ArgumentError first(BitSet())
     @test_throws ArgumentError last(BitSet())
     t = copy(s)
@@ -157,13 +160,16 @@ end
     for n in -20:0
         @test length(delete!(s, n)) == len
     end
+    @test length(delete!(s, typemax(UInt))) == len
     @test pop!(s, 1) === 1
     @test !(1 in s)
     @test_throws KeyError pop!(s, 1)
     @test_throws KeyError pop!(s, -1)
     @test pop!(s, -1, 1) === 1
     @test pop!(s, 1, 0) === 0
-    @test s === delete!(s, 1)
+    @test 5 in s
+    @test s === delete!(s, 1) === delete!(s, Int8(5))
+    @test !(5 in s)
     for i in s; pop!(s, i); end
     @test isempty(s)
     push!(s, 100)
@@ -348,8 +354,8 @@ end
     x = BitSet(rand(-1000:1000, 500))
     y = copy(x)
     @test union!(x, BitSet(a:b)) == union!(y, BitSet(a:1:b))
-    @test_throws ArgumentError BitSet(Int128(typemin(Int))-1:typemin(Int))
-    @test_throws ArgumentError BitSet(typemax(Int):Int128(typemax(Int))+1)
+    @test_throws InexactError BitSet(Int128(typemin(Int))-1:typemin(Int))
+    @test_throws InexactError BitSet(typemax(Int):Int128(typemax(Int))+1)
     # union! with an empty range doesn't modify the BitSet
     @test union!(x, b:a) == y
 end
diff --git a/test/broadcast.jl b/test/broadcast.jl
index 41ca604cb50e4..87858dd0f08fc 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -880,7 +880,7 @@ let
     @test Broadcast.broadcasted(+, AD1(rand(3)), AD2(rand(3))) isa Broadcast.Broadcasted{Broadcast.ArrayConflict}
     @test Broadcast.broadcasted(+, AD1(rand(3)), AD2(rand(3))) isa Broadcast.Broadcasted{<:Broadcast.AbstractArrayStyle{Any}}
 
-    @test @inferred(Base.IteratorSize(Broadcast.broadcasted((1,2,3),a1,zeros(3,3,3)))) === Base.HasShape{3}()
+    @test @inferred(Base.IteratorSize(Broadcast.broadcasted(+, (1,2,3), a1, zeros(3,3,3)))) === Base.HasShape{3}()
 
     # inference on nested
     bc = Base.broadcasted(+, AD1(randn(3)), AD1(randn(3)))
diff --git a/test/cartesian.jl b/test/cartesian.jl
index 772ce259c7d24..ed33f2c1035f7 100644
--- a/test/cartesian.jl
+++ b/test/cartesian.jl
@@ -515,6 +515,12 @@ end
 f39705() = Base.Cartesian.@nany 0 _ -> true
 @test f39705() === false
 
+@testset "Cartesian @nall macro test" begin
+    i_1, i_2, i_3 = 1, 2, 3;
+    @test Base.Cartesian.@nall 2 d->(i_d <= 2)
+    @test !Base.Cartesian.@nall 3 d->(i_d <= 2)
+end
+
 @testset "CartesianIndices with Bool" begin
     @test @inferred(CartesianIndices((true,))) == CartesianIndices((1,))
     @test @inferred(CartesianIndices((false,))) == CartesianIndices((0,))
diff --git a/test/ccall.jl b/test/ccall.jl
index d88e667b55c72..0266dabd6332b 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -802,7 +802,7 @@ if cfunction_closure
 verbose && println("Testing cfunction closures: ")
 
 # helper Type for testing that constructors work
-# with cfucntion and that object identity is preserved
+# with cfunction and that object identity is preserved
 mutable struct IdentityTestKV{K, V}
     (T::Type{<:IdentityTestKV})(S) = (@test T === S; T)
 end
@@ -1516,6 +1516,12 @@ end
 @test_throws(ErrorException("ccall return type struct fields cannot contain a reference"),
              @eval ccall(:fn, typeof(Ref("")), ()))
 
+fn45187() = nothing
+
+@test_throws(TypeError, @eval ccall(nothing, Cvoid, ()))
+@test_throws(TypeError, @eval ccall(49142, Cvoid, ()))
+@test_throws(TypeError, @eval ccall((:fn, fn45187), Cvoid, ()))
+
 # test for malformed syntax errors
 @test Expr(:error, "more arguments than types for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (), x)))
 @test Expr(:error, "more arguments than types for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (B,), x, y)))
@@ -1910,6 +1916,12 @@ end
     function cglobal33413_literal_notype()
         return cglobal(:sin)
     end
+    function cglobal49142_nothing()
+        return cglobal(nothing)
+    end
+    function cglobal45187fn()
+        return cglobal((:fn, fn45187))
+    end
     @test unsafe_load(cglobal33413_ptrvar()) == 1
     @test unsafe_load(cglobal33413_ptrinline()) == 1
     @test unsafe_load(cglobal33413_tupleliteral()) == 1
@@ -1918,6 +1930,10 @@ end
     @test unsafe_load(convert(Ptr{Cint}, cglobal33413_tupleliteral_notype())) == 1
     @test cglobal33413_literal() != C_NULL
     @test cglobal33413_literal_notype() != C_NULL
+    @test_throws(TypeError, cglobal49142_nothing())
+    @test_throws(TypeError, cglobal45187fn())
+    @test_throws(TypeError, @eval cglobal(nothing))
+    @test_throws(TypeError, @eval cglobal((:fn, fn45187)))
 end
 
 @testset "ccall_effects" begin
diff --git a/test/channels.jl b/test/channels.jl
index eb82a20686ae9..dbda5cf069081 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -311,6 +311,7 @@ end
 
 @testset "timedwait on multiple channels" begin
     Experimental.@sync begin
+        sync = Channel(1)
         rr1 = Channel(1)
         rr2 = Channel(1)
         rr3 = Channel(1)
@@ -320,20 +321,17 @@ end
         @test !callback()
         @test timedwait(callback, 0) === :timed_out
 
-        @async begin sleep(0.5); put!(rr1, :ok) end
+        @async begin put!(sync, :ready); sleep(0.5); put!(rr1, :ok) end
         @async begin sleep(1.0); put!(rr2, :ok) end
-        @async begin sleep(2.0); put!(rr3, :ok) end
+        @async begin @test take!(rr3) == :done end
 
+        @test take!(sync) == :ready
         et = @elapsed timedwait(callback, 1)
 
-        # assuming that 0.5 seconds is a good enough buffer on a typical modern CPU
-        try
-            @assert (et >= 1.0) && (et <= 1.5)
-            @assert !isready(rr3)
-        catch
-            @warn "`timedwait` tests delayed. et=$et, isready(rr3)=$(isready(rr3))"
-        end
+        @test et >= 1.0
+
         @test isready(rr1)
+        put!(rr3, :done)
     end
 end
 
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 34737fe255343..18af88ea191e9 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -21,7 +21,7 @@ const TESTNAMES = [
         "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
         "euler", "show", "client",
         "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu",
-        "some", "meta", "stacktraces", "docs",
+        "some", "meta", "stacktraces", "docs", "gc",
         "misc", "threads", "stress", "binaryplatforms", "atexit",
         "enums", "cmdlineargs", "int", "interpreter",
         "checked", "bitset", "floatfuncs", "precompile",
@@ -157,8 +157,8 @@ function choosetests(choices = [])
         "compiler/datastructures", "compiler/inference", "compiler/effects",
         "compiler/validation", "compiler/ssair", "compiler/irpasses",
         "compiler/codegen", "compiler/inline", "compiler/contextual",
-        "compiler/AbstractInterpreter", "compiler/EscapeAnalysis/local",
-        "compiler/EscapeAnalysis/interprocedural"])
+        "compiler/invalidation", "compiler/AbstractInterpreter",
+        "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"])
     filtertests!(tests, "compiler/EscapeAnalysis", [
         "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"])
     filtertests!(tests, "stdlib", STDLIBS)
diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c
index f0b32c54bc7b8..0ff5e633622ce 100644
--- a/test/clangsa/MissingRoots.c
+++ b/test/clangsa/MissingRoots.c
@@ -352,6 +352,9 @@ void assoc_exact_broken(jl_value_t **args, size_t n, int8_t offs, size_t world)
 }
 */
 
+// declare
+jl_typemap_level_t *jl_new_typemap_level(void);
+
 void assoc_exact_ok(jl_value_t *args1, jl_value_t **args, size_t n, int8_t offs, size_t world) {
     jl_typemap_level_t *cache = jl_new_typemap_level();
     JL_GC_PUSH1(&cache);
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 81478cd63836b..1d04926ef23af 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -60,6 +60,78 @@ let
     @test format_filename("%a%%b") == "a%b"
 end
 
+@testset "julia_cmd" begin
+    julia_basic = Base.julia_cmd()
+    opts = Base.JLOptions()
+    get_julia_cmd(arg) = strip(read(`$julia_basic $arg -e 'print(repr(Base.julia_cmd()))'`, String), ['`'])
+
+    for (arg, default) in (
+                            ("-C$(unsafe_string(opts.cpu_target))",  false),
+
+                            ("-J$(unsafe_string(opts.image_file))",  false),
+
+                            ("--depwarn=yes",   false),
+                            ("--depwarn=error", false),
+                            ("--depwarn=no",    true),
+
+                            ("--check-bounds=yes",  false),
+                            ("--check-bounds=no",   false),
+                            ("--check-bounds=auto", true),
+
+                            ("--inline=no",         false),
+                            ("--inline=yes",        true),
+
+                            ("-O0", false),
+                            ("-O1", false),
+                            ("-O2", true),
+                            ("-O3", false),
+
+                            ("--min-optlevel=0",    true),
+                            ("--min-optlevel=1",    false),
+                            ("--min-optlevel=2",    false),
+                            ("--min-optlevel=3",    false),
+
+                            ("-g0", false),
+                            ("-g1", false),
+                            ("-g2", false),
+
+                            ("--compile=no",    false),
+                            ("--compile=all",   false),
+                            ("--compile=min",   false),
+                            ("--compile=yes",   true),
+
+                            ("--code-coverage=@",    false),
+                            ("--code-coverage=user", false),
+                            ("--code-coverage=all",  false),
+                            ("--code-coverage=none", true),
+
+                            ("--track-allocation=@",    false),
+                            ("--track-allocation=user", false),
+                            ("--track-allocation=all",  false),
+                            ("--track-allocation=none", true),
+
+                            ("--color=yes", false),
+                            ("--color=no",  false),
+
+                            ("--startup-file=no",   false),
+                            ("--startup-file=yes",  true),
+
+                            # ("--sysimage-native-code=no",   false), # takes a lot longer (30s)
+                            ("--sysimage-native-code=yes",  true),
+
+                            ("--pkgimages=yes", true),
+                            ("--pkgimages=no",  false),
+                        )
+        @testset "$arg" begin
+            if default
+                @test !occursin(arg, get_julia_cmd(arg))
+            else
+                @test occursin(arg, get_julia_cmd(arg))
+            end
+        end
+    end
+end
+
 let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # tests for handling of ENV errors
     let v = writereadpipeline("println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))",
@@ -116,10 +188,12 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test contains(v[2], r"enable-tail-merge + = 1")
         @test isempty(v[3])
     end
-    @testset let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir()))
-        @test !v[1]
-        @test isempty(v[2])
-        @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!"
+    if Base.libllvm_version < v"15" #LLVM over 15 doesn't care for multiple options
+        @testset let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir()))
+            @test !v[1]
+            @test isempty(v[2])
+            @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!"
+        end
     end
 end
 
@@ -269,6 +343,24 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test p.exitcode == 1 && p.termsignal == 0
     end
 
+    # --gcthreads
+    code = "print(Threads.ngcthreads())"
+    cpu_threads = ccall(:jl_effective_threads, Int32, ())
+    @test (cpu_threads == 1 ? "1" : string(div(cpu_threads, 2))) ==
+          read(`$exename --threads auto -e $code`, String) ==
+          read(`$exename --threads=auto -e $code`, String) ==
+          read(`$exename -tauto -e $code`, String) ==
+          read(`$exename -t auto -e $code`, String)
+    for nt in (nothing, "1")
+        withenv("JULIA_NUM_GC_THREADS" => nt) do
+            @test read(`$exename --gcthreads=2 -e $code`, String) == "2"
+        end
+    end
+
+    withenv("JULIA_NUM_GC_THREADS" => 2) do
+        @test read(`$exename -e $code`, String) == "2"
+    end
+
     # --machine-file
     # this does not check that machine file works,
     # only that the filename gets correctly passed to the option struct
@@ -625,6 +717,8 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         (false, "", "ERROR: option `--inline` is missing an argument")
     @test readchomperrors(`$exename --startup-file=no -e "@show ARGS" -now -- julia RUN.jl`) ==
         (false, "", "ERROR: unknown option `-n`")
+    @test readchomperrors(`$exename --interactive=yes`) ==
+        (false, "", "ERROR: option `-i/--interactive` does not accept an argument")
 
     # --compiled-modules={yes|no}
     @test readchomp(`$exename -E "Bool(Base.JLOptions().use_compiled_modules)"`) == "true"
@@ -652,6 +746,37 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     end
 end
 
+# Object file with multiple cpu targets
+@testset "Object file for multiple microarchitectures" begin
+    julia_path = joinpath(Sys.BINDIR, Base.julia_exename())
+    outputo_file = tempname()
+    write(outputo_file, "1")
+    object_file = tempname() * ".o"
+
+    # This is to test that even with `pkgimages=no`, we can create object file
+    # with multiple cpu-targets
+    # The cmd is checked for `--object-o` as soon as it is run. So, to avoid long
+    # testing times, intentionally don't pass `--sysimage`; when we reach the
+    # corresponding error, we know that `check_cmdline` has already passed
+    let v = readchomperrors(`$julia_path
+        --cpu-target='native;native'
+        --output-o=$object_file $outputo_file
+        --pkgimages=no`)
+
+        @test v[1] == false
+        @test v[2] == ""
+        @test !contains(v[3], "More than one command line CPU targets specified")
+        @test v[3] == "ERROR: File \"boot.jl\" not found"
+    end
+
+    # This is to test that with `pkgimages=yes`, multiple CPU targets are parsed.
+    # We intentionally fail fast due to a lack of an `--output-o` flag.
+    let v = readchomperrors(`$julia_path --cpu-target='native;native' --pkgimages=yes`)
+        @test v[1] == false
+        @test v[2] == ""
+        @test contains(v[3], "More than one command line CPU targets specified")
+    end
+end
 
 # Find the path of libjulia (or libjulia-debug, as the case may be)
 # to use as a dummy shlib to open
@@ -827,4 +952,6 @@ end
         @test lines[3] == "foo"
         @test lines[4] == "bar"
     end
+#heap-size-hint
+@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000"
 end
diff --git a/test/combinatorics.jl b/test/combinatorics.jl
index b04259f397304..f8fe4e0bd0829 100644
--- a/test/combinatorics.jl
+++ b/test/combinatorics.jl
@@ -16,6 +16,16 @@ using Random: randcycle
     @test binomial(Int64(67), Int64(29)) == binomial(BigInt(67), BigInt(29)) == 7886597962249166160
     @test binomial(Int128(131), Int128(62)) == binomial(BigInt(131), BigInt(62)) == 157311720980559117816198361912717812000
     @test_throws OverflowError binomial(Int64(67), Int64(30))
+
+    #Issue 48072
+    ∐ = parse(BigInt, "1" * "0"^13 * "666" * "0"^13 * "1")
+    @test binomial(∐, ∐ - 1) == ∐
+    @test binomial(∐, ∐ - 2) == 500000000000066600000000002218280000000000033300000000000000
+    @test binomial(∐, ∐ - 3) == binomial(∐, 3)
+    @test binomial(-big(2), ∐ - 3) == 1000000000000066599999999999999
+    @test_throws OverflowError binomial(big(2)^65, big(2)^64)
+    @test_throws OverflowError binomial(-big(2)^65, big(2)^64)
+    @test binomial(∐, 2 * ∐) == BigInt(0)
 end
 
 @testset "permutations" begin
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
index ac1f34743e18e..0e94d42fa8866 100644
--- a/test/compiler/AbstractInterpreter.jl
+++ b/test/compiler/AbstractInterpreter.jl
@@ -2,45 +2,9 @@
 
 using Test
 const CC = Core.Compiler
-import Core: MethodInstance, CodeInstance
-import .CC: WorldRange, WorldView
 
 include("irutils.jl")
-
-"""
-    @newinterp NewInterpreter
-
-Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated
-from the native code cache, satisfying the minimum interface requirements.
-"""
-macro newinterp(name)
-    cachename = Symbol(string(name, "Cache"))
-    name = esc(name)
-    quote
-        struct $cachename
-            dict::IdDict{MethodInstance,CodeInstance}
-        end
-        struct $name <: CC.AbstractInterpreter
-            interp::CC.NativeInterpreter
-            cache::$cachename
-            meta # additional information
-            $name(world = Base.get_world_counter();
-                interp = CC.NativeInterpreter(world),
-                cache = $cachename(IdDict{MethodInstance,CodeInstance}()),
-                meta = nothing,
-                ) = new(interp, cache, meta)
-        end
-        CC.InferenceParams(interp::$name) = CC.InferenceParams(interp.interp)
-        CC.OptimizationParams(interp::$name) = CC.OptimizationParams(interp.interp)
-        CC.get_world_counter(interp::$name) = CC.get_world_counter(interp.interp)
-        CC.get_inference_cache(interp::$name) = CC.get_inference_cache(interp.interp)
-        CC.code_cache(interp::$name) = WorldView(interp.cache, WorldRange(CC.get_world_counter(interp)))
-        CC.get(wvc::WorldView{<:$cachename}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
-        CC.getindex(wvc::WorldView{<:$cachename}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
-        CC.haskey(wvc::WorldView{<:$cachename}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
-        CC.setindex!(wvc::WorldView{<:$cachename}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
-    end
-end
+include("newinterp.jl")
 
 # OverlayMethodTable
 # ==================
@@ -51,6 +15,14 @@ import Base.Experimental: @MethodTable, @overlay
 @MethodTable(OverlayedMT)
 CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_world_counter(interp), OverlayedMT)
 
+function CC.add_remark!(interp::MTOverlayInterp, ::CC.InferenceState, remark)
+    if interp.meta !== nothing
+        # Core.println(remark)
+        push!(interp.meta, remark)
+    end
+    return nothing
+end
+
 strangesin(x) = sin(x)
 @overlay OverlayedMT strangesin(x::Float64) = iszero(x) ? nothing : cos(x)
 
@@ -70,6 +42,21 @@ end |> !Core.Compiler.is_nonoverlayed
     @invoke strangesin(x::Float64)
 end |> !Core.Compiler.is_nonoverlayed
 
+# account for overlay possibility in unanalyzed matching method
+callstrange(::Float64) = strangesin(x)
+callstrange(::Nothing) = Core.compilerbarrier(:type, nothing) # trigger inference bail out
+callstrange_entry(x) = callstrange(x) # needs to be defined here because of world age
+let interp = MTOverlayInterp(Set{Any}())
+    matches = Core.Compiler.findall(Tuple{typeof(callstrange),Any}, Core.Compiler.method_table(interp)).matches
+    @test Core.Compiler.length(matches) == 2
+    if Core.Compiler.getindex(matches, 1).method == which(callstrange, (Nothing,))
+        @test Base.infer_effects(callstrange_entry, (Any,); interp) |> !Core.Compiler.is_nonoverlayed
+        @test "Call inference reached maximally imprecise information. Bailing on." in interp.meta
+    else
+        @warn "`nonoverlayed` test for inference bailing out is skipped since the method match sort order is changed."
+    end
+end
+
 # but it should never apply for the native compilation
 @test Base.infer_effects((Float64,)) do x
     strangesin(x)
@@ -93,7 +80,7 @@ overlay_match(::Any) = nothing
     overlay_match(x)
 end |> only === Union{Nothing,Missing}
 
-# partial pure/concrete evaluation
+# partial concrete evaluation
 @test Base.return_types(; interp=MTOverlayInterp()) do
     isbitstype(Int) ? nothing : missing
 end |> only === Nothing
@@ -110,7 +97,7 @@ end
     issue41694(3) == 6 ? nothing : missing
 end |> only === Nothing
 
-# disable partial pure/concrete evaluation when tainted by any overlayed call
+# disable partial concrete evaluation when tainted by any overlayed call
 Base.@assume_effects :total totalcall(f, args...) = f(args...)
 @test Base.return_types(; interp=MTOverlayInterp()) do
     if totalcall(strangesin, 1.0) == cos(1.0)
@@ -120,6 +107,18 @@ Base.@assume_effects :total totalcall(f, args...) = f(args...)
     end
 end |> only === Nothing
 
+# GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror`
+# https://github.com/JuliaLang/julia/issues/48097
+@newinterp Issue48097Interp
+@MethodTable Issue48097MT
+CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_world_counter(interp), Issue48097MT)
+CC.InferenceParams(::Issue48097Interp) = CC.InferenceParams(; unoptimize_throw_blocks=false)
+@overlay Issue48097MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return
+issue48097(; kwargs...) = return 42
+@test fully_eliminated(; interp=Issue48097Interp(), retval=42) do
+    issue48097(; a=1f0, b=1.0)
+end
+
 # AbstractLattice
 # ===============
 
@@ -267,35 +266,10 @@ end |> only === Any
 # CallInfo × inlining
 # ===================
 
-import .CC: CallInfo
+@newinterp NoinlineInterpreter
+noinline_modules(interp::NoinlineInterpreter) = interp.meta::Set{Module}
 
-struct NoinlineInterpreterCache
-    dict::IdDict{MethodInstance,CodeInstance}
-end
-
-"""
-    NoinlineInterpreter(noinline_modules::Set{Module}) <: AbstractInterpreter
-
-An `AbstractInterpreter` that has additional inlineability rules based on caller module context.
-"""
-struct NoinlineInterpreter <: CC.AbstractInterpreter
-    noinline_modules::Set{Module}
-    interp::CC.NativeInterpreter
-    cache::NoinlineInterpreterCache
-    NoinlineInterpreter(noinline_modules::Set{Module}, world = Base.get_world_counter();
-        interp = CC.NativeInterpreter(world),
-        cache = NoinlineInterpreterCache(IdDict{MethodInstance,CodeInstance}())
-        ) = new(noinline_modules, interp, cache)
-end
-CC.InferenceParams(interp::NoinlineInterpreter) = CC.InferenceParams(interp.interp)
-CC.OptimizationParams(interp::NoinlineInterpreter) = CC.OptimizationParams(interp.interp)
-CC.get_world_counter(interp::NoinlineInterpreter) = CC.get_world_counter(interp.interp)
-CC.get_inference_cache(interp::NoinlineInterpreter) = CC.get_inference_cache(interp.interp)
-CC.code_cache(interp::NoinlineInterpreter) = WorldView(interp.cache, WorldRange(CC.get_world_counter(interp)))
-CC.get(wvc::WorldView{<:NoinlineInterpreterCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
-CC.getindex(wvc::WorldView{<:NoinlineInterpreterCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
-CC.haskey(wvc::WorldView{<:NoinlineInterpreterCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
-CC.setindex!(wvc::WorldView{<:NoinlineInterpreterCache}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
+import .CC: CallInfo
 
 struct NoinlineCallInfo <: CallInfo
     info::CallInfo # wrapped call
@@ -308,7 +282,7 @@ function CC.abstract_call(interp::NoinlineInterpreter,
     arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing})
     ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter,
         arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing})
-    if sv.mod in interp.noinline_modules
+    if sv.mod in noinline_modules(interp)
         return CC.CallMeta(ret.rt, ret.effects, NoinlineCallInfo(ret.info))
     end
     return ret
@@ -349,7 +323,7 @@ let NoinlineModule = Module()
     # it should work for cached results
     method = only(methods(inlined_usually, (Float64,Float64,Float64,)))
     mi = CC.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec())
-    @test haskey(interp.cache.dict, mi)
+    @test haskey(interp.code_cache.dict, mi)
     let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z
             inlined_usually(x, y, z)
         end
diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl
index 51b4b66c22643..6894733e0fa45 100644
--- a/test/compiler/EscapeAnalysis/EAUtils.jl
+++ b/test/compiler/EscapeAnalysis/EAUtils.jl
@@ -141,9 +141,9 @@ function invalidate_cache!(replaced, max_world, depth = 0)
 end
 
 function CC.optimize(interp::EscapeAnalyzer,
-    opt::OptimizationState, params::OptimizationParams, caller::InferenceResult)
+    opt::OptimizationState, caller::InferenceResult)
     ir = run_passes_with_ea(interp, opt.src, opt, caller)
-    return CC.finish(interp, opt, params, ir, caller)
+    return CC.finish(interp, opt, ir, caller)
 end
 
 function CC.cache_result!(interp::EscapeAnalyzer, caller::InferenceResult)
diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/local.jl
index e5d8f1bf2c940..dd324c3619dc7 100644
--- a/test/compiler/EscapeAnalysis/local.jl
+++ b/test/compiler/EscapeAnalysis/local.jl
@@ -1997,9 +1997,9 @@ let result = code_escapes((Int,String,)) do n,s
     i = only(findall(isarrayalloc, result.ir.stmts.inst))
     r = only(findall(isreturn, result.ir.stmts.inst))
     @test has_return_escape(result.state[SSAValue(i)], r)
-    Base.JLOptions().check_bounds ≠ 0 && @test has_thrown_escape(result.state[SSAValue(i)])
+    @test !has_thrown_escape(result.state[SSAValue(i)])
     @test has_return_escape(result.state[Argument(3)], r) # s
-    Base.JLOptions().check_bounds ≠ 0 && @test has_thrown_escape(result.state[Argument(3)])    # s
+    @test !has_thrown_escape(result.state[Argument(3)])    # s
 end
 let result = code_escapes((Int,String,)) do n,s
         xs = String[]
@@ -2011,9 +2011,9 @@ let result = code_escapes((Int,String,)) do n,s
     i = only(findall(isarrayalloc, result.ir.stmts.inst))
     r = only(findall(isreturn, result.ir.stmts.inst))
     @test has_return_escape(result.state[SSAValue(i)], r) # xs
-    @test has_thrown_escape(result.state[SSAValue(i)])    # xs
+    @test !has_thrown_escape(result.state[SSAValue(i)])    # xs
     @test has_return_escape(result.state[Argument(3)], r) # s
-    @test has_thrown_escape(result.state[Argument(3)])    # s
+    @test !has_thrown_escape(result.state[Argument(3)])    # s
 end
 let result = code_escapes((String,String,String)) do s, t, u
         xs = String[]
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index 09f6c772fea52..8a3949212ea16 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -649,7 +649,7 @@ end
 
 # issue #41157
 f41157(a, b) = a[1] = b[1]
-@test_throws BoundsError f41157(Tuple{Int}[], Tuple{Union{}}[])
+@test_throws BoundsError f41157(Tuple{Int}[], (NTuple{N,Union{}} where N)[])
 
 # issue #41096
 struct Modulate41096{M<:Union{Function, Val{true}, Val{false}}, id}
@@ -786,11 +786,6 @@ f_isa_type(@nospecialize(x)) = isa(x, Type)
 f47247(a::Ref{Int}, b::Nothing) = setfield!(a, :x, b)
 @test_throws TypeError f47247(Ref(5), nothing)
 
-@testset "regression in generic_bitcast: should support Union{} values" begin
-    f(x) = Core.bitcast(UInt64, x)
-    @test occursin("llvm.trap", get_llvm(f, Tuple{Union{}}))
-end
-
 f48085(@nospecialize x...) = length(x)
 @test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Vararg{Int}}, Core.svec()) === nothing
 @test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Int, Vararg{Int}}, Core.svec()) === Tuple{typeof(f48085), Any, Vararg{Any}}
@@ -821,3 +816,7 @@ function F48394(a, b, i)
 end
 @test F48394(X48394(nothing,true), Y48394(nothing, missing), true)
 @test occursin("llvm.trap", get_llvm(F48394, Tuple{X48394, Y48394, Bool}))
+
+# issue 48917, hoisting load to above the parent
+f48917(x, w) = (y = (a=1, b=x); z = (; a=(a=(1, w), b=(3, y))))
+@test f48917(1,2) == (a = (a = (1, 2), b = (3, (a = 1, b = 1))),)
diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl
index 79285f62b0947..0e8fe27591a5e 100644
--- a/test/compiler/contextual.jl
+++ b/test/compiler/contextual.jl
@@ -7,9 +7,9 @@ module MiniCassette
     # A minimal demonstration of the cassette mechanism. Doesn't support all the
     # fancy features, but sufficient to exercise this code path in the compiler.
 
-    using Core.Compiler: method_instances, retrieve_code_info, CodeInfo,
-        MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, Slot, SlotNumber, quoted,
-        signature_type
+    using Core.Compiler: retrieve_code_info, CodeInfo,
+        MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, SlotNumber, quoted,
+        signature_type, anymap
     using Base: _methods_by_ftype
     using Base.Meta: isexpr
     using Test
@@ -19,10 +19,11 @@ module MiniCassette
     struct Ctx; end
 
     # A no-op cassette-like transform
-    function transform_expr(expr, map_slot_number, map_ssa_value, sparams)
-        transform(expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams)
+    function transform_expr(expr, map_slot_number, map_ssa_value, sparams::Core.SimpleVector)
+        @nospecialize expr
+        transform(@nospecialize expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams)
         if isexpr(expr, :call)
-            return Expr(:call, overdub, SlotNumber(2), map(transform, expr.args)...)
+            return Expr(:call, overdub, SlotNumber(2), anymap(transform, expr.args)...)
         elseif isa(expr, GotoIfNot)
             return GotoIfNot(transform(expr.cond), map_ssa_value(SSAValue(expr.dest)).id)
         elseif isexpr(expr, :static_parameter)
@@ -30,10 +31,10 @@ module MiniCassette
         elseif isa(expr, ReturnNode)
             return ReturnNode(transform(expr.val))
         elseif isa(expr, Expr)
-            return Expr(expr.head, map(transform, expr.args)...)
+            return Expr(expr.head, anymap(transform, expr.args)...)
         elseif isa(expr, GotoNode)
             return GotoNode(map_ssa_value(SSAValue(expr.label)).id)
-        elseif isa(expr, Slot)
+        elseif isa(expr, SlotNumber)
             return map_slot_number(expr.id)
         elseif isa(expr, SSAValue)
             return map_ssa_value(expr)
@@ -42,16 +43,16 @@ module MiniCassette
         end
     end
 
-    function transform!(ci, nargs, sparams)
+    function transform!(ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector)
         code = ci.code
         ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+1:end]...]
         ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+1:end]...]
         # Insert one SSAValue for every argument statement
-        prepend!(code, [Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
-        prepend!(ci.codelocs, [0 for i = 1:nargs])
-        prepend!(ci.ssaflags, [0x00 for i = 1:nargs])
+        prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
+        prepend!(ci.codelocs, fill(0, nargs))
+        prepend!(ci.ssaflags, fill(0x00, nargs))
         ci.ssavaluetypes += nargs
-        function map_slot_number(slot)
+        function map_slot_number(slot::Int)
             if slot == 1
                 # self in the original function is now `f`
                 return SlotNumber(3)
@@ -69,24 +70,28 @@ module MiniCassette
         end
     end
 
-    function overdub_generator(self, c, f, args)
+    function overdub_generator(world::UInt, source, self, c, f, args)
+        @nospecialize
         if !Base.issingletontype(f)
-            return :(return f(args...))
+            # (c, f, args..) -> f(args...)
+            code_info = :(return f(args...))
+            return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :c, :f, :args), Core.svec())(world, source, code_info)
         end
 
         tt = Tuple{f, args...}
-        match = Base._which(tt; world=typemax(UInt))
+        match = Base._which(tt; world)
         mi = Core.Compiler.specialize_method(match)
         # Unsupported in this mini-cassette
         @assert !mi.def.isva
-        code_info = retrieve_code_info(mi)
+        code_info = retrieve_code_info(mi, world)
         @assert isa(code_info, CodeInfo)
         code_info = copy(code_info)
-        if isdefined(code_info, :edges)
-            code_info.edges = MethodInstance[mi]
-        end
+        @assert code_info.edges === nothing
+        code_info.edges = MethodInstance[mi]
         transform!(code_info, length(args), match.sparams)
-        code_info
+        # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
+        # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
+        return code_info
     end
 
     @inline function overdub(c::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...)
@@ -95,16 +100,7 @@ module MiniCassette
 
     @eval function overdub(c::Ctx, f, args...)
         $(Expr(:meta, :generated_only))
-        $(Expr(:meta,
-                :generated,
-                Expr(:new,
-                    Core.GeneratedFunctionStub,
-                    :overdub_generator,
-                    Any[:overdub, :ctx, :f, :args],
-                    Any[],
-                    @__LINE__,
-                    QuoteNode(Symbol(@__FILE__)),
-                    true)))
+        $(Expr(:meta, :generated, overdub_generator))
     end
 end
 
@@ -119,23 +115,11 @@ f() = 2
 # Test that MiniCassette is at least somewhat capable by overdubbing gcd
 @test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20)
 
-# Test that pure propagates for Cassette
-Base.@pure isbitstype(T) = Base.isbitstype(T)
-f31012(T) = Val(isbitstype(T))
-@test @inferred(overdub(Ctx(), f31012, Int64)) == Val(true)
-
 @generated bar(::Val{align}) where {align} = :(42)
 foo(i) = i+bar(Val(1))
 
 @test @inferred(overdub(Ctx(), foo, 1)) == 43
 
-# Check that misbehaving pure functions propagate their error
-Base.@pure func1() = 42
-Base.@pure func2() = (this_is_an_exception; func1())
-func3() = func2()
-@test_throws UndefVarError func3()
-
-
 # overlay method tables
 # =====================
 
@@ -208,12 +192,12 @@ try
      Baz = Base.require(Main, :Baz)
      @test length(Bar.mt) == 1
 finally
+    filter!((≠)(load_path), LOAD_PATH)
+    filter!((≠)(depot_path), DEPOT_PATH)
     rm(load_path, recursive=true, force=true)
     try
         rm(depot_path, force=true, recursive=true)
     catch err
         @show err
     end
-    filter!((≠)(load_path), LOAD_PATH)
-    filter!((≠)(depot_path), DEPOT_PATH)
 end
diff --git a/test/compiler/datastructures.jl b/test/compiler/datastructures.jl
index a25a884373ab4..8dbaee61503d0 100644
--- a/test/compiler/datastructures.jl
+++ b/test/compiler/datastructures.jl
@@ -7,7 +7,7 @@ using Test
     table = Core.Compiler.method_table(interp)
     sig = Tuple{typeof(*), Any, Any}
     result1 = Core.Compiler.findall(sig, table; limit=-1)
-    result2 = Core.Compiler.findall(sig, table; limit=Core.Compiler.get_max_methods(*, @__MODULE__, interp))
+    result2 = Core.Compiler.findall(sig, table; limit=Core.Compiler.InferenceParams().max_methods)
     @test result1 !== nothing && !Core.Compiler.isempty(result1.matches)
     @test result2 === nothing
 end
diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl
index 656ac9268dcb4..f809192d8d1ed 100644
--- a/test/compiler/effects.jl
+++ b/test/compiler/effects.jl
@@ -12,9 +12,6 @@ end
     nothing
 end
 
-# Test that arraysize has proper effect modeling
-@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},))
-
 # Test that effect modeling for return_type doesn't incorrectly pick
 # up the effects of the function being analyzed
 f_throws() = error()
@@ -109,6 +106,21 @@ recur_termination22(x) = x * recur_termination21(x-1)
     recur_termination21(12) + recur_termination22(12)
 end
 
+# anonymous function support for `@assume_effects`
+@test fully_eliminated() do
+    map((2,3,4)) do x
+        # this :terminates_locally allows this anonymous function to be constant-folded
+        Base.@assume_effects :terminates_locally
+        res = 1
+        1 < x < 20 || error("bad pow")
+        while x > 1
+            res *= x
+            x -= 1
+        end
+        return res
+    end
+end
+
 # control flow backedge should taint `terminates`
 @test Base.infer_effects((Int,)) do n
     for i = 1:n; end
@@ -342,9 +354,9 @@ function f_boundscheck_elim(n)
     # Inbounds here assumes that this is only ever called with `n==0`, but of
     # course the compiler has no way of knowing that, so it must not attempt
     # to run the `@inbounds getfield(sin, 1)` that `ntuple` generates.
-    ntuple(x->(@inbounds getfield(sin, x)), n)
+    ntuple(x->(@inbounds ()[x]), n)
 end
-@test !Core.Compiler.is_consistent(Base.infer_effects(f_boundscheck_elim, (Int,)))
+@test_broken !Core.Compiler.is_consistent(Base.infer_effects(f_boundscheck_elim, (Int,)))
 @test Tuple{} <: only(Base.return_types(f_boundscheck_elim, (Int,)))
 
 # Test that purity modeling doesn't accidentally introduce new world age issues
@@ -439,11 +451,6 @@ let effects = Base.infer_effects(f_setfield_nothrow, ())
     @test Core.Compiler.is_nothrow(effects)
 end
 
-# nothrow for arrayset
-@test Base.infer_effects((Vector{Int},Int)) do a, i
-    a[i] = 0 # may throw
-end |> !Core.Compiler.is_nothrow
-
 # even if 2-arg `getfield` may throw, it should be still `:consistent`
 @test Core.Compiler.is_consistent(Base.infer_effects(getfield, (NTuple{5, Float64}, Int)))
 
@@ -462,14 +469,27 @@ end |> Core.Compiler.is_consistent
 end |> Core.Compiler.is_effect_free
 
 # `getfield_effects` handles access to union object nicely
-@test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{String}, Core.Const(:value)]), String))
-@test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{Symbol}, Core.Const(:value)]), Symbol))
-@test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Union{Some{Symbol},Some{String}}, Core.Const(:value)]), Union{Symbol,String}))
+let 𝕃 = Core.Compiler.fallback_lattice
+    @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{String}, Core.Const(:value)]), String))
+    @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Some{Symbol}, Core.Const(:value)]), Symbol))
+    @test Core.Compiler.is_consistent(Core.Compiler.getfield_effects(𝕃, Core.Compiler.ArgInfo(nothing, Any[Core.Const(getfield), Union{Some{Symbol},Some{String}}, Core.Const(:value)]), Union{Symbol,String}))
+end
 @test Base.infer_effects((Bool,)) do c
     obj = c ? Some{String}("foo") : Some{Symbol}(:bar)
     return getfield(obj, :value)
 end |> Core.Compiler.is_consistent
 
+# getfield is nothrow when bounds checking is turned off
+@test Base.infer_effects((Tuple{Int,Int},Int)) do t, i
+    getfield(t, i, false)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Tuple{Int,Int},Symbol)) do t, i
+    getfield(t, i, false)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Tuple{Int,Int},String)) do t, i
+    getfield(t, i, false) # invalid name type
+end |> !Core.Compiler.is_nothrow
+
 @test Core.Compiler.is_consistent(Base.infer_effects(setindex!, (Base.RefValue{Int}, Int)))
 
 # :inaccessiblememonly effect
@@ -544,7 +564,7 @@ end |> !Core.Compiler.is_inaccessiblememonly
 end |> !Core.Compiler.is_inaccessiblememonly
 
 # the `:inaccessiblememonly` helper effect allows us to prove `:consistent`-cy of frames
-# including `getfield` accessing to local mutable object
+# including `getfield` / `isdefined` accessing to local mutable object
 
 mutable struct SafeRef{T}
     x::T
@@ -573,13 +593,11 @@ const consistent_global = Some(:foo)
 @test Base.infer_effects() do
     consistent_global.value
 end |> Core.Compiler.is_consistent
-
 const inconsistent_global = SafeRef(:foo)
 @test Base.infer_effects() do
     inconsistent_global[]
 end |> !Core.Compiler.is_consistent
-
-global inconsistent_condition_ref = Ref{Bool}(false)
+const inconsistent_condition_ref = Ref{Bool}(false)
 @test Base.infer_effects() do
     if inconsistent_condition_ref[]
         return 0
@@ -588,6 +606,21 @@ global inconsistent_condition_ref = Ref{Bool}(false)
     end
 end |> !Core.Compiler.is_consistent
 
+# should handle va-method properly
+callgetfield1(xs...) = getfield(getfield(xs, 1), 1)
+@test !Core.Compiler.is_inaccessiblememonly(Base.infer_effects(callgetfield1, (Base.RefValue{Symbol},)))
+const GLOBAL_XS = Ref(:julia)
+global_getfield() = callgetfield1(GLOBAL_XS)
+@test let
+    Base.Experimental.@force_compile
+    global_getfield()
+end === :julia
+GLOBAL_XS[] = :julia2
+@test let
+    Base.Experimental.@force_compile
+    global_getfield()
+end === :julia2
+
 # the `:inaccessiblememonly` helper effect allows us to prove `:effect_free`-ness of frames
 # including `setfield!` modifying local mutable object
 
@@ -647,12 +680,14 @@ end
 end
 @test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused3!))
 
-@testset "effects analysis on array ops" begin
+# array ops
+# =========
 
-@testset "effects analysis on array construction" begin
+# allocation
+# ----------
 
+# low-level constructor
 @noinline construct_array(@nospecialize(T), args...) = Array{T}(undef, args...)
-
 # should eliminate safe but dead allocations
 let good_dims = @static Int === Int64 ? (1:10) : (1:8)
     Ns = @static Int === Int64 ? (1:10) : (1:8)
@@ -667,7 +702,6 @@ let good_dims = @static Int === Int64 ? (1:10) : (1:8)
         end
     end
 end
-
 # should analyze throwness correctly
 let bad_dims = [-1, typemax(Int)]
     for dim in bad_dims, N in 1:10
@@ -683,9 +717,132 @@ let bad_dims = [-1, typemax(Int)]
     end
 end
 
-end # @testset "effects analysis on array construction" begin
+# high-level interfaces
+# getindex
+for safesig = Any[
+        (Type{Int},)
+        (Type{Int}, Int)
+        (Type{Int}, Int, Int)
+        (Type{Number},)
+        (Type{Number}, Number)
+        (Type{Number}, Int)
+        (Type{Any},)
+        (Type{Any}, Any,)
+        (Type{Any}, Any, Any)
+    ]
+    let effects = Base.infer_effects(getindex, safesig)
+        @test Core.Compiler.is_consistent_if_notreturned(effects)
+        @test Core.Compiler.is_removable_if_unused(effects)
+    end
+end
+for unsafesig = Any[
+        (Type{Int}, String)
+        (Type{Int}, Any)
+        (Type{Number}, AbstractString)
+        (Type{Number}, Any)
+    ]
+    let effects = Base.infer_effects(getindex, unsafesig)
+        @test !Core.Compiler.is_nothrow(effects)
+    end
+end
+# vect
+for safesig = Any[
+        ()
+        (Int,)
+        (Int, Int)
+    ]
+    let effects = Base.infer_effects(Base.vect, safesig)
+        @test Core.Compiler.is_consistent_if_notreturned(effects)
+        @test Core.Compiler.is_removable_if_unused(effects)
+    end
+end
+
+# arrayref
+# --------
+
+let effects = Base.infer_effects(Base.arrayref, (Vector{Any},Int))
+    @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
+    @test Core.Compiler.is_effect_free(effects)
+    @test !Core.Compiler.is_nothrow(effects)
+    @test Core.Compiler.is_terminates(effects)
+end
+
+# arrayset
+# --------
+
+let effects = Base.infer_effects(Base.arrayset, (Vector{Any},Any,Int))
+    @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
+    @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects)
+    @test !Core.Compiler.is_nothrow(effects)
+    @test Core.Compiler.is_terminates(effects)
+end
+# nothrow for arrayset
+@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i
+    Base.arrayset(true, a, v, i)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i
+    a[i] = v # may throw
+end |> !Core.Compiler.is_nothrow
+# when bounds checking is turned off, it should be safe
+@test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i
+    Base.arrayset(false, a, v, i)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Vector{Number},Number,Int)) do a, v, i
+    Base.arrayset(false, a, v, i)
+end |> Core.Compiler.is_nothrow
+
+# arraysize
+# ---------
+
+let effects = Base.infer_effects(Base.arraysize, (Array,Int))
+    @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
+    @test Core.Compiler.is_effect_free(effects)
+    @test !Core.Compiler.is_nothrow(effects)
+    @test Core.Compiler.is_terminates(effects)
+end
+# Test that arraysize has proper effect modeling
+@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},))
+
+# arraylen
+# --------
+
+let effects = Base.infer_effects(Base.arraylen, (Vector{Any},))
+    @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects)
+    @test Core.Compiler.is_effect_free(effects)
+    @test Core.Compiler.is_nothrow(effects)
+    @test Core.Compiler.is_terminates(effects)
+end
+
+# resize
+# ------
+
+for op = Any[
+        Base._growbeg!,
+        Base._growend!,
+        Base._deletebeg!,
+        Base._deleteend!,
+    ]
+    let effects = Base.infer_effects(op, (Vector, Int))
+        @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects)
+        @test Core.Compiler.is_terminates(effects)
+        @test !Core.Compiler.is_nothrow(effects)
+    end
+end
+
+# end to end
+# ----------
 
-end # @testset "effects analysis on array ops" begin
+function simple_vec_ops(T, op!, op, xs...)
+    a = T[]
+    op!(a, xs...)
+    return op(a)
+end
+for T = Any[Int,Any], op! = Any[push!,pushfirst!], op = Any[length,size],
+    xs = Any[(Int,), (Int,Int,)]
+    let effects = Base.infer_effects(simple_vec_ops, (Type{T},typeof(op!),typeof(op),xs...))
+        @test Core.Compiler.is_foldable(effects)
+    end
+end
 
 # Test that builtin_effects handles vararg correctly
 @test !Core.Compiler.is_nothrow(Core.Compiler.builtin_effects(Core.Compiler.fallback_lattice, Core.isdefined,
@@ -708,7 +865,7 @@ end
 # Effects for getfield of type instance
 @test Base.infer_effects(Tuple{Nothing}) do x
     WrapperOneField{typeof(x)}.instance
-end |> Core.Compiler.is_total
+end |> Core.Compiler.is_foldable_nothrow
 @test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s
     getfield(w, s)
 end |> Core.Compiler.is_foldable
@@ -720,19 +877,23 @@ end |> Core.Compiler.is_foldable
 # Flow-sensitive consistenct for _typevar
 @test Base.infer_effects() do
     return WrapperOneField == (WrapperOneField{T} where T)
-end |> Core.Compiler.is_total
+end |> Core.Compiler.is_foldable_nothrow
 
 # Test that dead `@inbounds` does not taint consistency
 # https://github.com/JuliaLang/julia/issues/48243
-@test Base.infer_effects() do
-    false && @inbounds (1,2,3)[1]
+@test Base.infer_effects(Tuple{Int64}) do i
+    false && @inbounds (1,2,3)[i]
     return 1
-end |> Core.Compiler.is_total
+end |> Core.Compiler.is_foldable_nothrow
 
 @test Base.infer_effects(Tuple{Int64}) do i
     @inbounds (1,2,3)[i]
 end |> !Core.Compiler.is_consistent
 
+@test Base.infer_effects(Tuple{Tuple{Int64}}) do x
+    @inbounds x[1]
+end |> Core.Compiler.is_foldable_nothrow
+
 # Test that :new of non-concrete, but otherwise known type
 # does not taint consistency.
 @eval struct ImmutRef{T}
@@ -741,5 +902,83 @@ end |> !Core.Compiler.is_consistent
 end
 @test Core.Compiler.is_foldable(Base.infer_effects(ImmutRef, Tuple{Any}))
 
-@test Base.ismutationfree(Type{Union{}})
-@test Core.Compiler.is_total(Base.infer_effects(typejoin, ()))
+@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(typejoin, ()))
+
+# nothrow-ness of subtyping operations
+# https://github.com/JuliaLang/julia/pull/48566
+@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A<:B, (Any,Any)))
+@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A>:B, (Any,Any)))
+
+# GotoIfNot should properly mark itself as throwing when given a non-Bool
+# https://github.com/JuliaLang/julia/pull/48583
+gotoifnot_throw_check_48583(x) = x ? x : 0
+@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Missing,)))
+@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Any,)))
+@test Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Bool,)))
+
+# unknown :static_parameter should taint :nothrow
+# https://github.com/JuliaLang/julia/issues/46771
+unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = (T; nothing)
+unknown_sparam_nothrow1(x::Ref{T}) where T = (T; nothing)
+unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing)
+@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{Int},)))
+@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{<:Integer},)))
+@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type,)))
+@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Nothing,)))
+@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Union{Type{Int},Nothing},)))
+@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Any,)))
+@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow1, (Ref,)))
+@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,)))
+
+# purely abstract recursion should not taint :terminates
+# https://github.com/JuliaLang/julia/issues/48983
+abstractly_recursive1() = abstractly_recursive2()
+abstractly_recursive2() = (Core.Compiler._return_type(abstractly_recursive1, Tuple{}); 1)
+abstractly_recursive3() = abstractly_recursive2()
+@test Core.Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ()))
+actually_recursive1(x) = actually_recursive2(x)
+actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1)
+actually_recursive3(x) = actually_recursive2(x)
+@test !Core.Compiler.is_terminates(Base.infer_effects(actually_recursive3, (Int,)))
+
+# `isdefined` effects
+struct MaybeSome{T}
+    value::T
+    MaybeSome(x::T) where T = new{T}(x)
+    MaybeSome{T}(x::T) where T = new{T}(x)
+    MaybeSome{T}() where T = new{T}()
+end
+const undefined_ref = Ref{String}()
+const defined_ref = Ref{String}("julia")
+const undefined_some = MaybeSome{String}()
+const defined_some = MaybeSome{String}("julia")
+let effects = Base.infer_effects() do
+        isdefined(undefined_ref, :x)
+    end
+    @test !Core.Compiler.is_consistent(effects)
+    @test Core.Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects() do
+        isdefined(defined_ref, :x)
+    end
+    @test Core.Compiler.is_consistent(effects)
+    @test Core.Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects() do
+        isdefined(undefined_some, :value)
+    end
+    @test Core.Compiler.is_consistent(effects)
+    @test Core.Compiler.is_nothrow(effects)
+end
+let effects = Base.infer_effects() do
+        isdefined(defined_some, :value)
+    end
+    @test Core.Compiler.is_consistent(effects)
+    @test Core.Compiler.is_nothrow(effects)
+end
+# high-level interface test
+isassigned_effects(s) = isassigned(Ref(s))
+@test Core.Compiler.is_consistent(Base.infer_effects(isassigned_effects, (Symbol,)))
+@test fully_eliminated(; retval=true) do
+    isassigned_effects(:foo)
+end
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index f70b1f73f55ad..5987e10401bc8 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -27,7 +27,6 @@ let comparison = Tuple{X, X} where X<:Tuple
     @test Core.Compiler.limit_type_size(sig, comparison, comparison, 100, 100) == Tuple{Tuple, Tuple}
     @test Core.Compiler.limit_type_size(sig, ref, comparison, 100, 100) == Tuple{Any, Any}
     @test Core.Compiler.limit_type_size(Tuple{sig}, Tuple{ref}, comparison, 100, 100) == Tuple{Tuple{Any, Any}}
-    @test Core.Compiler.limit_type_size(sig, ref, Tuple{comparison}, 100,  100) == Tuple{Tuple{X, X} where X<:Tuple, Tuple{X, X} where X<:Tuple}
     @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref
 end
 
@@ -51,6 +50,13 @@ let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tu
     @test Core.Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple
 end
 
+mutable struct TS14009{T}; end
+let A = TS14009{TS14009{TS14009{TS14009{TS14009{T}}}}} where {T},
+    B = Base.rewrap_unionall(TS14009{Base.unwrap_unionall(A)}, A)
+
+    @test Core.Compiler.Compiler.limit_type_size(B, A, A, 2, 2) == TS14009
+end
+
 # issue #42835
 @test !Core.Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1)
 @test !Core.Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1)
@@ -81,9 +87,12 @@ end
 @test !Core.Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1)
 @test  Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
 @test !Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 0, 1, 1)
-@test_broken Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
 @test  Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1)
 
+# issue #49287
+@test !Core.Compiler.type_more_complex(Tuple{Vararg{Tuple{}}}, Tuple{Vararg{Tuple}}, Core.svec(), 0, 0, 0)
+@test  Core.Compiler.type_more_complex(Tuple{Vararg{Tuple}}, Tuple{Vararg{Tuple{}}}, Core.svec(), 0, 0, 0)
 
 let # 40336
     t = Type{Type{Int}}
@@ -557,27 +566,6 @@ f18450() = ifelse(true, Tuple{Vararg{Int}}, Tuple{Vararg})
 # issue #18569
 @test !Core.Compiler.isconstType(Type{Tuple})
 
-# ensure pure attribute applies correctly to all signatures of fpure
-Base.@pure function fpure(a=rand(); b=rand())
-    # use the `rand` function since it is known to be `@inline`
-    # but would be too big to inline
-    return a + b + rand()
-end
-gpure() = fpure()
-gpure(x::Irrational) = fpure(x)
-@test which(fpure, ()).pure
-@test which(fpure, (typeof(pi),)).pure
-@test !which(gpure, ()).pure
-@test !which(gpure, (typeof(pi),)).pure
-@test code_typed(gpure, ())[1][1].pure
-@test code_typed(gpure, (typeof(π),))[1][1].pure
-@test gpure() == gpure() == gpure()
-@test gpure(π) == gpure(π) == gpure(π)
-
-# Make sure @pure works for functions using the new syntax
-Base.@pure (fpure2(x::T) where T) = T
-@test which(fpure2, (Int64,)).pure
-
 # issue #10880
 function cat10880(a, b)
     Tuple{a.parameters..., b.parameters...}
@@ -597,7 +585,6 @@ function is_typed_expr(e::Expr)
 end
 is_typed_expr(@nospecialize other) = false
 test_inferred_static(@nospecialize(other)) = true
-test_inferred_static(slot::TypedSlot) = @test isdispatchelem(slot.typ)
 function test_inferred_static(expr::Expr)
     for a in expr.args
         test_inferred_static(a)
@@ -654,17 +641,8 @@ for (codetype, all_ssa) in Any[
         (code_typed(h18679, ())[1], true),
         (code_typed(g19348, (typeof((1, 2.0)),))[1], true)]
     code = codetype[1]
-    local notconst(@nospecialize(other)) = true
-    notconst(slot::TypedSlot) = @test isa(slot.typ, Type)
-    function notconst(expr::Expr)
-        for a in expr.args
-            notconst(a)
-        end
-    end
     local i
-    for i = 1:length(code.code)
-        e = code.code[i]
-        notconst(e)
+    for i = 1:length(code.ssavaluetypes)
         typ = code.ssavaluetypes[i]
         typ isa Core.Compiler.MaybeUndef && (typ = typ.typ)
         @test isa(typ, Type) || isa(typ, Const) || isa(typ, Conditional) || typ
@@ -707,6 +685,7 @@ end
 # inference of `fieldtype`
 mutable struct UndefField__
     x::Union{}
+    UndefField__() = new()
 end
 f_infer_undef_field() = fieldtype(UndefField__, :x)
 @test Base.return_types(f_infer_undef_field, ()) == Any[Type{Union{}}]
@@ -913,35 +892,6 @@ end
 f20267(x::T20267{T}, y::T) where (T) = f20267(Any[1][1], x.inds)
 @test Base.return_types(f20267, (Any, Any)) == Any[Union{}]
 
-# issue #20704
-f20704(::Int) = 1
-Base.@pure b20704(@nospecialize(x)) = f20704(x)
-@test b20704(42) === 1
-@test_throws MethodError b20704(42.0)
-
-bb20704() = b20704(Any[1.0][1])
-@test_throws MethodError bb20704()
-
-v20704() = Val{b20704(Any[1.0][1])}
-@test_throws MethodError v20704()
-@test Base.return_types(v20704, ()) == Any[Type{Val{1}}]
-
-Base.@pure g20704(::Int) = 1
-h20704(@nospecialize(x)) = g20704(x)
-@test g20704(1) === 1
-@test_throws MethodError h20704(1.2)
-
-Base.@pure c20704() = (f20704(1.0); 1)
-d20704() = c20704()
-@test_throws MethodError d20704()
-
-Base.@pure function a20704(x)
-    rand()
-    42
-end
-aa20704(x) = x(nothing)
-@test code_typed(aa20704, (typeof(a20704),))[1][1].pure
-
 #issue #21065, elision of _apply_iterate when splatted expression is not effect_free
 function f21065(x,y)
     println("x=$x, y=$y")
@@ -999,7 +949,7 @@ end
 
 # issue #21410
 f21410(::V, ::Pair{V,E}) where {V, E} = E
-@test code_typed(f21410, Tuple{Ref, Pair{Ref{T},Ref{T}} where T<:Number})[1].second ==
+@test only(Base.return_types(f21410, Tuple{Ref, Pair{Ref{T},Ref{T}} where T<:Number})) ==
     Type{E} where E <: (Ref{T} where T<:Number)
 
 # issue #21369
@@ -1074,7 +1024,7 @@ end
 g21771(T) = T
 f21771(::Val{U}) where {U} = Tuple{g21771(U)}
 @test @inferred(f21771(Val{Int}())) === Tuple{Int}
-@test @inferred(f21771(Val{Union{}}())) === Tuple{Union{}}
+@test_throws ErrorException @inferred(f21771(Val{Union{}}()))
 @test @inferred(f21771(Val{Integer}())) === Tuple{Integer}
 
 # PR #28284, check that constants propagate through calls to new
@@ -1217,16 +1167,10 @@ let typeargs = Tuple{Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int}
     @test only(Base.return_types(promote_type, typeargs)) === Type{Int}
 end
 
-# demonstrate that inference must converge
-# while doing constant propagation
-Base.@pure plus1(x) = x + 1
-f21933(x::Val{T}) where {T} = f(Val(plus1(T)))
-code_typed(f21933, (Val{1},))
-Base.return_types(f21933, (Val{1},))
-
 function count_specializations(method::Method)
     specs = method.specializations
-    n = count(i -> isassigned(specs, i), 1:length(specs))
+    specs isa Core.MethodInstance && return 1
+    n = count(!isnothing, specs::Core.SimpleVector)
     return n
 end
 
@@ -1241,7 +1185,7 @@ copy_dims_pair(out) = ()
 copy_dims_pair(out, dim::Int, tail...) =  copy_dims_pair(out => dim, tail...)
 copy_dims_pair(out, dim::Colon, tail...) = copy_dims_pair(out => dim, tail...)
 @test Base.return_types(copy_dims_pair, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}]
-@test all(m -> 5 < count_specializations(m) < 15, methods(copy_dims_pair)) # currently about 7
+@test all(m -> 3 < count_specializations(m) < 15, methods(copy_dims_pair)) # currently about 5
 
 # splatting an ::Any should still allow inference to use types of parameters preceding it
 f22364(::Int, ::Any...) = 0
@@ -1651,7 +1595,7 @@ gg13183(x::X...) where {X} = (_false13183 ? gg13183(x, x) : 0)
 let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}),
     world = UInt(23) # some small-numbered world that should be valid
     interp = Core.Compiler.NativeInterpreter()
-    opt = Core.Compiler.OptimizationState(linfo, Core.Compiler.OptimizationParams(interp), interp)
+    opt = Core.Compiler.OptimizationState(linfo, interp)
     # make sure the state of the properties look reasonable
     @test opt.src !== linfo.def.source
     @test length(opt.src.slotflags) == linfo.def.nargs <= length(opt.src.slotnames)
@@ -1848,9 +1792,17 @@ bar_22708(x) = f_22708(x)
 
 @test bar_22708(1) == "x"
 
+struct EarlyGeneratedFunctionStub
+    stub::Core.GeneratedFunctionStub
+end
+(stub::EarlyGeneratedFunctionStub)(args...) = (@nospecialize; stub.stub(args...))
+
 # mechanism for spoofing work-limiting heuristics and early generator expansion (#24852)
-function _generated_stub(gen::Symbol, args::Vector{Any}, params::Vector{Any}, line, file, expand_early)
-    stub = Expr(:new, Core.GeneratedFunctionStub, gen, args, params, line, file, expand_early)
+function _generated_stub(gen::Symbol, args::Core.SimpleVector, params::Core.SimpleVector, expand_early::Bool)
+    stub = Expr(:new, Core.GeneratedFunctionStub, gen, args, params)
+    if expand_early
+        stub = Expr(:new, EarlyGeneratedFunctionStub, stub)
+    end
     return Expr(:meta, :generated, stub)
 end
 
@@ -1859,10 +1811,21 @@ f24852_kernel2(x, y::Tuple) = f24852_kernel1(x, (y,))
 f24852_kernel3(x, y::Tuple) = f24852_kernel2(x, (y,))
 f24852_kernel(x, y::Number) = f24852_kernel3(x, (y,))
 
-function f24852_kernel_cinfo(fsig::Type)
-    world = typemax(UInt) # FIXME
-    match = Base._methods_by_ftype(fsig, -1, world)[1]
-    isdefined(match.method, :source) || return (nothing, :(f(x, y)))
+function f24852_kernel_cinfo(world::UInt, source, fsig::Type)
+    matches = Base._methods_by_ftype(fsig, -1, world)
+    if matches === nothing || length(matches) != 1
+        match = nothing
+    else
+        match = matches[1]
+        if !isdefined(match.method, :source)
+            match = nothing
+        end
+    end
+    if match === nothing
+        code_info = :(f(x, y))
+        code_info = Core.GeneratedFunctionStub(identity, Core.svec(:self, :f, :x, :y), Core.svec(:X, :Y))(world, source, code_info)
+        return (nothing, code_info)
+    end
     code_info = Base.uncompressed_ir(match.method)
     Meta.partially_inline!(code_info.code, Any[], match.spec_types, Any[match.sparams...], 1, 0, :propagate)
     if startswith(String(match.method.name), "f24852")
@@ -1877,21 +1840,23 @@ function f24852_kernel_cinfo(fsig::Type)
     end
     pushfirst!(code_info.slotnames, Symbol("#self#"))
     pushfirst!(code_info.slotflags, 0x00)
+    # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
+    # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
     return match.method, code_info
 end
 
-function f24852_gen_cinfo_uninflated(X, Y, _, f, x, y)
-    _, code_info = f24852_kernel_cinfo(Tuple{f, x, y})
+function f24852_gen_cinfo_uninflated(world::UInt, source, X, Y, _, f, x, y)
+    _, code_info = f24852_kernel_cinfo(world, source, Tuple{f, x, y})
     return code_info
 end
 
-function f24852_gen_cinfo_inflated(X, Y, _, f, x, y)
-    method, code_info = f24852_kernel_cinfo(Tuple{f, x, y})
+function f24852_gen_cinfo_inflated(world::UInt, source, X, Y, _, f, x, y)
+    method, code_info = f24852_kernel_cinfo(world, source, Tuple{f, x, y})
     code_info.method_for_inference_limit_heuristics = method
     return code_info
 end
 
-function f24852_gen_expr(X, Y, _, f, x, y) # deparse f(x::X, y::Y) where {X, Y}
+function f24852_gen_expr(X, Y, _, f, x, y) # deparse of f(x::X, y::Y) where {X, Y}
     if f === typeof(f24852_kernel)
         f2 = :f24852_kernel3
     elseif f === typeof(f24852_kernel3)
@@ -1908,20 +1873,8 @@ end
 
 @eval begin
     function f24852_late_expr(f, x::X, y::Y) where {X, Y}
-        $(_generated_stub(:f24852_gen_expr, Any[:self, :f, :x, :y],
-                          Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), false))
-        $(Expr(:meta, :generated_only))
-        #= no body =#
-    end
-    function f24852_late_inflated(f, x::X, y::Y) where {X, Y}
-        $(_generated_stub(:f24852_gen_cinfo_inflated, Any[:self, :f, :x, :y],
-                          Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), false))
-        $(Expr(:meta, :generated_only))
-        #= no body =#
-    end
-    function f24852_late_uninflated(f, x::X, y::Y) where {X, Y}
-        $(_generated_stub(:f24852_gen_cinfo_uninflated, Any[:self, :f, :x, :y],
-                          Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), false))
+        $(_generated_stub(:f24852_gen_expr, Core.svec(:self, :f, :x, :y),
+                          Core.svec(:X, :Y), false))
         $(Expr(:meta, :generated_only))
         #= no body =#
     end
@@ -1929,20 +1882,18 @@ end
 
 @eval begin
     function f24852_early_expr(f, x::X, y::Y) where {X, Y}
-        $(_generated_stub(:f24852_gen_expr, Any[:self, :f, :x, :y],
-                          Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), true))
+        $(_generated_stub(:f24852_gen_expr, Core.svec(:self, :f, :x, :y),
+                          Core.svec(:X, :Y), true))
         $(Expr(:meta, :generated_only))
         #= no body =#
     end
     function f24852_early_inflated(f, x::X, y::Y) where {X, Y}
-        $(_generated_stub(:f24852_gen_cinfo_inflated, Any[:self, :f, :x, :y],
-                          Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), true))
+        $(Expr(:meta, :generated, f24852_gen_cinfo_inflated))
         $(Expr(:meta, :generated_only))
         #= no body =#
     end
     function f24852_early_uninflated(f, x::X, y::Y) where {X, Y}
-        $(_generated_stub(:f24852_gen_cinfo_uninflated, Any[:self, :f, :x, :y],
-                          Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), true))
+        $(Expr(:meta, :generated, f24852_gen_cinfo_uninflated))
         $(Expr(:meta, :generated_only))
         #= no body =#
     end
@@ -1953,10 +1904,6 @@ result = f24852_kernel(x, y)
 
 @test result === f24852_late_expr(f24852_kernel, x, y)
 @test Base.return_types(f24852_late_expr, typeof((f24852_kernel, x, y))) == Any[Any]
-@test result === f24852_late_uninflated(f24852_kernel, x, y)
-@test Base.return_types(f24852_late_uninflated, typeof((f24852_kernel, x, y))) == Any[Any]
-@test result === f24852_late_uninflated(f24852_kernel, x, y)
-@test Base.return_types(f24852_late_uninflated, typeof((f24852_kernel, x, y))) == Any[Any]
 
 @test result === f24852_early_expr(f24852_kernel, x, y)
 @test Base.return_types(f24852_early_expr, typeof((f24852_kernel, x, y))) == Any[Any]
@@ -1964,7 +1911,6 @@ result = f24852_kernel(x, y)
 @test Base.return_types(f24852_early_uninflated, typeof((f24852_kernel, x, y))) == Any[Any]
 @test result === @inferred f24852_early_inflated(f24852_kernel, x, y)
 @test Base.return_types(f24852_early_inflated, typeof((f24852_kernel, x, y))) == Any[Float64]
-
 # TODO: test that `expand_early = true` + inflated `method_for_inference_limit_heuristics`
 # can be used to tighten up some inference result.
 
@@ -1998,7 +1944,7 @@ let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code
     end
     foundslot = false
     for expr25261 in opt25261[i:end]
-        if expr25261 isa TypedSlot && expr25261.typ === Tuple{Int, Int}
+        if expr25261 isa Core.Compiler.TypedSlot && expr25261.typ === Tuple{Int, Int}
             # This should be the assignment to the SSAValue into the getfield
             # call - make sure it's a TypedSlot
             foundslot = true
@@ -2213,45 +2159,6 @@ end
 # =========================
 # `MustAlias` propagates constraints imposed on aliased fields
 
-import Core: MethodInstance, CodeInstance
-const CC = Core.Compiler
-import .CC: WorldRange, WorldView
-
-"""
-    @newinterp NewInterpreter
-
-Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated
-from the native code cache, satisfying the minimum interface requirements.
-"""
-macro newinterp(name)
-    cachename = Symbol(string(name, "Cache"))
-    name = esc(name)
-    quote
-        struct $cachename
-            dict::IdDict{MethodInstance,CodeInstance}
-        end
-        struct $name <: CC.AbstractInterpreter
-            interp::CC.NativeInterpreter
-            cache::$cachename
-            meta # additional information
-            $name(world = Base.get_world_counter();
-                interp = CC.NativeInterpreter(world),
-                cache = $cachename(IdDict{MethodInstance,CodeInstance}()),
-                meta = nothing,
-                ) = new(interp, cache, meta)
-        end
-        CC.InferenceParams(interp::$name) = CC.InferenceParams(interp.interp)
-        CC.OptimizationParams(interp::$name) = CC.OptimizationParams(interp.interp)
-        CC.get_world_counter(interp::$name) = CC.get_world_counter(interp.interp)
-        CC.get_inference_cache(interp::$name) = CC.get_inference_cache(interp.interp)
-        CC.code_cache(interp::$name) = WorldView(interp.cache, WorldRange(CC.get_world_counter(interp)))
-        CC.get(wvc::WorldView{<:$cachename}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
-        CC.getindex(wvc::WorldView{<:$cachename}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
-        CC.haskey(wvc::WorldView{<:$cachename}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
-        CC.setindex!(wvc::WorldView{<:$cachename}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
-    end
-end
-
 struct AliasableField{T}
     f::T
 end
@@ -2264,18 +2171,20 @@ mutable struct AliasableConstField{S,T}
     f2::T
 end
 
-# lattice
-# -------
-
 import Core.Compiler:
-    AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, OptimizerLattice,
-    MustAliasesLattice, InterMustAliasesLattice, BaseInferenceLattice, IPOResultLattice,
-    typeinf_lattice, ipo_lattice, optimizer_lattice
+    InferenceLattice, OptimizerLattice, MustAliasesLattice, InterMustAliasesLattice,
+    BaseInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice
 
+include("newinterp.jl")
 @newinterp MustAliasInterpreter
-CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance))
-CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance))
-CC.optimizer_lattice(::MustAliasInterpreter) = OptimizerLattice()
+let CC = Core.Compiler
+    CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance))
+    CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance))
+    CC.optimizer_lattice(::MustAliasInterpreter) = OptimizerLattice()
+end
+
+# lattice
+# -------
 
 import Core.Compiler: MustAlias, Const, PartialStruct, ⊑, tmerge
 let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance))
@@ -2741,6 +2650,37 @@ end |> only === Int
 @test only(Base.return_types(Core.apply_type, Tuple{Any})) == Any
 @test only(Base.return_types(Core.apply_type, Tuple{Any,Any})) == Any
 
+# `apply_type_tfunc` accuracy for constrained type construction
+# https://github.com/JuliaLang/julia/issues/47089
+import Core: Const
+import Core.Compiler: apply_type_tfunc
+struct Issue47089{A<:Number,B<:Number} end
+let 𝕃 = Core.Compiler.fallback_lattice
+    A = Type{<:Integer}
+    @test apply_type_tfunc(𝕃, Const(Issue47089), A, A) <: (Type{Issue47089{A,B}} where {A<:Integer, B<:Integer})
+    @test apply_type_tfunc(𝕃, Const(Issue47089), Const(Int), Const(Int), Const(Int)) === Union{}
+    @test apply_type_tfunc(𝕃, Const(Issue47089), Const(String)) === Union{}
+    @test apply_type_tfunc(𝕃, Const(Issue47089), Const(AbstractString)) === Union{}
+    @test apply_type_tfunc(𝕃, Const(Issue47089), Type{Ptr}, Type{Ptr{T}} where T) === Base.rewrap_unionall(Type{Issue47089.body.body}, Issue47089)
+    # check complexity size limiting
+    @test apply_type_tfunc(𝕃, Const(Val), Type{Pair{Pair{Pair{Pair{A,B},C},D},E}} where {A,B,C,D,E}) == Type{Val{Pair{A, B}}} where {A, B}
+    @test apply_type_tfunc(𝕃, Const(Pair), Base.rewrap_unionall(Type{Pair.body.body},Pair), Type{Pair{Pair{Pair{Pair{A,B},C},D},E}} where {A,B,C,D,E}) == Type{Pair{Pair{A, B}, Pair{C, D}}} where {A, B, C, D}
+    @test apply_type_tfunc(𝕃, Const(Val), Type{Union{Int,Pair{Pair{Pair{Pair{A,B},C},D},E}}} where {A,B,C,D,E}) == Type{Val{_A}} where _A
+end
+@test only(Base.return_types(keys, (Dict{String},))) == Base.KeySet{String, T} where T<:(Dict{String})
+@test only(Base.return_types((r)->similar(Array{typeof(r[])}, 1), (Base.RefValue{Array{Int}},))) == Vector{<:Array{Int}}
+@test only(Base.return_types((r)->similar(Array{typeof(r[])}, 1), (Base.RefValue{Array{<:Real}},))) == Vector{<:Array{<:Real}}
+# test complexity limit on apply_type on a function capturing functions returning functions
+@test only(Base.return_types(Base.afoldl, (typeof((m, n) -> () -> Returns(nothing)(m, n)), Function, Function, Vararg{Function}))) === Function
+
+let A = Tuple{A,B,C,D,E,F,G,H} where {A,B,C,D,E,F,G,H}
+    B = Core.Compiler.rename_unionall(A)
+    for i in 1:8
+        @test A.var != B.var && (i == 1 ? A == B : A != B)
+        A, B = A.body, B.body
+    end
+end
+
 # PR 27351, make sure optimized type intersection for method invalidation handles typevars
 
 abstract type AbstractT27351 end
@@ -3021,11 +2961,11 @@ end
 # issue #28356
 # unit test to make sure countunionsplit overflows gracefully
 # we don't care what number is returned as long as it's large
-@test Core.Compiler.unionsplitcost(Any[Union{Int32, Int64} for i=1:80]) > 100000
-@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}]) == 2
-@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8
-@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6
-@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6
+@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int32, Int64} for i=1:80]) > 100000
+@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}]) == 2
+@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8
+@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6
+@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6
 
 # make sure compiler doesn't hang in union splitting
 
@@ -3690,10 +3630,10 @@ Base.iterate(::Itr41839_3 , i) = i < 16 ? (i, i + 1) : nothing
 
 # issue #32699
 f32699(a) = (id = a[1],).id
-@test Base.return_types(f32699, (Vector{Union{Int,Missing}},)) == Any[Union{Int,Missing}]
+@test only(Base.return_types(f32699, (Vector{Union{Int,Missing}},))) == Union{Int,Missing}
 g32699(a) = Tuple{a}
-@test Base.return_types(g32699, (Type{<:Integer},))[1] == Type{<:Tuple{Any}}
-@test Base.return_types(g32699, (Type,))[1] == Type{<:Tuple}
+@test only(Base.return_types(g32699, (Type{<:Integer},))) <: Type{<:Tuple{Any}}
+@test only(Base.return_types(g32699, (Type,))) <: Type{<:Tuple}
 
 # Inference precision of union-split calls
 function f_apply_union_split(fs, x)
@@ -3940,23 +3880,49 @@ g38888() = S38888(Base.inferencebarrier(3), nothing)
 f_inf_error_bottom(x::Vector) = isempty(x) ? error(x[1]) : x
 @test only(Base.return_types(f_inf_error_bottom, Tuple{Vector{Any}})) == Vector{Any}
 
-# @constprop :aggressive
-@noinline g_nonaggressive(y, x) = Val{x}()
-@noinline Base.@constprop :aggressive g_aggressive(y, x) = Val{x}()
-
-f_nonaggressive(x) = g_nonaggressive(x, 1)
-f_aggressive(x) = g_aggressive(x, 1)
-
-# The first test just makes sure that improvements to the compiler don't
-# render the annotation effectless.
-@test Base.return_types(f_nonaggressive, Tuple{Int})[1] == Val
-@test Base.return_types(f_aggressive, Tuple{Int})[1] == Val{1}
-
-# @constprop :none
-@noinline Base.@constprop :none g_noaggressive(flag::Bool) = flag ? 1 : 1.0
-ftrue_noaggressive() = g_noaggressive(true)
-@test only(Base.return_types(ftrue_noaggressive, Tuple{})) == Union{Int,Float64}
-
+# @constprop annotation
+@noinline f_constprop_simple(f, x) = (f(x); Val{x}())
+Base.@constprop :aggressive f_constprop_aggressive(f, x) = (f(x); Val{x}())
+Base.@constprop :aggressive @noinline f_constprop_aggressive_noinline(f, x) = (f(x); Val{x}())
+Base.@constprop :none f_constprop_none(f, x) = (f(x); Val{x}())
+Base.@constprop :none @inline f_constprop_none_inline(f, x) = (f(x); Val{x}())
+
+@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_simple)))
+@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_simple)))
+@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive)))
+@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive)))
+@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive_noinline)))
+@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive_noinline)))
+@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none)))
+@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none)))
+@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none_inline)))
+@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none_inline)))
+
+# make sure that improvements to the compiler don't render the annotation effectless.
+@test Base.return_types((Function,)) do f
+    f_constprop_simple(f, 1)
+end |> only == Val
+@test Base.return_types((Function,)) do f
+    f_constprop_aggressive(f, 1)
+end |> only == Val{1}
+@test Base.return_types((Function,)) do f
+    f_constprop_aggressive_noinline(f, 1)
+end |> only == Val{1}
+@test Base.return_types((Function,)) do f
+    f_constprop_none(f, 1)
+end |> only == Val
+@test Base.return_types((Function,)) do f
+    f_constprop_none_inline(f, 1)
+end |> only == Val
+
+# anonymous function support for `@constprop`
+@test Base.return_types((Function,)) do f
+    map((1,2,3)) do x
+        Base.@constprop :aggressive
+        f(x)
+        return Val{x}()
+    end
+end |> only == Tuple{Val{1},Val{2},Val{3}}
 
 function splat_lotta_unions()
     a = Union{Tuple{Int},Tuple{String,Vararg{Int}},Tuple{Int,Vararg{Int}}}[(2,)][1]
@@ -4000,13 +3966,13 @@ end
 
     # argtypes
     let
-        tunion = Core.Compiler.switchtupleunion(Any[Union{Int32,Int64}, Core.Const(nothing)])
+        tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Core.Const(nothing)])
         @test length(tunion) == 2
         @test Any[Int32, Core.Const(nothing)] in tunion
         @test Any[Int64, Core.Const(nothing)] in tunion
     end
     let
-        tunion = Core.Compiler.switchtupleunion(Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)])
+        tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)])
         @test length(tunion) == 4
         @test Any[Int32, Float32, Core.Const(nothing)] in tunion
         @test Any[Int32, Float64, Core.Const(nothing)] in tunion
@@ -4139,26 +4105,24 @@ function f_convert_me_to_ir(b, x)
     return a
 end
 
-let
-    # Test the presence of PhiNodes in lowered IR by taking the above function,
+let # Test the presence of PhiNodes in lowered IR by taking the above function,
     # running it through SSA conversion and then putting it into an opaque
     # closure.
     mi = Core.Compiler.specialize_method(first(methods(f_convert_me_to_ir)),
         Tuple{Bool, Float64}, Core.svec())
     ci = Base.uncompressed_ast(mi.def)
+    ci.slottypes = Any[ Any for i = 1:length(ci.slotflags) ]
     ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes]
-    sv = Core.Compiler.OptimizationState(mi, Core.Compiler.OptimizationParams(),
-        Core.Compiler.NativeInterpreter())
+    sv = Core.Compiler.OptimizationState(mi, Core.Compiler.NativeInterpreter())
     ir = Core.Compiler.convert_to_ircode(ci, sv)
     ir = Core.Compiler.slot2reg(ir, ci, sv)
     ir = Core.Compiler.compact!(ir)
-    Core.Compiler.replace_code_newstyle!(ci, ir, 4)
-    ci.ssavaluetypes = length(ci.code)
+    Core.Compiler.replace_code_newstyle!(ci, ir)
+    ci.ssavaluetypes = length(ci.ssavaluetypes)
     @test any(x->isa(x, Core.PhiNode), ci.code)
     oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
         Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(b, 1.0)
     @test Base.return_types(oc, Tuple{Bool}) == Any[Float64]
-
     oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
         Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(true, 1.0)
     @test Base.return_types(oc, Tuple{}) == Any[Float64]
@@ -4440,18 +4404,18 @@ end
 
     init = Base.ImmutableDict{Number,Number}()
     a = Const(init)
-    b = Core.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64])
+    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64])
     c = Core.Compiler.tmerge(a, b)
     @test ⊑(a, c) && ⊑(b, c)
     @test c === typeof(init)
 
-    a = Core.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
+    a = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
     c = Core.Compiler.tmerge(a, b)
     @test ⊑(a, c) && ⊑(b, c)
     @test c.fields[2] === Any # or Number
     @test c.fields[3] === ComplexF64
 
-    b = Core.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
+    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
     c = Core.Compiler.tmerge(a, b)
     @test ⊑(a, c)
     @test ⊑(b, c)
@@ -4493,13 +4457,24 @@ end
     Core.Compiler.return_type(+, NTuple{2, Rational})
 end == Rational
 
-# vararg-tuple comparison within `PartialStruct`
+# vararg-tuple comparison within `Compiler.PartialStruct`
 # https://github.com/JuliaLang/julia/issues/44965
 let 𝕃ᵢ = Core.Compiler.fallback_lattice
-    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Core.Const(42), Vararg{Any}])
+    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Any}])
     @test Core.Compiler.issimplertype(𝕃ᵢ, t, t)
+
+    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Union{}}])
+    @test t === Const((42,))
+    t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Int, Vararg{Union{}}])
+    @test t.typ === Tuple{Int, Int}
+    @test t.fields == Any[Const(42), Int]
 end
 
+foo_empty_vararg(i...) = i[2]
+bar_empty_vararg(i) = foo_empty_vararg(10, 20, 30, i...)
+@test bar_empty_vararg(Union{}[]) === 20
+
+
 # check the inference convergence with an empty vartable:
 # the inference state for the toplevel chunk below will have an empty vartable,
 # and so we may fail to terminate (or optimize) it if we don't update vartables correctly
@@ -4673,8 +4648,8 @@ end |> only === Type{Float64}
 global it_count47688 = 0
 struct CountsIterate47688{N}; end
 function Base.iterate(::CountsIterate47688{N}, n=0) where N
-	global it_count47688 += 1
-	n <= N ? (n, n+1) : nothing
+    global it_count47688 += 1
+    n <= N ? (n, n+1) : nothing
 end
 foo47688() = tuple(CountsIterate47688{5}()...)
 bar47688() = foo47688()
@@ -4714,7 +4689,218 @@ type_level_recurse_entry() = Val{type_level_recurse1(1)}()
 f_no_bail_effects_any(x::Any) = x
 f_no_bail_effects_any(x::NamedTuple{(:x,), Tuple{Any}}) = getfield(x, 1)
 g_no_bail_effects_any(x::Any) = f_no_bail_effects_any(x)
-@test Core.Compiler.is_total(Base.infer_effects(g_no_bail_effects_any, Tuple{Any}))
+@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(g_no_bail_effects_any, Tuple{Any}))
 
 # issue #48374
 @test (() -> Union{<:Nothing})() == Nothing
+
+# :static_parameter accuracy
+unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = @isdefined(T) ? T::Type : nothing
+unknown_sparam_nothrow1(x::Ref{T}) where T = @isdefined(T) ? T::Type : nothing
+unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = @isdefined(T) ? T::Type : nothing
+@test only(Base.return_types(unknown_sparam_throw, (Type{Int},))) == Type{Int}
+@test only(Base.return_types(unknown_sparam_throw, (Type{<:Integer},))) == Type{<:Integer}
+@test only(Base.return_types(unknown_sparam_throw, (Type,))) == Union{Nothing, Type}
+@test_broken only(Base.return_types(unknown_sparam_throw, (Nothing,))) === Nothing
+@test_broken only(Base.return_types(unknown_sparam_throw, (Union{Type{Int},Nothing},))) === Union{Nothing,Type{Int}}
+@test only(Base.return_types(unknown_sparam_throw, (Any,))) === Union{Nothing,Type}
+@test only(Base.return_types(unknown_sparam_nothrow1, (Ref,))) === Type
+@test only(Base.return_types(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) === Type
+
+struct Issue49027{Ty<:Number}
+    x::Ty
+end
+function issue49027(::Type{<:Issue49027{Ty}}) where Ty
+    if @isdefined Ty # should be false when `Ty` is given as a free type var.
+        return Ty::DataType
+    end
+    return nothing
+end
+@test only(Base.return_types(issue49027, (Type{Issue49027{TypeVar(:Ty)}},))) >: Nothing
+@test isnothing(issue49027(Issue49027{TypeVar(:Ty)}))
+function issue49027_integer(::Type{<:Issue49027{Ty}}) where Ty<:Integer
+    if @isdefined Ty # should be false when `Ty` is given as a free type var.
+        return Ty::DataType
+    end
+    nothing
+end
+@test only(Base.return_types(issue49027_integer, (Type{Issue49027{TypeVar(:Ty,Int)}},))) >: Nothing
+@test isnothing(issue49027_integer(Issue49027{TypeVar(:Ty,Int)}))
+
+function fapplicable end
+gapplicable() = Val(applicable(fapplicable))
+gapplicable(x) = Val(applicable(fapplicable; x))
+@test only(Base.return_types(gapplicable, ())) === Val{false}
+@test only(Base.return_types(gapplicable, (Int,))) === Val{false}
+fapplicable() = 1
+@test only(Base.return_types(gapplicable, ())) === Val{true}
+@test only(Base.return_types(gapplicable, (Int,))) === Val{false}
+Base.delete_method(which(fapplicable, ()))
+@test only(Base.return_types(gapplicable, ())) === Val{false}
+@test only(Base.return_types(gapplicable, (Int,))) === Val{false}
+fapplicable(; x) = x
+@test only(Base.return_types(gapplicable, ())) === Val{true}
+@test only(Base.return_types(gapplicable, (Int,))) === Val{true}
+@test only(Base.return_types(()) do; applicable(); end) === Union{}
+@test only(Base.return_types((Any,)) do x; Val(applicable(x...)); end) == Val
+@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(+, 1, 2, x...)); end) == Val # could be improved to Val{true}
+@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(+, 1, 2, 3, x...)); end) === Val{true}
+@test only(Base.return_types((Int,)) do x; Val(applicable(+, 1, x)); end) === Val{true}
+@test only(Base.return_types((Union{Int32,Int64},)) do x; Val(applicable(+, 1, x)); end) === Val{true}
+@test only(Base.return_types((String,)) do x; Val(applicable(+, 1, x)); end) === Val{false}
+fapplicable(::Int, ::Integer) = 2
+fapplicable(::Integer, ::Int32) = 3
+@test only(Base.return_types((Int32,)) do x; Val(applicable(fapplicable, 1, x)); end) === Val{false}
+@test only(Base.return_types((Int64,)) do x; Val(applicable(fapplicable, 1, x)); end) === Val{true}
+@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(tuple, x...)); end) === Val{true}
+@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(sin, 1, x...)); end) == Val
+@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(sin, 1, 2, x...)); end) === Val{false}
+
+function fhasmethod end
+ghasmethod() = Val(hasmethod(fhasmethod, Tuple{}))
+@test only(Base.return_types(ghasmethod, ())) === Val{false}
+fhasmethod() = 1
+@test only(Base.return_types(ghasmethod, ())) === Val{true}
+Base.delete_method(which(fhasmethod, ()))
+@test only(Base.return_types(ghasmethod, ())) === Val{false}
+@test only(Base.return_types(()) do; Core._hasmethod(); end) === Any
+@test only(Base.return_types(()) do; Core._hasmethod(+, Tuple, 1); end) === Any
+@test only(Base.return_types(()) do; Core._hasmethod(+, 1); end) === Bool
+@test only(Base.return_types(()) do; Core._hasmethod(+, Tuple{1}); end) === Bool
+@test only(Base.return_types((Any,)) do x; Val(hasmethod(x...)); end) == Val
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int})); end) === Val{true}
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int, Vararg{Int}})); end) === Val{false}
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int, Int, Vararg{Int}})); end) === Val{true}
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int})); end) === Val{true}
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Union{Int32,Int64}})); end) === Val{true}
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Union{Int,String}})); end) === Val{false}
+@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Any})); end) === Val{false}
+@test only(Base.return_types() do; Val(hasmethod(+, Tuple{Int, String})); end) === Val{false}
+fhasmethod(::Int, ::Integer) = 2
+fhasmethod(::Integer, ::Int32) = 3
+@test only(Base.return_types(()) do; Val(hasmethod(fhasmethod, Tuple{Int, Int32})); end) === Val{false}
+@test only(Base.return_types(()) do; Val(hasmethod(fhasmethod, Tuple{Int, Int64})); end) === Val{true}
+@test only(Base.return_types(()) do; Val(hasmethod(tuple, Tuple{Vararg{Int}})); end) === Val{true}
+@test only(Base.return_types(()) do; Val(hasmethod(sin, Tuple{Int, Vararg{Int}})); end) == Val{false}
+@test only(Base.return_types(()) do; Val(hasmethod(sin, Tuple{Int, Int, Vararg{Int}})); end) === Val{false}
+
+# interprocedural call inference from irinterp
+@noinline Base.@assume_effects :total issue48679_unknown_any(x) = Base.inferencebarrier(x)
+
+@noinline _issue48679(y::Union{Nothing,T}) where {T} = T::Type
+Base.@constprop :aggressive function issue48679(x, b)
+    if b
+        x = issue48679_unknown_any(x)
+    end
+    return _issue48679(x)
+end
+@test Base.return_types((Float64,)) do x
+    issue48679(x, false)
+end |> only == Type{Float64}
+
+Base.@constprop :aggressive @noinline _issue48679_const(b, y::Union{Nothing,T}) where {T} = b ? nothing : T::Type
+Base.@constprop :aggressive function issue48679_const(x, b)
+    if b
+        x = issue48679_unknown_any(x)
+    end
+    return _issue48679_const(b, x)
+end
+@test Base.return_types((Float64,)) do x
+    issue48679_const(x, false)
+end |> only == Type{Float64}
+
+# `invoke` call in irinterp
+@noinline _irinterp_invoke(x::Any) = :any
+@noinline _irinterp_invoke(x::T) where T = T
+Base.@constprop :aggressive Base.@assume_effects :foldable function irinterp_invoke(x::T, b) where T
+    return @invoke _irinterp_invoke(x::(b ? T : Any))
+end
+@test Base.return_types((Int,)) do x
+    irinterp_invoke(x, true)
+end |> only == Type{Int}
+
+# recursion detection for semi-concrete interpretation
+# avoid direct infinite loop via `concrete_eval_invoke`
+Base.@assume_effects :foldable function recur_irinterp1(x, y)
+    if rand(Bool)
+        return x, y
+    end
+    return recur_irinterp1(x+1, y)
+end
+@test Base.return_types((Symbol,)) do y
+    recur_irinterp1(0, y)
+end |> only === Tuple{Int,Symbol}
+@test last(recur_irinterp1(0, :y)) === :y
+# avoid indirect infinite loop via `concrete_eval_invoke`
+Base.@assume_effects :foldable function recur_irinterp2(x, y)
+    if rand(Bool)
+        return x, y
+    end
+    return _recur_irinterp2(x+1, y)
+end
+Base.@assume_effects :foldable _recur_irinterp2(x, y) = @noinline recur_irinterp2(x, y)
+@test Base.return_types((Symbol,)) do y
+    recur_irinterp2(0, y)
+end |> only === Tuple{Int,Symbol}
+@test last(recur_irinterp2(0, :y)) === :y
+
+# test Conditional Union splitting of info derived from fieldtype (e.g. in abstract setproperty! handling)
+@test only(Base.return_types((Int, Pair{Int,Nothing}, Symbol)) do a, x, s
+    T = fieldtype(typeof(x), s)
+    if a isa T
+        throw(a)
+    else
+        return T
+    end
+end) == Type{Nothing}
+
+# Test that Core.Compiler.return_type inference works for the 1-arg version
+@test Base.return_types() do
+    Core.Compiler.return_type(Tuple{typeof(+), Int, Int})
+end |> only == Type{Int}
+
+# Test that NamedTuple abstract iteration works for PartialStruct/Const
+function nt_splat_const()
+    nt = (; x=1, y=2)
+    Val{tuple(nt...)[2]}()
+end
+@test @inferred(nt_splat_const()) == Val{2}()
+
+function nt_splat_partial(x::Int)
+    nt = (; x, y=2)
+    Val{tuple(nt...)[2]}()
+end
+@test @inferred(nt_splat_partial(42)) == Val{2}()
+
+# Test that irinterp refines based on discovered errors
+Base.@assume_effects :foldable Base.@constprop :aggressive function kill_error_edge(b1, b2, xs, x)
+    y = b1 ? "julia" : xs[]
+    if b2
+        a = length(y)
+    else
+        a = sin(y)
+    end
+    a + x
+end
+
+Base.@assume_effects :foldable Base.@constprop :aggressive function kill_error_edge(b1, b2, xs, ys, x)
+    y = b1 ? xs[] : ys[]
+    if b2
+        a = length(y)
+    else
+        a = sin(y)
+    end
+    a + x
+end
+
+let src = code_typed1((Bool,Base.RefValue{Any},Int,)) do b2, xs, x
+        kill_error_edge(true, b2, xs, x)
+    end
+    @test count(@nospecialize(x)->isa(x, Core.PhiNode), src.code) == 0
+end
+
+let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do b2, xs, ys, x
+        kill_error_edge(true, b2, xs, ys, x)
+    end
+    @test count(@nospecialize(x)->isa(x, Core.PhiNode), src.code) == 0
+end
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index cfcfc7228b3ed..7920212537608 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -4,7 +4,8 @@ using Test
 using Base.Meta
 using Core: ReturnNode
 
-include(normpath(@__DIR__, "irutils.jl"))
+include("irutils.jl")
+include("newinterp.jl")
 
 """
 Helper to walk the AST and call a function on every node.
@@ -28,7 +29,7 @@ function test_inlined_symbols(func, argtypes)
     ast = Expr(:block)
     ast.args = src.code
     walk(ast) do e
-        if isa(e, Core.Slot)
+        if isa(e, Core.SlotNumber)
             @test 1 <= e.id <= nl
         end
         if isa(e, Core.NewvarNode)
@@ -360,18 +361,6 @@ struct RealConstrained{T <: Real}; end
 @test !fully_eliminated(x->(RealConstrained{x}; nothing), Tuple{Int})
 @test !fully_eliminated(x->(RealConstrained{x}; nothing), Tuple{Type{Vector{T}} where T})
 
-# Check that pure functions with non-inlineable results still get deleted
-struct Big
-    x::NTuple{1024, Int}
-end
-Base.@pure Big() = Big(ntuple(identity, 1024))
-function pure_elim_full()
-    Big()
-    nothing
-end
-
-@test fully_eliminated(pure_elim_full, Tuple{})
-
 # Union splitting of convert
 f_convert_missing(x) = convert(Int64, x)
 let ci = code_typed(f_convert_missing, Tuple{Union{Int64, Missing}})[1][1],
@@ -628,8 +617,7 @@ let
         f42078(a)
     end
     let # make sure to discard the inferred source
-        specs = collect(only(methods(f42078)).specializations)
-        mi = specs[findfirst(!isnothing, specs)]::Core.MethodInstance
+        mi = only(methods(f42078)).specializations::Core.MethodInstance
         codeinf = getcache(mi)::Core.CodeInstance
         @atomic codeinf.inferred = nothing
     end
@@ -696,9 +684,9 @@ begin
 end
 
 # https://github.com/JuliaLang/julia/issues/42246
-@test mktempdir() do dir
+mktempdir() do dir
     cd(dir) do
-        code = quote
+        code = """
             issue42246() = @noinline IOBuffer("a")
             let
                 ci, rt = only(code_typed(issue42246))
@@ -711,9 +699,9 @@ end
                     exit(1)
                end
             end
-        end |> string
+            """
         cmd = `$(Base.julia_cmd()) --code-coverage=tmp.info -e $code`
-        success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr))
+        @test success(pipeline(cmd; stdout, stderr))
     end
 end
 
@@ -1121,7 +1109,7 @@ function f44200()
     x44200
 end
 let src = code_typed1(f44200)
-    @test count(x -> isa(x, Core.PiNode), src.code) == 0
+    @test_broken count(x -> isa(x, Core.PiNode), src.code) == 0
 end
 
 # Test that peeling off one case from (::Any) doesn't introduce
@@ -1766,8 +1754,7 @@ let interp = Core.Compiler.NativeInterpreter()
 
     # ok, now delete the callsite flag, and see the second inlining pass can inline the call
     @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE
-    inlining = Core.Compiler.InliningState(Core.Compiler.OptimizationParams(interp), nothing,
-        Core.Compiler.get_world_counter(interp), interp)
+    inlining = Core.Compiler.InliningState(interp)
     ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false)
     @test count(isinvoke(:*), ir.stmts.inst) == 0
     @test count(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.inst) == 1
@@ -1935,3 +1922,121 @@ let res = @test_throws MethodError let
     err = res.value
     @test err.f === convert && err.args === (Union{Bool,Tuple{String,String}}, g48397)
 end
+
+# https://github.com/JuliaLang/julia/issues/49050
+abstract type Issue49050AbsTop{T,N} end
+abstract type Issue49050Abs1{T, N} <: Issue49050AbsTop{T,N} end
+abstract type Issue49050Abs2{T} <: Issue49050Abs1{T,3} end
+struct Issue49050Concrete{T} <: Issue49050Abs2{T}
+    x::T
+end
+issue49074(::Type{Issue49050AbsTop{T,N}}) where {T,N} = Issue49050AbsTop{T,N}
+Base.@assume_effects :foldable issue49074(::Type{C}) where {C<:Issue49050AbsTop} = issue49074(supertype(C))
+let src = code_typed1() do
+        issue49074(Issue49050Concrete)
+    end
+    @test any(isinvoke(:issue49074), src.code)
+end
+let result = @test_throws MethodError issue49074(Issue49050Concrete)
+    @test result.value.f === issue49074
+    @test result.value.args === (Any,)
+end
+
+# inlining of `TypeName`
+@test fully_eliminated() do
+    Ref.body.name
+end
+
+# Regression for finalizer inlining with more complex control flow
+global finalizer_escape::Int = 0
+mutable struct FinalizerEscapeTest
+    x::Int
+    function FinalizerEscapeTest()
+        this = new(0)
+        finalizer(this) do this
+            global finalizer_escape
+            finalizer_escape = this.x
+        end
+        return this
+    end
+end
+
+function run_finalizer_escape_test1(b1, b2)
+    x = FinalizerEscapeTest()
+    x.x = 1
+    if b1
+        x.x = 2
+    end
+    if b2
+        Base.donotdelete(b2)
+    end
+    x.x = 3
+    return nothing
+end
+
+function run_finalizer_escape_test2(b1, b2)
+    x = FinalizerEscapeTest()
+    x.x = 1
+    if b1
+        x.x = 2
+    end
+    x.x = 3
+    return nothing
+end
+
+for run_finalizer_escape_test in (run_finalizer_escape_test1, run_finalizer_escape_test2)
+    global finalizer_escape::Int = 0
+
+    let src = code_typed1(run_finalizer_escape_test, Tuple{Bool, Bool})
+        @test any(x->isexpr(x, :(=)), src.code)
+    end
+
+    let
+        run_finalizer_escape_test(true, true)
+        @test finalizer_escape == 3
+    end
+end
+
+# `compilesig_invokes` inlining option
+@newinterp NoCompileSigInvokes
+Core.Compiler.OptimizationParams(::NoCompileSigInvokes) =
+    Core.Compiler.OptimizationParams(; compilesig_invokes=false)
+@noinline no_compile_sig_invokes(@nospecialize x) = (x !== Any && !Base.has_free_typevars(x))
+# test the single dispatch candidate case
+let src = code_typed1((Type,)) do x
+        no_compile_sig_invokes(x)
+    end
+    @test count(src.code) do @nospecialize x
+        isinvoke(:no_compile_sig_invokes, x) &&
+        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any}
+    end == 1
+end
+let src = code_typed1((Type,); interp=NoCompileSigInvokes()) do x
+        no_compile_sig_invokes(x)
+    end
+    @test count(src.code) do @nospecialize x
+        isinvoke(:no_compile_sig_invokes, x) &&
+        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Type}
+    end == 1
+end
+# test the union split case
+let src = code_typed1((Union{DataType,UnionAll},)) do x
+        no_compile_sig_invokes(x)
+    end
+    @test count(src.code) do @nospecialize x
+        isinvoke(:no_compile_sig_invokes, x) &&
+        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any}
+    end == 2
+end
+let src = code_typed1((Union{DataType,UnionAll},); interp=NoCompileSigInvokes()) do x
+        no_compile_sig_invokes(x)
+    end
+    @test count(src.code) do @nospecialize x
+        isinvoke(:no_compile_sig_invokes, x) &&
+        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),DataType}
+    end == 1
+    @test count(src.code) do @nospecialize x
+        isinvoke(:no_compile_sig_invokes, x) &&
+        (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),UnionAll}
+    end == 1
+end
diff --git a/test/compiler/invalidation.jl b/test/compiler/invalidation.jl
new file mode 100644
index 0000000000000..20ab2483aa378
--- /dev/null
+++ b/test/compiler/invalidation.jl
@@ -0,0 +1,258 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# setup
+# -----
+
+include("irutils.jl")
+
+using Test
+const CC = Core.Compiler
+import Core: MethodInstance, CodeInstance
+import .CC: WorldRange, WorldView
+
+struct InvalidationTesterCache
+    dict::IdDict{MethodInstance,CodeInstance}
+end
+InvalidationTesterCache() = InvalidationTesterCache(IdDict{MethodInstance,CodeInstance}())
+
+const INVALIDATION_TESTER_CACHE = InvalidationTesterCache()
+
+struct InvalidationTester <: CC.AbstractInterpreter
+    callback!
+    world::UInt
+    inf_params::CC.InferenceParams
+    opt_params::CC.OptimizationParams
+    inf_cache::Vector{CC.InferenceResult}
+    code_cache::InvalidationTesterCache
+    function InvalidationTester(callback! = nothing;
+                                world::UInt = Base.get_world_counter(),
+                                inf_params::CC.InferenceParams = CC.InferenceParams(),
+                                opt_params::CC.OptimizationParams = CC.OptimizationParams(),
+                                inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[],
+                                code_cache::InvalidationTesterCache = INVALIDATION_TESTER_CACHE)
+        if callback! === nothing
+            callback! = function (replaced::MethodInstance)
+                # Core.println(replaced) # debug
+                delete!(code_cache.dict, replaced)
+            end
+        end
+        return new(callback!, world, inf_params, opt_params, inf_cache, code_cache)
+    end
+end
+
+struct InvalidationTesterCacheView
+    interp::InvalidationTester
+    dict::IdDict{MethodInstance,CodeInstance}
+end
+
+CC.InferenceParams(interp::InvalidationTester) = interp.inf_params
+CC.OptimizationParams(interp::InvalidationTester) = interp.opt_params
+CC.get_world_counter(interp::InvalidationTester) = interp.world
+CC.get_inference_cache(interp::InvalidationTester) = interp.inf_cache
+CC.code_cache(interp::InvalidationTester) = WorldView(InvalidationTesterCacheView(interp, interp.code_cache.dict), WorldRange(interp.world))
+CC.get(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
+CC.getindex(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
+CC.haskey(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
+function CC.setindex!(wvc::WorldView{InvalidationTesterCacheView}, ci::CodeInstance, mi::MethodInstance)
+    add_callback!(wvc.cache.interp.callback!, mi)
+    setindex!(wvc.cache.dict, ci, mi)
+end
+
+function add_callback!(@nospecialize(callback!), mi::MethodInstance)
+    callback = function (replaced::MethodInstance, max_world,
+                         seen::Base.IdSet{MethodInstance} = Base.IdSet{MethodInstance}())
+        push!(seen, replaced)
+        callback!(replaced)
+        if isdefined(replaced, :backedges)
+            for item in replaced.backedges
+                isa(item, MethodInstance) || continue # might be `Type` object representing an `invoke` signature
+                mi = item
+                mi in seen && continue # otherwise fail into an infinite loop
+                var"#self#"(mi, max_world, seen)
+            end
+        end
+        return nothing
+    end
+
+    if !isdefined(mi, :callbacks)
+        mi.callbacks = Any[callback]
+    else
+        callbacks = mi.callbacks::Vector{Any}
+        if !any(@nospecialize(cb)->cb===callback, callbacks)
+            push!(callbacks, callback)
+        end
+    end
+    return nothing
+end
+
+
+# basic functionality test
+# ------------------------
+
+basic_callee(x) = x
+basic_caller(x) = basic_callee(x)
+
+# run inference and check that cache exist
+@test Base.return_types((Float64,); interp=InvalidationTester()) do x
+    basic_caller(x)
+end |> only === Float64
+@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+    mi.def.name === :basic_callee
+end
+@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+    mi.def.name === :basic_caller
+end
+
+# this redefinition below should invalidate the cache
+basic_callee(x) = x, x
+@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+    mi.def.name === :basic_callee
+end
+@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+    mi.def.name === :basic_caller
+end
+
+# re-run inference and check the result is updated (and new cache exists)
+@test Base.return_types((Float64,); interp=InvalidationTester()) do x
+    basic_caller(x)
+end |> only === Tuple{Float64,Float64}
+@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+    mi.def.name === :basic_callee
+end
+@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+    mi.def.name === :basic_caller
+end
+
+# backedge optimization
+# ---------------------
+
+const GLOBAL_BUFFER = IOBuffer()
+
+# test backedge optimization when the callee's type and effects information are maximized
+begin take!(GLOBAL_BUFFER)
+
+    pr48932_callee(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x))
+    pr48932_caller(x) = pr48932_callee(Base.inferencebarrier(x))
+
+    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
+    let rt = only(Base.return_types(pr48932_callee, (Any,)))
+        @test rt === Any
+        effects = Base.infer_effects(pr48932_callee, (Any,))
+        @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects()
+    end
+
+    # run inference on both `pr48932_caller` and `pr48932_callee`
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline pr48932_caller(x)
+        end |> only
+        @test rt === Any
+        @test any(iscall((src, pr48932_callee)), src.code)
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_callee
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_caller
+    end
+    @test 42 == pr48932_caller(42)
+    @test "42" == String(take!(GLOBAL_BUFFER))
+
+    # test that we didn't add the backedge from `pr48932_callee` to `pr48932_caller`:
+    # this redefinition below should invalidate the cache of `pr48932_callee` but not that of `pr48932_caller`
+    pr48932_callee(x) = (print(GLOBAL_BUFFER, x); nothing)
+    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_callee
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_caller
+    end
+    @test isnothing(pr48932_caller(42))
+    @test "42" == String(take!(GLOBAL_BUFFER))
+end
+
+# we can avoid adding backedge even if the callee's return type is not the top
+# when the return value is not used within the caller
+begin take!(GLOBAL_BUFFER)
+
+    pr48932_callee_inferrable(x) = (print(GLOBAL_BUFFER, x); nothing)
+    pr48932_caller_unuse(x) = (pr48932_callee_inferrable(Base.inferencebarrier(x)); nothing)
+
+    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
+    let rt = only(Base.return_types(pr48932_callee_inferrable, (Any,)))
+        @test rt === Nothing
+        effects = Base.infer_effects(pr48932_callee_inferrable, (Any,))
+        @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects()
+    end
+
+    # run inference on both `pr48932_caller` and `pr48932_callee`:
+    # we don't need to add backedge to `pr48932_callee` from `pr48932_caller`
+    # since the inference result of `pr48932_callee` is maximized and it's not inlined
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline pr48932_caller_unuse(x)
+        end |> only
+        @test rt === Nothing
+        @test any(iscall((src, pr48932_callee_inferrable)), src.code)
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_callee_inferrable
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_caller_unuse
+    end
+    @test isnothing(pr48932_caller_unuse(42))
+    @test "42" == String(take!(GLOBAL_BUFFER))
+
+    # test that we didn't add the backedge from `pr48932_callee_inferrable` to `pr48932_caller_unuse`:
+    # this redefinition below should invalidate the cache of `pr48932_callee_inferrable` but not that of `pr48932_caller_unuse`
+    pr48932_callee_inferrable(x) = (print(GLOBAL_BUFFER, "foo"); x)
+    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_callee_inferrable
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_caller_unuse
+    end
+    @test isnothing(pr48932_caller_unuse(42))
+    @test "foo" == String(take!(GLOBAL_BUFFER))
+end
+
+# we need to add backedge when the callee is inlined
+begin take!(GLOBAL_BUFFER)
+
+    @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x))
+    pr48932_caller_inlined(x) = pr48932_callee_inlined(Base.inferencebarrier(x))
+
+    # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top
+    let rt = only(Base.return_types(pr48932_callee_inlined, (Any,)))
+        @test rt === Any
+        effects = Base.infer_effects(pr48932_callee_inlined, (Any,))
+        @test Core.Compiler.Effects(effects; noinbounds=false) == Core.Compiler.Effects()
+    end
+
+    # run inference on `pr48932_caller_inlined` and `pr48932_callee_inlined`
+    let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x
+            @inline pr48932_caller_inlined(x)
+        end |> only
+        @test rt === Any
+        @test any(isinvoke(:pr48932_callee_inlined), src.code)
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_callee_inlined
+    end
+    @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_caller_inlined
+    end
+    @test 42 == pr48932_caller_inlined(42)
+    @test "42" == String(take!(GLOBAL_BUFFER))
+
+    # test that we added the backedge from `pr48932_callee_inlined` to `pr48932_caller_inlined`:
+    # this redefinition below should invalidate the cache of `pr48932_callee_inlined` but not that of `pr48932_caller_inlined`
+    @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); nothing)
+    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_callee_inlined
+    end
+    @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci)
+        mi.def.name === :pr48932_caller_inlined
+    end
+    @test isnothing(pr48932_caller_inlined(42))
+    @test "42" == String(take!(GLOBAL_BUFFER))
+end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index bc2cb0d3507f3..c704a8cf1c434 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -708,9 +708,10 @@ let m = Meta.@lower 1 + 1
         Any
     ]
     nstmts = length(src.code)
-    src.codelocs = fill(Int32(1), nstmts)
-    src.ssaflags = fill(Int32(0), nstmts)
-    ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any])
+    src.codelocs = fill(one(Int32), nstmts)
+    src.ssaflags = fill(one(Int32), nstmts)
+    src.slotflags = fill(zero(UInt8), 3)
+    ir = Core.Compiler.inflate_ir(src)
     @test Core.Compiler.verify_ir(ir) === nothing
     ir = @test_nowarn Core.Compiler.sroa_pass!(ir)
     @test Core.Compiler.verify_ir(ir) === nothing
@@ -1229,3 +1230,18 @@ let src = code_typed1(named_tuple_elim, Tuple{Symbol, Tuple})
           count(iscall((src, Core._svec_ref)), src.code) == 0 &&
           count(iscall(x->!isa(argextype(x, src).val, Core.Builtin)), src.code) == 0
 end
+
+# Test that sroa works if the struct type is a PartialStruct
+mutable struct OneConstField
+    const a::Int
+    b::Int
+end
+
+@eval function one_const_field_partial()
+    # Use explicit :new here to avoid inlining messing with the type
+    strct = $(Expr(:new, OneConstField, 1, 2))
+    strct.b = 4
+    strct.b = 5
+    return strct.b
+end
+@test fully_eliminated(one_const_field_partial; retval=5)
diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl
index ef8fe3efbb315..95ac0d555ef88 100644
--- a/test/compiler/irutils.jl
+++ b/test/compiler/irutils.jl
@@ -1,8 +1,8 @@
 import Core: CodeInfo, ReturnNode, MethodInstance
-import Core.Compiler: IRCode, IncrementalCompact, argextype, singleton_type
+import Core.Compiler: IRCode, IncrementalCompact, VarState, argextype, singleton_type
 import Base.Meta: isexpr
 
-argextype(@nospecialize args...) = argextype(args..., Any[])
+argextype(@nospecialize args...) = argextype(args..., VarState[])
 code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo
 get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
 
diff --git a/test/compiler/newinterp.jl b/test/compiler/newinterp.jl
new file mode 100644
index 0000000000000..56a68f2a09545
--- /dev/null
+++ b/test/compiler/newinterp.jl
@@ -0,0 +1,45 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+    @newinterp NewInterpreter
+
+Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated
+from the native code cache, satisfying the minimum interface requirements.
+"""
+macro newinterp(InterpName)
+    InterpCacheName = esc(Symbol(string(InterpName, "Cache")))
+    InterpName = esc(InterpName)
+    C = Core
+    CC = Core.Compiler
+    quote
+        struct $InterpCacheName
+            dict::IdDict{$C.MethodInstance,$C.CodeInstance}
+        end
+        $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}())
+        struct $InterpName <: $CC.AbstractInterpreter
+            meta # additional information
+            world::UInt
+            inf_params::$CC.InferenceParams
+            opt_params::$CC.OptimizationParams
+            inf_cache::Vector{$CC.InferenceResult}
+            code_cache::$InterpCacheName
+            function $InterpName(meta = nothing;
+                                 world::UInt = Base.get_world_counter(),
+                                 inf_params::$CC.InferenceParams = $CC.InferenceParams(),
+                                 opt_params::$CC.OptimizationParams = $CC.OptimizationParams(),
+                                 inf_cache::Vector{$CC.InferenceResult} = $CC.InferenceResult[],
+                                 code_cache::$InterpCacheName = $InterpCacheName())
+                return new(meta, world, inf_params, opt_params, inf_cache, code_cache)
+            end
+        end
+        $CC.InferenceParams(interp::$InterpName) = interp.inf_params
+        $CC.OptimizationParams(interp::$InterpName) = interp.opt_params
+        $CC.get_world_counter(interp::$InterpName) = interp.world
+        $CC.get_inference_cache(interp::$InterpName) = interp.inf_cache
+        $CC.code_cache(interp::$InterpName) = $CC.WorldView(interp.code_cache, $CC.WorldRange(interp.world))
+        $CC.get(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance, default) = get(wvc.cache.dict, mi, default)
+        $CC.getindex(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = getindex(wvc.cache.dict, mi)
+        $CC.haskey(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = haskey(wvc.cache.dict, mi)
+        $CC.setindex!(wvc::$CC.WorldView{$InterpCacheName}, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
+    end
+end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 6f485a7c781ec..43f17d4ad69f2 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -117,9 +117,9 @@ let cfg = CFG(BasicBlock[
     make_bb([2, 3]    , []    ),
 ], Int[])
     insts = Compiler.InstructionStream([], [], Any[], Int32[], UInt8[])
-    code = Compiler.IRCode(insts, cfg, LineInfoNode[], [], Expr[], [])
-    compact = Compiler.IncrementalCompact(code, true)
-    @test length(compact.result_bbs) == 4 && 0 in compact.result_bbs[3].preds
+    ir = Compiler.IRCode(insts, cfg, Core.LineInfoNode[], Any[], Expr[], Compiler.VarState[])
+    compact = Compiler.IncrementalCompact(ir, true)
+    @test length(compact.cfg_transform.result_bbs) == 4 && 0 in compact.cfg_transform.result_bbs[3].preds
 end
 
 # Issue #32579 - Optimizer bug involving type constraints
@@ -321,8 +321,8 @@ end
 f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
 @test_throws TypeError f_if_typecheck()
 
-@test let # https://github.com/JuliaLang/julia/issues/42258
-    code = quote
+let # https://github.com/JuliaLang/julia/issues/42258
+    code = """
         function foo()
             a = @noinline rand(rand(0:10))
             if isempty(a)
@@ -335,10 +335,11 @@ f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
         code_typed(foo; optimize=true)
 
         code_typed(Core.Compiler.setindex!, (Core.Compiler.UseRef,Core.Compiler.NewSSAValue); optimize=true)
-    end |> string
+        """
     cmd = `$(Base.julia_cmd()) -g 2 -e $code`
-    stderr = IOBuffer()
-    success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr)) && isempty(String(take!(stderr)))
+    stderr = Base.BufferStream()
+    @test success(pipeline(Cmd(cmd); stdout, stderr))
+    @test readchomp(stderr) == ""
 end
 
 @testset "code_ircode" begin
@@ -358,6 +359,25 @@ end
     @test first(only(Base.code_ircode(demo; optimize_until = "SROA"))) isa Compiler.IRCode
 end
 
+# slots after SSA conversion
+function f_with_slots(a, b)
+    # `c` and `d` are local variables
+    c = a + b
+    d = c > 0
+    return (c, d)
+end
+let # #self#, a, b, c, d
+    unopt = code_typed1(f_with_slots, (Int,Int); optimize=false)
+    @test length(unopt.slotnames) == length(unopt.slotflags) == length(unopt.slottypes) == 5
+    ir_withslots = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="convert")))
+    @test length(ir_withslots.argtypes) == 5
+    # #self#, a, b
+    opt = code_typed1(f_with_slots, (Int,Int); optimize=true)
+    @test length(opt.slotnames) == length(opt.slotflags) == length(opt.slottypes) == 3
+    ir_ssa = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="slot2reg")))
+    @test length(ir_ssa.argtypes) == 3
+end
+
 let
     function test_useref(stmt, v, op)
         if isa(stmt, Expr)
diff --git a/test/compiler/validation.jl b/test/compiler/validation.jl
index c25aae71ab157..5fd074fee73ae 100644
--- a/test/compiler/validation.jl
+++ b/test/compiler/validation.jl
@@ -22,10 +22,9 @@ msig = Tuple{typeof(f22938),Int,Int,Int,Int}
 world = Base.get_world_counter()
 match = only(Base._methods_by_ftype(msig, -1, world))
 mi = Core.Compiler.specialize_method(match)
-c0 = Core.Compiler.retrieve_code_info(mi)
+c0 = Core.Compiler.retrieve_code_info(mi, world)
 
-@test isempty(Core.Compiler.validate_code(mi))
-@test isempty(Core.Compiler.validate_code(c0))
+@test isempty(Core.Compiler.validate_code(mi, c0))
 
 @testset "INVALID_EXPR_HEAD" begin
     c = copy(c0)
@@ -116,7 +115,7 @@ end
 @testset "SIGNATURE_NARGS_MISMATCH" begin
     old_sig = mi.def.sig
     mi.def.sig = Tuple{1,2}
-    errors = Core.Compiler.validate_code(mi)
+    errors = Core.Compiler.validate_code(mi, nothing)
     mi.def.sig = old_sig
     @test length(errors) == 1
     @test errors[1].kind === Core.Compiler.SIGNATURE_NARGS_MISMATCH
@@ -132,7 +131,7 @@ end
 
 @testset "SLOTNAMES_NARGS_MISMATCH" begin
     mi.def.nargs += 20
-    errors = Core.Compiler.validate_code(mi)
+    errors = Core.Compiler.validate_code(mi, c0)
     mi.def.nargs -= 20
     @test length(errors) == 2
     @test count(e.kind === Core.Compiler.SLOTNAMES_NARGS_MISMATCH for e in errors) == 1
diff --git a/test/complex.jl b/test/complex.jl
index 40b45870feafc..2b87655f1ebe0 100644
--- a/test/complex.jl
+++ b/test/complex.jl
@@ -44,7 +44,12 @@ end
     @testset for T in (Float16, Float32, Float64, BigFloat)
         t = true
         f = false
-
+        @testset "equality" begin
+            @test isequal(T(0.0)*im, T(0.0))
+            @test !isequal(T(0.0)*im, T(-0.0))
+            @test isequal(Complex(T(-0.0), T(0.0)), T(-0.0))
+            @test !isequal(T(-0.0)*im, T(-0.0))
+        end
         @testset "add and subtract" begin
             @test isequal(T(+0.0) + im, Complex(T(+0.0), T(+1.0)))
             @test isequal(T(-0.0) + im, Complex(T(-0.0), T(+1.0)))
diff --git a/test/core.jl b/test/core.jl
index a6926860ed8db..f71baa843d25f 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -15,7 +15,7 @@ include("testenv.jl")
 for (T, c) in (
         (Core.CodeInfo, []),
         (Core.CodeInstance, [:def, :rettype, :rettype_const, :ipo_purity_bits, :argescapes]),
-        (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :pure, :is_for_opaque_closure, :constprop=#]),
+        (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :is_for_opaque_closure, :constprop=#]),
         (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals=#]),
         (Core.MethodTable, [:module]),
         (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :min_world, :max_world, :func, :isleafsig, :issimplesig, :va]),
@@ -275,6 +275,30 @@ let mi = T26321{3,NTuple{3,Int}}((1,2,3)), mf = T26321{3,NTuple{3,Float64}}((1.0
     @test a isa Vector{<:T26321{3}}
 end
 
+@test Base.return_types() do
+    typejoin(Int, UInt)
+end  |> only == Type{typejoin(Int, UInt)}
+@test Base.return_types() do
+    typejoin(Int, UInt, Float64)
+end  |> only == Type{typejoin(Int, UInt, Float64)}
+
+let res = @test_throws TypeError let
+        Base.Experimental.@force_compile
+        typejoin(1, 2)
+        nothing
+    end
+    err = res.value
+    @test err.func === :<:
+end
+let res = @test_throws TypeError let
+        Base.Experimental.@force_compile
+        typejoin(1, 2, 3)
+        nothing
+    end
+    err = res.value
+    @test err.func === :<:
+end
+
 # promote_typejoin returns a Union only with Nothing/Missing combined with concrete types
 for T in (Nothing, Missing)
     @test Base.promote_typejoin(Int, Float64) === Real
@@ -1670,7 +1694,9 @@ end
 
 # issue #3221
 let x = fill(nothing, 1)
-    @test_throws MethodError x[1] = 1
+    @test_throws ErrorException("cannot convert a value to nothing for assignment") x[1] = 1
+    x = Vector{Union{}}(undef, 1)
+    @test_throws ArgumentError("cannot convert a value to Union{} for assignment") x[1] = 1
 end
 
 # issue #3220
@@ -3859,7 +3885,8 @@ PossiblyInvalidUnion{T} = Union{T,Int}
 # issue #13007
 call13007(::Type{Array{T,N}}) where {T,N} = 0
 call13007(::Type{Array}) = 1
-@test length(Base._methods(call13007, Tuple{Type{x} where x<:Array}, 4, typemax(UInt))) == 2
+@test Base._methods(call13007, Tuple{Type{x} where x<:Array}, 4, typemax(UInt)) === nothing
+@test length(Base._methods(call13007, Tuple{Type{x} where x<:Array}, 4, Base.get_world_counter())) == 2
 
 # detecting cycles during type intersection, e.g. #1631
 cycle_in_solve_tvar_constraints(::Type{Some{S}}, x::S) where {S} = 0
@@ -4891,7 +4918,7 @@ struct f47209
     x::Int
     f47209()::Nothing = new(1)
 end
-@test_throws MethodError f47209()
+@test_throws ErrorException("cannot convert a value to nothing for assignment") f47209()
 
 # issue #12096
 let a = Val{Val{TypeVar(:_, Int)}},
@@ -5243,10 +5270,10 @@ end
 GC.enable(true)
 
 # issue #18710
-bad_tvars() where {T} = 1
+@test_warn "declares type variable T but does not use it" @eval bad_tvars() where {T} = 1
 @test isa(which(bad_tvars, ()), Method)
 @test bad_tvars() === 1
-bad_tvars2() where {T} = T
+@test_warn "declares type variable T but does not use it" @eval bad_tvars2() where {T} = T
 @test_throws UndefVarError(:T) bad_tvars2()
 missing_tvar(::T...) where {T} = T
 @test_throws UndefVarError(:T) missing_tvar()
@@ -7302,11 +7329,11 @@ struct sparse_t31649
 end
 Base.convert(::Any, v::sparse_t31649) = copy(v.val)
 let spvec = sparse_t31649(zeros(Float64,5), Vector{Int64}())
-    @test_throws MethodError repr(spvec)
+    @test_throws MethodError convert(Any, spvec)
     # Try manually putting the problematic method into the cache (in
     # the original issue compiling the showerror method caused this to happen)
     @test convert(Any, nothing) === nothing
-    @test_throws MethodError repr(spvec)
+    @test_throws MethodError convert(Any, spvec)
 end
 
 # Issue #31062 - Accidental recursion in jl_has_concrete_subtype
@@ -7346,16 +7373,20 @@ end
 let code = code_lowered(FieldConvert)[1].code
     @test code[1] == Expr(:call, GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA), Expr(:static_parameter, 1))
     @test code[2] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 1)
-    @test code[3] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(2), Core.SlotNumber(2))
-    @test code[4] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 2)
-    @test code[5] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(4), Core.SlotNumber(3))
-    @test code[6] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 4)
-    @test code[7] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(6), Core.SlotNumber(5))
-    @test code[8] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 5)
-    @test code[9] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(8), Core.SlotNumber(6))
-    @test code[10] == Expr(:new, Core.SSAValue(1), Core.SSAValue(3), Core.SSAValue(5), Core.SlotNumber(4), Core.SSAValue(7), Core.SSAValue(9))
-    @test code[11] == Core.ReturnNode(Core.SSAValue(10))
- end
+    @test code[7] == Expr(:(=), Core.SlotNumber(10), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(2), Core.SlotNumber(10)))
+    @test code[8] == Core.SlotNumber(10)
+    @test code[9] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 2)
+    @test code[14] == Expr(:(=), Core.SlotNumber(9), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(9), Core.SlotNumber(9)))
+    @test code[15] == Core.SlotNumber(9)
+    @test code[16] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 4)
+    @test code[21] == Expr(:(=), Core.SlotNumber(8), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(16), Core.SlotNumber(8)))
+    @test code[22] == Core.SlotNumber(8)
+    @test code[23] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 5)
+    @test code[28] == Expr(:(=), Core.SlotNumber(7), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(23), Core.SlotNumber(7)))
+    @test code[29] == Core.SlotNumber(7)
+    @test code[30] == Expr(:new, Core.SSAValue(1), Core.SSAValue(8), Core.SSAValue(15), Core.SlotNumber(4), Core.SSAValue(22), Core.SSAValue(29))
+    @test code[31] == Core.ReturnNode(Core.SSAValue(30))
+end
 
 # Issue #32820
 function f32820(refs)
@@ -7847,6 +7878,17 @@ import .Foo45350: x45350
 f45350() = (global x45350 = 2)
 @test_throws ErrorException f45350()
 
+@testset "Error behavior of unsafe_convert for RefValue" begin
+    b = Base.RefValue{Int}()
+    @test Base.unsafe_convert(Ptr{Int}, b) !== C_NULL
+    b = Base.RefValue{Base.RefValue{Int}}()
+    # throws because we hit `b.x`
+    @test_throws Core.UndefRefError Base.unsafe_convert(Ptr{Base.RefValue{Int}}, b)
+    # throws because we hit `b.x`
+    b = Base.RefValue{Integer}()
+    @test_throws Core.UndefRefError Base.unsafe_convert(Ptr{Integer}, b)
+end
+
 # #46503 - redefine `invoke`d methods
 foo46503(@nospecialize(a), b::Union{Vector{Any}, Float64, Nothing}) = rand()
 foo46503(a::Int, b::Nothing) = @invoke foo46503(a::Any, b)
@@ -7933,8 +7975,38 @@ vect47476(::Type{T}) where {T} = T
 g47476(::Union{Nothing,Int,Val{T}}...) where {T} = T
 @test_throws UndefVarError(:T) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5)
 @test g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5, Val(6)) === 6
-let spec = only(methods(g47476)).specializations
+let spec = only(methods(g47476)).specializations::Core.SimpleVector
     @test !isempty(spec)
     @test any(mi -> mi !== nothing && Base.isvatuple(mi.specTypes), spec)
     @test all(mi -> mi === nothing || !Base.has_free_typevars(mi.specTypes), spec)
 end
+
+f48950(::Union{Int,d}, ::Union{c,Nothing}...) where {c,d} = 1
+@test f48950(1, 1, 1) == 1
+
+# Module as tparam in unionall
+struct ModTParamUnionAll{A, B}; end
+@test isa(objectid(ModTParamUnionAll{Base}), UInt)
+
+# effects for objectid
+for T in (Int, String, Symbol, Module)
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (T,)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(hash, (T,)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Some{T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Some{T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Some{Some{T}},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Some{Some{T}},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T,T},)))
+    @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T,T},)))
+end
+@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Ref{Int},)))
+@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Ref{Int}},)))
+# objectid for datatypes is inconsistant for types that have unbound type parameters.
+@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (DataType,)))
+@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Vector{Int}},)))
+
+# donotdelete should not taint consistency of the containing function
+f_donotdete(x) = (Core.Compiler.donotdelete(x); 1)
+@test Core.Compiler.is_consistent(Base.infer_effects(f_donotdete, (Tuple{Float64},)))
diff --git a/test/deprecation_exec.jl b/test/deprecation_exec.jl
index 4f19f9415ba29..5b465e05f0a12 100644
--- a/test/deprecation_exec.jl
+++ b/test/deprecation_exec.jl
@@ -116,8 +116,8 @@ begin # @deprecate
 
     # test that positional and keyword arguments are forwarded when
     # there is no explicit type annotation
-    @test DeprecationTests.old_return_args(1, 2, 3) == ((1, 2, 3),(;))
-    @test DeprecationTests.old_return_args(1, 2, 3; a = 4, b = 5) == ((1, 2, 3), (a = 4, b = 5))
+    @test_logs (:warn,) @test DeprecationTests.old_return_args(1, 2, 3) == ((1, 2, 3),(;))
+    @test_logs (:warn,) @test DeprecationTests.old_return_args(1, 2, 3; a = 4, b = 5) == ((1, 2, 3), (a = 4, b = 5))
 end
 
 f24658() = depwarn24658()
diff --git a/test/dict.jl b/test/dict.jl
index 65f8939bc6dfc..6a47c3c6eea8b 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -1363,3 +1363,11 @@ end
     sizehint!(d, 10)
     @test length(d.slots) < 100
 end
+
+# getindex is :effect_free and :terminates but not :consistent
+for T in (Int, Float64, String, Symbol)
+    @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+    @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+    @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+    @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+end
diff --git a/test/errorshow.jl b/test/errorshow.jl
index e081695f2f15d..94722b803865f 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -92,8 +92,15 @@ method_c2(x::Int32, y::Float64) = true
 method_c2(x::Int32, y::Int32, z::Int32) = true
 method_c2(x::T, y::T, z::T) where {T<:Real} = true
 
-Base.show_method_candidates(buf, Base.MethodError(method_c2,(1., 1., 2)))
-@test occursin( "\n\nClosest candidates are:\n  method_c2(!Matched::Int32, ::Float64, ::Any...)$cmod$cfile$(c2line+2)\n  method_c2(::T, ::T, !Matched::T) where T<:Real$cmod$cfile$(c2line+5)\n  method_c2(!Matched::Int32, ::Any...)$cmod$cfile$(c2line+1)\n  ...\n", String(take!(buf)))
+let s
+    Base.show_method_candidates(buf, Base.MethodError(method_c2, (1., 1., 2)))
+    s = String(take!(buf))
+    @test occursin("\n\nClosest candidates are:\n  ", s)
+    @test occursin("\n  method_c2(!Matched::Int32, ::Float64, ::Any...)$cmod$cfile$(c2line+2)\n  ", s)
+    @test occursin("\n  method_c2(::T, ::T, !Matched::T) where T<:Real$cmod$cfile$(c2line+5)\n  ", s)
+    @test occursin("\n  method_c2(!Matched::Int32, ::Any...)$cmod$cfile$(c2line+1)\n  ", s)
+    @test occursin("\n  ...\n", s)
+end
 
 c3line = @__LINE__() + 1
 method_c3(x::Float64, y::Float64) = true
@@ -933,3 +940,66 @@ let err_str
     err_str = @except_str "a" + "b" MethodError
     @test occursin("String concatenation is performed with *", err_str)
 end
+
+@testset "unused argument names" begin
+    g(::Int) = backtrace()
+    bt = g(1)
+    @test !contains(sprint(Base.show_backtrace, bt), "#unused#")
+end
+
+# issue #49002
+let buf = IOBuffer()
+    Base.show_method_candidates(buf, Base.MethodError(typeof, (17,)), pairs((foo = :bar,)))
+    @test isempty(take!(buf))
+    Base.show_method_candidates(buf, Base.MethodError(isa, ()), pairs((a = 5,)))
+    @test isempty(take!(buf))
+end
+
+f_internal_wrap(g, a; kw...) = error();
+@inline f_internal_wrap(a; kw...) = f_internal_wrap(identity, a; kw...);
+bt = try
+    f_internal_wrap(1)
+catch
+    catch_backtrace()
+end
+@test !occursin("#f_internal_wrap#", sprint(Base.show_backtrace, bt))
+
+g_collapse_pos(x, y=1.0, z=2.0) = error()
+bt = try
+    g_collapse_pos(1.0)
+catch
+    catch_backtrace()
+end
+bt_str = sprint(Base.show_backtrace, bt)
+@test occursin("g_collapse_pos(x::Float64, y::Float64, z::Float64)", bt_str)
+@test !occursin("g_collapse_pos(x::Float64)", bt_str)
+
+g_collapse_kw(x; y=2.0) = error()
+bt = try
+    g_collapse_kw(1.0)
+catch
+    catch_backtrace()
+end
+bt_str = sprint(Base.show_backtrace, bt)
+@test occursin("g_collapse_kw(x::Float64; y::Float64)", bt_str)
+@test !occursin("g_collapse_kw(x::Float64)", bt_str)
+
+g_collapse_pos_kw(x, y=1.0; z=2.0) = error()
+bt = try
+    g_collapse_pos_kw(1.0)
+catch
+    catch_backtrace()
+end
+bt_str = sprint(Base.show_backtrace, bt)
+@test occursin("g_collapse_pos_kw(x::Float64, y::Float64; z::Float64)", bt_str)
+@test !occursin("g_collapse_pos_kw(x::Float64, y::Float64)", bt_str)
+@test !occursin("g_collapse_pos_kw(x::Float64)", bt_str)
+
+# Test Base.print_with_compare in convert MethodErrors
+struct TypeCompareError{A,B} <: Exception end
+let e = @test_throws MethodError convert(TypeCompareError{Float64,1}, TypeCompareError{Float64,2}())
+    str = sprint(Base.showerror, e.value)
+    @test  occursin("TypeCompareError{Float64,2}", str)
+    @test  occursin("TypeCompareError{Float64,1}", str)
+    @test !occursin("TypeCompareError{Float64{},2}", str) # No {...} for types without params
+end
diff --git a/test/file.jl b/test/file.jl
index 7ca49fe3a065b..1d2ac4c6f9132 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -598,6 +598,17 @@ close(s)
 # This section tests temporary file and directory creation.           #
 #######################################################################
 
+@testset "invalid read/write flags" begin
+    @test try
+        open("this file is not expected to exist", read=false, write=false)
+        false
+    catch e
+        isa(e, SystemError) || rethrow()
+        @test endswith(sprint(showerror, e), "Invalid argument")
+        true
+    end
+end
+
 @testset "quoting filenames" begin
     @test try
         open("this file is not expected to exist")
@@ -1253,7 +1264,7 @@ let f = open(file, "w")
     if Sys.iswindows()
         f = RawFD(ccall(:_open, Cint, (Cstring, Cint), file, Base.Filesystem.JL_O_RDONLY))
     else
-        f = RawFD(ccall(:open, Cint, (Cstring, Cint), file, Base.Filesystem.JL_O_RDONLY))
+        f = RawFD(ccall(:open, Cint, (Cstring, Cint, UInt32...), file, Base.Filesystem.JL_O_RDONLY))
     end
     test_LibcFILE(Libc.FILE(f, Libc.modestr(true, false)))
 end
diff --git a/test/functional.jl b/test/functional.jl
index c9b0b270baeb7..19355d13ff335 100644
--- a/test/functional.jl
+++ b/test/functional.jl
@@ -139,6 +139,13 @@ end
 @test findall(!iszero, x^2 for x in -1:0.5:1) == [1, 2, 4, 5]
 @test argmin(x^2 for x in -1:0.5:1) == 3
 
+# findall return type, see #45495
+let gen = (i for i in 1:3);
+    @test @inferred(findall(x -> true, gen))::Vector{Int} == [1, 2, 3]
+    @test @inferred(findall(x -> false, gen))::Vector{Int} == Int[]
+    @test @inferred(findall(x -> x < 0, gen))::Vector{Int} == Int[]
+end
+
 # inference on vararg generator of a type (see #22907 comments)
 let f(x) = collect(Base.Generator(=>, x, x))
     @test @inferred(f((1,2))) == [1=>1, 2=>2]
diff --git a/test/gc.jl b/test/gc.jl
new file mode 100644
index 0000000000000..ecf71fe51f6ad
--- /dev/null
+++ b/test/gc.jl
@@ -0,0 +1,25 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+
+function run_gctest(file)
+    let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file`
+        @testset for test_nthreads in (1, 2, 4)
+            new_env = copy(ENV)
+            new_env["JULIA_NUM_THREADS"] = string(test_nthreads)
+            new_env["JULIA_NUM_GC_THREADS"] = string(test_nthreads)
+            @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)))
+        end
+    end
+end
+
+# !!! note:
+#     Since we run our tests on 32bit OS as well we confine ourselves
+#     to parameters that allocate about 512MB of objects. Max RSS is lower
+#     than that.
+@testset "GC threads" begin
+    run_gctest("gc/binarytree.jl")
+    run_gctest("gc/linkedlist.jl")
+    run_gctest("gc/objarray.jl")
+    run_gctest("gc/chunks.jl")
+end
diff --git a/test/gc/binarytree.jl b/test/gc/binarytree.jl
new file mode 100644
index 0000000000000..896f47fa4c9c7
--- /dev/null
+++ b/test/gc/binarytree.jl
@@ -0,0 +1,54 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module BinaryTreeMutable
+
+# Adopted from
+# https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees
+
+using Base.Threads
+using Printf
+
+mutable struct Node
+    l::Union{Nothing, Node}
+    r::Union{Nothing, Node}
+end
+
+function make(n::Int)
+    return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1))
+end
+
+function check(node::Node)
+    return  1 + (node.l === nothing ? 0 : check(node.l) + check(node.r))
+end
+
+function binary_trees(io, n::Int)
+    @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1))
+
+    long_tree = make(n)
+    minDepth = 4
+    resultSize = div((n - minDepth), 2) + 1
+    results = Vector{String}(undef, resultSize)
+    Threads.@threads for depth in minDepth:2:n
+        c = 0
+        niter = 1 << (n - depth + minDepth)
+        for _ in 1:niter
+            c += check(make(depth))
+        end
+        index = div((depth - minDepth),2) + 1
+        results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c
+    end
+
+    for i in results
+        write(io, i)
+    end
+
+    @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree)
+end
+
+end #module
+
+using .BinaryTreeMutable
+
+# Memory usage is 466MB
+BinaryTreeMutable.binary_trees(devnull, 16)
+GC.gc()
diff --git a/test/gc/chunks.jl b/test/gc/chunks.jl
new file mode 100644
index 0000000000000..08af59ecbf973
--- /dev/null
+++ b/test/gc/chunks.jl
@@ -0,0 +1,17 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# MWE from https://github.com/JuliaLang/julia/issues/49501
+N = 1_000_000  # or larger
+T = BigFloat
+
+struct Q{T}
+    a::T
+    b::T
+end
+
+# Memoy use is ~512MB
+let
+    A = [Q(rand(T), rand(T)) for _ in 1:N]
+end
+
+GC.gc()
diff --git a/test/gc/linkedlist.jl b/test/gc/linkedlist.jl
new file mode 100644
index 0000000000000..669e5f8ec21d9
--- /dev/null
+++ b/test/gc/linkedlist.jl
@@ -0,0 +1,23 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+mutable struct ListNode
+  key::Int64
+  next::ListNode
+  ListNode() = new()
+  ListNode(x)= new(x)
+  ListNode(x,y) = new(x,y);
+end
+
+function list(N=16*1024^2)
+    start::ListNode = ListNode(1)
+    current::ListNode = start
+    for i = 2:N
+        current = ListNode(i,current)
+    end
+    return current.key
+end
+
+# Memory use is 512 MB
+_ = list()
+
+GC.gc()
diff --git a/test/gc/objarray.jl b/test/gc/objarray.jl
new file mode 100644
index 0000000000000..d36fcedef71a4
--- /dev/null
+++ b/test/gc/objarray.jl
@@ -0,0 +1,36 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Random: seed!
+seed!(1)
+
+abstract type Cell end
+
+struct CellA<:Cell
+    a::Ref{Int}
+end
+
+struct CellB<:Cell
+    b::String
+end
+
+function fillcells!(mc::Array{Cell})
+    for ind in eachindex(mc)
+        mc[ind] = ifelse(rand() > 0.5, CellA(ind), CellB(string(ind)))
+    end
+    return mc
+end
+
+function work(size)
+    mcells = Array{Cell}(undef, size, size)
+    fillcells!(mcells)
+end
+
+function run(maxsize)
+    Threads.@threads for i in 1:maxsize
+        work(i*375)
+    end
+end
+
+# Memory usage 581 MB
+run(4)
+GC.gc()
diff --git a/test/gmp.jl b/test/gmp.jl
index be11c70e5064f..8f6be13c38054 100644
--- a/test/gmp.jl
+++ b/test/gmp.jl
@@ -336,11 +336,13 @@ end
 @testset "digits" begin
     n = Int64(2080310129088201558)
     N = big(n)
-    for base in (2,7,10,11,16,30,50,62,64,100), pad in (0,1,10,100)
-        @test digits(n; base, pad) == digits(N; base, pad)
+    for base in (2,7,10,11,16,30,50,62,64,100,128), pad in (0,1,10,100)
+        @test digits(n; base, pad) == digits(N; base, pad) == digits(UInt8, N; base, pad)
         @test digits(-n; base, pad) == digits(-N; base, pad)
         @test digits!(Vector{Int}(undef, pad), n; base) == digits!(Vector{Int}(undef, pad), N; base)
     end
+    @test digits(UInt8, n; base=1<<8) == digits(UInt8, N; base=1<<8)
+    @test digits(UInt16, n; base=1<<16) == digits(UInt16, N; base=1<<16)
 end
 
 # serialization (#5133)
diff --git a/test/hashing.jl b/test/hashing.jl
index 9bd076554962f..943109924f280 100644
--- a/test/hashing.jl
+++ b/test/hashing.jl
@@ -60,6 +60,9 @@ end
 @test hash(nextfloat(2.0^63)) == hash(UInt64(nextfloat(2.0^63)))
 @test hash(prevfloat(2.0^64)) == hash(UInt64(prevfloat(2.0^64)))
 
+# issue #48744
+@test hash(typemin(Int)//1) === hash(big(typemin(Int)//1))
+
 # issue #9264
 @test hash(1//6,zero(UInt)) == invoke(hash, Tuple{Real, UInt}, 1//6, zero(UInt))
 @test hash(1//6) == hash(big(1)//big(6))
@@ -201,9 +204,9 @@ let a = QuoteNode(1), b = QuoteNode(1.0)
     @test (hash(a)==hash(b)) == (a==b)
 end
 
-let a = Expr(:block, Core.TypedSlot(1, Any)),
-    b = Expr(:block, Core.TypedSlot(1, Any)),
-    c = Expr(:block, Core.TypedSlot(3, Any))
+let a = Expr(:block, Core.SlotNumber(1)),
+    b = Expr(:block, Core.SlotNumber(1)),
+    c = Expr(:block, Core.SlotNumber(3))
     @test a == b && hash(a) == hash(b)
     @test a != c && hash(a) != hash(c)
     @test b != c && hash(b) != hash(c)
@@ -284,3 +287,18 @@ end
         end
     end
 end
+
+if Sys.WORD_SIZE >= 64
+    @testset "very large string" begin
+        N = 2^31+1
+        s = String('\0'^N);
+        objectid(s)
+    end
+end
+
+# Issue #49620
+let t1 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}},
+    t2 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}}
+    @test hash(t1) == hash(t2)
+    @test length(Set{Type}([t1, t2])) == 1
+end
diff --git a/test/int.jl b/test/int.jl
index 3bfa6adc99301..f79bc5a9781d0 100644
--- a/test/int.jl
+++ b/test/int.jl
@@ -202,7 +202,7 @@ end
         for T2 in Base.BitInteger_types
             for op in (>>, <<, >>>)
                 if sizeof(T2)==sizeof(Int) || T <: Signed || (op==>>>) || T2 <: Unsigned
-                    @test Core.Compiler.is_total(Base.infer_effects(op, (T, T2)))
+                    @test Core.Compiler.is_foldable_nothrow(Base.infer_effects(op, (T, T2)))
                 else
                     @test Core.Compiler.is_foldable(Base.infer_effects(op, (T, T2)))
                     # #47835, TODO implement interval arithmetic analysis
@@ -300,6 +300,29 @@ end
     end
 end
 
+@testset "typemin typemax" begin
+    @test typemin(Int8   ) === Int8(-128)
+    @test typemax(Int8   ) === Int8(127)
+    @test typemin(UInt8  ) === UInt8(0)
+    @test typemax(UInt8  ) === UInt8(255)
+    @test typemin(Int16  ) === Int16(-32768)
+    @test typemax(Int16  ) === Int16(32767)
+    @test typemin(UInt16 ) === UInt16(0)
+    @test typemax(UInt16 ) === UInt16(65535)
+    @test typemin(Int32  ) === Int32(-2147483648)
+    @test typemax(Int32  ) === Int32(2147483647)
+    @test typemin(UInt32 ) === UInt32(0)
+    @test typemax(UInt32 ) === UInt32(4294967295)
+    @test typemin(Int64  ) === Int64(-9223372036854775808)
+    @test typemax(Int64  ) === Int64(9223372036854775807)
+    @test typemin(UInt64 ) === UInt64(0)
+    @test typemax(UInt64 ) === UInt64(0xffff_ffff_ffff_ffff)
+    @test typemin(UInt128) === UInt128(0)
+    @test typemax(UInt128) === UInt128(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff)
+    @test typemin(Int128 ) === Int128(-170141183460469231731687303715884105728)
+    @test typemax(Int128 ) === Int128(170141183460469231731687303715884105727)
+end
+
 @testset "issue #15489" begin
     @test 0x00007ffea27edaa0 + (-40) === (-40) + 0x00007ffea27edaa0 === 0x00007ffea27eda78
     @test UInt64(1) * Int64(-1) === typemax(UInt64)
@@ -422,30 +445,6 @@ end
     @test bitreverse(Int32(456618293)) === Int32(-1399919400)
 end
 
-@testset "min/max of datatype" begin
-    @test typemin(Int8) === Int8(-128)
-    @test typemin(UInt8) === UInt8(0)
-    @test typemin(Int16) === Int16(-32768)
-    @test typemin(UInt16) === UInt16(0)
-    @test typemin(Int32) === Int32(-2147483648)
-    @test typemin(UInt32) === UInt32(0)
-    @test typemin(Int64) === Int64(-9223372036854775808)
-    @test typemin(UInt64) === UInt64(0)
-    @test typemin(Int128) === Int128(-170141183460469231731687303715884105728)
-    @test typemin(UInt128) === UInt128(0)
-
-    @test typemax(Int8) === Int8(127)
-    @test typemax(UInt8) === UInt8(255)
-    @test typemax(Int16) === Int16(32767)
-    @test typemax(UInt16) === UInt16(65535)
-    @test typemax(Int32) === Int32(2147483647)
-    @test typemax(UInt32) === UInt32(4294967295)
-    @test typemax(Int64) === Int64(9223372036854775807)
-    @test typemax(UInt64) === UInt64(0xffffffffffffffff)
-    @test typemax(Int128) === Int128(170141183460469231731687303715884105727)
-    @test typemax(UInt128) === UInt128(0xffffffffffffffffffffffffffffffff)
-end
-
 @testset "BitIntegerType" begin
     @test Int isa Base.BitIntegerType
     @test Base.BitIntegerType === Union{
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index 2215cbaf36a56..ceaac235a3da9 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -273,6 +273,8 @@ end
 
     @test powermod(2, big(3), 5) == 3
     @test powermod(2, big(3), -5) == -2
+    @inferred  powermod(2, -2, -5)
+    @inferred  powermod(big(2), -2, UInt(5))
 end
 
 @testset "nextpow/prevpow" begin
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index dec4412ffd4d5..aa2a9649857c4 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -148,7 +148,6 @@ end
     @test_intrinsic Core.Intrinsics.sub_float Float16(3.3) Float16(2) Float16(1.301)
     @test_intrinsic Core.Intrinsics.mul_float Float16(3.3) Float16(2) Float16(6.6)
     @test_intrinsic Core.Intrinsics.div_float Float16(3.3) Float16(2) Float16(1.65)
-    @test_intrinsic Core.Intrinsics.rem_float Float16(3.3) Float16(2) Float16(1.301)
 
     # ternary
     @test_intrinsic Core.Intrinsics.fma_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index d8211aa7086b3..ec77903b4a5b8 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -348,3 +348,12 @@ end
 @testset "bytesavailable devnull" begin
     @test bytesavailable(devnull) == 0
 end
+
+@testset "#48188 read_sub for non Array AbstractArray" begin
+    a = [0,0,0]
+    v = @view a[1:2]
+    io = IOBuffer()
+    write(io,1)
+    seek(io,0)
+    @test Base.read_sub(io,v,1,1) == [1,0]
+end
diff --git a/test/keywordargs.jl b/test/keywordargs.jl
index a5116afa3c31d..0aed0544b7e2e 100644
--- a/test/keywordargs.jl
+++ b/test/keywordargs.jl
@@ -387,3 +387,10 @@ f41416(a...="a"; b=true) = (b, a)
 @test f41416(;b=false)   === (false, ("a",))
 @test f41416(33)         === (true, (33,))
 @test f41416(3; b=false) === (false, (3,))
+
+Core.kwcall(i::Int) = "hi $i"
+let m = first(methods(Core.kwcall, (NamedTuple,typeof(kwf1),Vararg)))
+    @test m.name === :kwf1
+    @test Core.kwcall(1) == "hi 1"
+    @test which(Core.kwcall, (Int,)).name === :kwcall
+end
diff --git a/test/llvmcall.jl b/test/llvmcall.jl
index a89696ed9c6c2..f7f6b44b29e62 100644
--- a/test/llvmcall.jl
+++ b/test/llvmcall.jl
@@ -264,5 +264,7 @@ MyStruct(kern) = MyStruct(kern, reinterpret(Core.LLVMPtr{UInt8,1}, 0))
 MyStruct() = MyStruct(0)
 s = MyStruct()
 
+# ensure LLVMPtr properly subtypes
+@test eltype(supertype(Core.LLVMPtr{UInt8,1})) <: UInt8
 @test s.kern == 0
 @test reinterpret(Int, s.ptr) == 0
diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.jl
index c5a5b2be86614..e7ddf12d79bc7 100644
--- a/test/llvmpasses/alloc-opt-gcframe.jl
+++ b/test/llvmpasses/alloc-opt-gcframe.jl
@@ -14,7 +14,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 # CHECK-LABEL: @return_obj
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
-# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
 # CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
 # CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
 # CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
diff --git a/test/llvmpasses/alloc-opt-pass.jl b/test/llvmpasses/alloc-opt-pass.ll
similarity index 55%
rename from test/llvmpasses/alloc-opt-pass.jl
rename to test/llvmpasses/alloc-opt-pass.ll
index 7ea9b6eff3ecb..4ce152669246f 100644
--- a/test/llvmpasses/alloc-opt-pass.jl
+++ b/test/llvmpasses/alloc-opt-pass.ll
@@ -1,29 +1,24 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
+; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S - | FileCheck %s
-# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S - | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s
 
-isz = sizeof(UInt) == 8 ? "i64" : "i32"
-
-println("""
 @tag = external addrspace(10) global {}
-""")
 
-# Test that the gc_preserve intrinsics are deleted directly.
+; Test that the gc_preserve intrinsics are deleted directly.
 
-# CHECK-LABEL: @preserve_branches
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK: L1:
-# CHECK-NOT: @llvm.julia.gc_preserve_begin
-# CHECK-NEXT: @external_function()
-# CHECK-NEXT: br i1 %b2, label %L2, label %L3
+; CHECK-LABEL: @preserve_branches
+; CHECK: call {}*** @julia.ptls_states()
+; CHECK: L1:
+; CHECK-NOT: @llvm.julia.gc_preserve_begin
+; CHECK-NEXT: @external_function()
+; CHECK-NEXT: br i1 %b2, label %L2, label %L3
 
-# CHECK: L2:
-# CHECK: @external_function()
-# CHECK-NEXT: br label %L3
+; CHECK: L2:
+; CHECK: @external_function()
+; CHECK-NEXT: br label %L3
 
-# CHECK: L3:
-println("""
+; CHECK: L3:
 define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
@@ -31,7 +26,7 @@ define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
   br i1 %b, label %L1, label %L3
 
 L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
   %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %v)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
@@ -43,22 +38,20 @@ L2:
 L3:
   ret void
 }
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @preserve_branches2
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK: L1:
-# CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2
-# CHECK-NEXT: @external_function()
-# CHECK-NEXT: br i1 %b2, label %L2, label %L3
-
-# CHECK: L2:
-# CHECK: @external_function()
-# CHECK-NEXT: br label %L3
-
-# CHECK: L3:
-println("""
+; CHECK-LABEL: }{{$}}
+
+; CHECK-LABEL: @preserve_branches2
+; CHECK: call {}*** @julia.ptls_states()
+; CHECK: L1:
+; CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2
+; CHECK-NEXT: @external_function()
+; CHECK-NEXT: br i1 %b2, label %L2, label %L3
+
+; CHECK: L2:
+; CHECK: @external_function()
+; CHECK-NEXT: br label %L3
+
+; CHECK: L3:
 define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
@@ -67,7 +60,7 @@ define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
   br i1 %b, label %L1, label %L3
 
 L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
   %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* nonnull %v2)
   call void @external_function()
   br i1 %b2, label %L2, label %L3
@@ -79,57 +72,50 @@ L2:
 L3:
   ret void
 }
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @legal_int_types
-# CHECK: alloca [12 x i8]
-# CHECK-NOT: alloca i96
-# CHECK: ret void
-println("""
+; CHECK-LABEL: }{{$}}
+
+; CHECK-LABEL: @legal_int_types
+; CHECK: alloca [12 x i8]
+; CHECK-NOT: alloca i96
+; CHECK: ret void
 define void @legal_int_types() {
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-  %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 12, {} addrspace(10)* @tag)
+  %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 12, {} addrspace(10)* @tag)
   %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)*
   %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2)
   ret void
 }
-""")
-# CHECK-LABEL: }{{$}}
-
+; CHECK-LABEL: }{{$}}
 
 
-println("""
 declare void @external_function()
 declare {} addrspace(10)* @external_function2()
 declare {}*** @julia.ptls_states()
 declare {}*** @julia.get_pgcstack()
-declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
+declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
 declare {}* @julia.pointer_from_objref({} addrspace(11)*)
 declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
 declare token @llvm.julia.gc_preserve_begin(...)
 declare void @llvm.julia.gc_preserve_end(token)
-""")
-
-# CHECK-LABEL: @memref_collision
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: store {}
-# CHECK: store i
-# CHECK-NOT: store {}
-# CHECK: L1:
-# CHECK: load {}
-# CHECK: L2:
-# CHECK: load i
-println("""
-define void @memref_collision($isz %x) {
+
+; CHECK-LABEL: @memref_collision
+; CHECK: call {}*** @julia.ptls_states()
+; CHECK-NOT: store {}
+; CHECK: store i
+; CHECK-NOT: store {}
+; CHECK: L1:
+; CHECK: load {}
+; CHECK: L2:
+; CHECK: load i
+define void @memref_collision(i64 %x) {
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %v_p = bitcast {} addrspace(10)* %v to $isz addrspace(10)*
-  store $isz %x, $isz addrspace(10)* %v_p
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
+  %v_p = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
+  store i64 %x, i64 addrspace(10)* %v_p
   br i1 0, label %L1, label %L2
 
 L1:
@@ -138,9 +124,8 @@ L1:
   ret void
 
 L2:
-  %v2 = bitcast {} addrspace(10)* %v to $isz addrspace(10)*
+  %v2 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
   %v2_x = load i64, i64 addrspace(10)* %v2
   ret void
 }
-""")
-# CHECK-LABEL: }{{$}}
+; CHECK-LABEL: }{{$}}
diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll
index f7ea31fde6b05..8a21091ce558c 100644
--- a/test/llvmpasses/alloc-opt-unsized.ll
+++ b/test/llvmpasses/alloc-opt-unsized.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s
 
 source_filename = "text"
diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll
index ccb8cc69f0f66..1a04db5749b39 100644
--- a/test/llvmpasses/cpu-features.ll
+++ b/test/llvmpasses/cpu-features.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll
index 54fc19566ff32..61e9e33875078 100644
--- a/test/llvmpasses/final-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/final-lower-gc-addrspaces.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index 95e88f9feac9e..6f1be3d240ae4 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s
 
@@ -67,6 +69,21 @@ top:
   ret {} addrspace(10)* %v
 }
 
+define {} addrspace(10)* @gc_alloc_lowering_var(i64 %size) {
+top:
+; CHECK-LABEL: @gc_alloc_lowering_var
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %ptls = call {}*** @julia.ptls_states()
+  %ptls_i8 = bitcast {}*** %ptls to i8*
+; CHECK: %0 = add i64 %size, 8
+; CHECK: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null)
+  %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size)
+  %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
+  %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
+  store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0
+  ret {} addrspace(10)* %v
+}
+
 !0 = !{!1, !1, i64 0}
 !1 = !{!"jtbaa_gcframe", !2, i64 0}
 !2 = !{!"jtbaa"}
diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll
index 14bae9ff8a8f1..668c6ff3dd261 100644
--- a/test/llvmpasses/float16.ll
+++ b/test/llvmpasses/float16.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p
-; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s
-; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0  -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1  --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s
 
-define half @demotehalf_test(half %a, half %b) {
+define half @demotehalf_test(half %a, half %b) #0 {
+top:
 ; CHECK-LABEL: @demotehalf_test(
 ; CHECK-NEXT:  top:
 ; CHECK-NEXT:    %0 = fpext half %a to float
@@ -44,6 +45,42 @@ define half @demotehalf_test(half %a, half %b) {
 ; CHECK-NEXT:    %36 = fadd float %34, %35
 ; CHECK-NEXT:    %37 = fptrunc float %36 to half
 ; CHECK-NEXT:    ret half %37
+;
+  %0 = fadd half %a, %b
+  %1 = fadd half %0, %b
+  %2 = fadd half %1, %b
+  %3 = fmul half %2, %b
+  %4 = fdiv half %3, %b
+  %5 = insertelement <2 x half> undef, half %a, i32 0
+  %6 = insertelement <2 x half> %5, half %b, i32 1
+  %7 = insertelement <2 x half> undef, half %b, i32 0
+  %8 = insertelement <2 x half> %7, half %b, i32 1
+  %9 = fadd <2 x half> %6, %8
+  %10 = extractelement <2 x half> %9, i32 0
+  %11 = extractelement <2 x half> %9, i32 1
+  %12 = fadd half %10, %11
+  %13 = fadd half %12, %4
+  ret half %13
+}
+
+define half @native_half_test(half %a, half %b) #1 {
+; CHECK-LABEL: @native_half_test(
+; CHECK-NEXT   top:
+; CHECK-NEXT     %0 = fadd half %a, %b
+; CHECK-NEXT     %1 = fadd half %0, %b
+; CHECK-NEXT     %2 = fadd half %1, %b
+; CHECK-NEXT     %3 = fmul half %2, %b
+; CHECK-NEXT     %4 = fdiv half %3, %b
+; CHECK-NEXT     %5 = insertelement <2 x half> undef, half %a, i32 0
+; CHECK-NEXT     %6 = insertelement <2 x half> %5, half %b, i32 1
+; CHECK-NEXT     %7 = insertelement <2 x half> undef, half %b, i32 0
+; CHECK-NEXT     %8 = insertelement <2 x half> %7, half %b, i32 1
+; CHECK-NEXT     %9 = fadd <2 x half> %6, %8
+; CHECK-NEXT     %10 = extractelement <2 x half> %9, i32 0
+; CHECK-NEXT     %11 = extractelement <2 x half> %9, i32 1
+; CHECK-NEXT     %12 = fadd half %10, %11
+; CHECK-NEXT     %13 = fadd half %12, %4
+; CHECK-NEXT     ret half %13
 ;
 top:
   %0 = fadd half %a, %b
@@ -62,3 +99,6 @@ top:
   %13 = fadd half %12, %4
   ret half %13
 }
+
+attributes #0 = { "target-features"="-avx512fp16" }
+attributes #1 = { "target-features"="+avx512fp16" }
diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll
index 84f120712734b..eefd847bf68fa 100644
--- a/test/llvmpasses/gcroots.ll
+++ b/test/llvmpasses/gcroots.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll
new file mode 100644
index 0000000000000..250ad620b05e6
--- /dev/null
+++ b/test/llvmpasses/julia-licm-fail.ll
@@ -0,0 +1,96 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s
+
+; COM: This file contains functions that should not trigger allocations to be hoisted out of loops
+
+@tag = external addrspace(10) global {}, align 16
+
+; COM: Tests that an escape in a loop prevents hoisting of the allocation
+; CHECK-LABEL: @julia_escape_alloc
+define void @julia_escape_alloc(i1 %ret) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %current_task = bitcast {}*** %pgcstack to {}**
+; CHECK: br label %preheader
+  br label %preheader
+; CHECK: preheader:
+preheader:
+; CHECK-NOT: julia.gc_alloc_obj
+; CHECK-NEXT: br label %loop
+  br label %loop
+; CHECK: loop:
+loop:
+; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+  %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc)
+  %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc)
+  br i1 %ret, label %return, label %loop
+return:
+  ret void
+}
+
+; COM: Tests that addrescape in a loop prevents hoisting of the allocation
+; CHECK-LABEL: @julia_addrescape_alloc
+define void @julia_addrescape_alloc(i1 %ret) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %current_task = bitcast {}*** %pgcstack to {}**
+; CHECK: br label %preheader
+  br label %preheader
+; CHECK: preheader:
+preheader:
+; CHECK-NOT: julia.gc_alloc_obj
+; CHECK-NEXT: br label %loop
+  br label %loop
+; CHECK: loop:
+loop:
+; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+  %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
+  %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
+; CHECK-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast)
+  %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast)
+  br i1 %ret, label %return, label %loop
+return:
+  ret void
+}
+
+declare void @julia.write_barrier({}*, ...)
+
+declare {}*** @julia.get_pgcstack()
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #1
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: inaccessiblemem_or_argmemonly
+declare void @ijl_gc_queue_root({} addrspace(10)*) #3
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
+
+; COM: escape to make it easy to find
+declare nonnull {} addrspace(10)* @escape({} addrspace(10)*)
+
+; COM: addrescape function
+declare nonnull {}* @julia.pointer_from_objref({} addrspace(11)*)
+
+attributes #0 = { "probe-stack"="inline-asm" }
+attributes #1 = { allocsize(1) }
+attributes #2 = { argmemonly nofree nosync nounwind willreturn }
+attributes #3 = { inaccessiblemem_or_argmemonly }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll
new file mode 100644
index 0000000000000..977b8e2a787f9
--- /dev/null
+++ b/test/llvmpasses/julia-licm-missed.ll
@@ -0,0 +1,109 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s
+
+; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops
+; COM: i.e. they are missed optimizations
+; COM: Better optimization could potentially enable allocations to be hoisted out of these loops
+
+@tag = external addrspace(10) global {}, align 16
+
+; COM: Currently we don't hoist allocations that have references stored into them out of loops
+; COM: This is because we need to insert write barriers for the stores when the storee does not
+; COM: dominate the allocation after it has been moved out of the loop
+; CHECK-LABEL: @julia_refstore
+define void @julia_refstore({} addrspace(10)* %obj, i1 %ret) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %current_task = bitcast {}*** %pgcstack to {}**
+; CHECK: br label %preheader
+  br label %preheader
+; CHECK: preheader:
+preheader:
+; CHECK-NOT: julia.gc_alloc_obj
+; CHECK-NEXT: br label %loop
+  br label %loop
+; CHECK: loop:
+loop:
+; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+  %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
+  %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)*
+; CHECK-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)*
+  %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)*
+; CHECK-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8
+  store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8
+  br i1 %ret, label %return, label %loop
+return:
+  ret void
+}
+
+; COM: Currently our LLVM-level escape analysis doesn't handle phi nodes at all
+; COM: so this allocation is counted as 'escaping' despite the fact that it's
+; COM: clearly dead
+; CHECK-LABEL: @julia_phi
+define void @julia_phi({} addrspace(10)* %obj, i1 %ret) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %current_task = bitcast {}*** %pgcstack to {}**
+; CHECK: br label %preheader
+  br label %preheader
+; CHECK: preheader:
+preheader:
+; CHECK-NOT: julia.gc_alloc_obj
+; CHECK-NEXT: br label %loop
+  br label %loop
+; CHECK: loop:
+loop:
+; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+  %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+  br label %other
+; CHECK: other:
+other:
+; CHECK-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ]
+  %phi = phi {} addrspace(10)* [ %alloc, %loop ]
+  br i1 %ret, label %return, label %loop
+return:
+  ret void
+}
+
+
+
+declare void @julia.write_barrier({}*, ...)
+
+declare {}*** @julia.get_pgcstack()
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #1
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+; Function Attrs: inaccessiblemem_or_argmemonly
+declare void @ijl_gc_queue_root({} addrspace(10)*) #3
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
+
+; COM: escape to make it easy to find
+declare nonnull {} addrspace(10)* @escape({} addrspace(10)*)
+
+; COM: addrescape function
+declare nonnull {}* @julia.pointer_from_objref({} addrspace(11)*)
+
+attributes #0 = { "probe-stack"="inline-asm" }
+attributes #1 = { allocsize(1) }
+attributes #2 = { argmemonly nofree nosync nounwind willreturn }
+attributes #3 = { inaccessiblemem_or_argmemonly }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll
index 0c7cf9a640ef7..6fc6f85de7c26 100644
--- a/test/llvmpasses/julia-licm.ll
+++ b/test/llvmpasses/julia-licm.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s
 
@@ -7,6 +9,8 @@ declare void @julia.write_barrier({}*, ...)
 
 declare {}*** @julia.get_pgcstack()
 
+; COM: check basic allocation hoisting functionality
+; CHECK-LABEL: @julia_allocation_hoist
 define nonnull {} addrspace(10)* @julia_allocation_hoist(i64 signext %0) #0 {
 top:
   %1 = call {}*** @julia.get_pgcstack()
@@ -25,13 +29,15 @@ L4:                                               ; preds = %top
   %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12
   %current_task1 = bitcast {}*** %current_task112 to {}**
   ; CHECK: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag)
+  ; CHECK-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)*
+  ; CHECK-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false)
   ; CHECK-NEXT: br label %L22
   br label %L22
 
 L22:                                              ; preds = %L4, %L22
   %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
-  ; CHECK: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
-  ; CHECK-NEXT %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
+  ; CHECK: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ]
+  ; CHECK-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
   %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) #1
   %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
   store i64 %value_phi5, i64 addrspace(10)* %4, align 8, !tbaa !2
@@ -40,6 +46,28 @@ L22:                                              ; preds = %L4, %L22
   br i1 %.not, label %L3.loopexit, label %L22
 }
 
+; COM: check that we hoist the allocation out of the loop despite returning the allocation
+; CHECK-LABEL: @julia_hoist_returned
+define nonnull {} addrspace(10)* @julia_hoist_returned(i64 signext %n, i1 zeroext %ret) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %current_task = bitcast {}*** %pgcstack to {}**
+; CHECK: br label %preheader
+  br label %preheader
+; CHECK: preheader:
+preheader:
+; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)*
+; CHECK-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false)
+; CHECK-NEXT: br label %loop
+  br label %loop
+loop:
+  %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag)
+  br i1 %ret, label %return, label %loop
+return:
+  ret {} addrspace(10)* %alloc
+}
+
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #1
 
diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll
index 7497febf1e846..84a6da9f2554d 100644
--- a/test/llvmpasses/late-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/late-lower-gc-addrspaces.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
@@ -45,7 +47,7 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 ; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
 ; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
 ; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
 ; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
@@ -70,7 +72,7 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 ; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
 ; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
 ; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
 ; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 65a67c78d7810..98c472771aaf9 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
@@ -42,7 +44,7 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 ; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
 ; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
 ; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
 ; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
@@ -67,7 +69,7 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 ; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
-; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16
 ; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
 ; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
 ; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl
index 412bee7015c3e..c970e07f8a125 100644
--- a/test/llvmpasses/loopinfo.jl
+++ b/test/llvmpasses/loopinfo.jl
@@ -32,7 +32,7 @@ function simdf(X)
 # LOWER: fadd fast double
 # LOWER-NOT: call void @julia.loopinfo_marker()
 # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]]
-# FINAL: fadd fast <{{[0-9]+}} x double>
+# FINAL: fadd fast <{{(vscale x )?}}{{[0-9]+}} x double>
     end
     acc
 end
diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll
index 9770684574034..fcc4dc0114c21 100644
--- a/test/llvmpasses/lower-handlers-addrspaces.ll
+++ b/test/llvmpasses/lower-handlers-addrspaces.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll
index 01bc1ae728f15..c3d51f2e94c30 100644
--- a/test/llvmpasses/lower-handlers.ll
+++ b/test/llvmpasses/lower-handlers.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll
index 2eddb62cef3ec..f93940db392af 100644
--- a/test/llvmpasses/muladd.ll
+++ b/test/llvmpasses/muladd.ll
@@ -1,7 +1,10 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
 
 
+; CHECK-LABEL: @fast_muladd1
 define double @fast_muladd1(double %a, double %b, double %c) {
 top:
 ; CHECK: {{contract|fmuladd}}
@@ -11,6 +14,7 @@ top:
   ret double %v2
 }
 
+; CHECK-LABEL: @fast_mulsub1
 define double @fast_mulsub1(double %a, double %b, double %c) {
 top:
 ; CHECK: {{contract|fmuladd}}
@@ -20,6 +24,7 @@ top:
   ret double %v2
 }
 
+; CHECK-LABEL: @fast_mulsub_vec1
 define <2 x double> @fast_mulsub_vec1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 top:
 ; CHECK: {{contract|fmuladd}}
@@ -28,3 +33,31 @@ top:
 ; CHECK: ret <2 x double>
   ret <2 x double> %v2
 }
+
+; COM: Should not mark fmul as contract when multiple uses of fmul exist
+; CHECK-LABEL: @slow_muladd1
+define double @slow_muladd1(double %a, double %b, double %c) {
+top:
+; CHECK: %v1 = fmul double %a, %b
+  %v1 = fmul double %a, %b
+; CHECK: %v2 = fadd fast double %v1, %c
+  %v2 = fadd fast double %v1, %c
+; CHECK: %v3 = fadd fast double %v1, %b
+  %v3 = fadd fast double %v1, %b
+; CHECK: %v4 = fadd fast double %v3, %v2
+  %v4 = fadd fast double %v3, %v2
+; CHECK: ret double %v4
+  ret double %v4
+}
+
+; COM: Should not mark fadd->fadd fast as contract
+; CHECK-LABEL: @slow_addadd1
+define double @slow_addadd1(double %a, double %b, double %c) {
+top:
+; CHECK: %v1 = fadd double %a, %b
+  %v1 = fadd double %a, %b
+; CHECK: %v2 = fadd fast double %v1, %c
+  %v2 = fadd fast double %v1, %c
+; CHECK: ret double %v2
+  ret double %v2
+}
diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll
new file mode 100644
index 0000000000000..ababb4fc74b8a
--- /dev/null
+++ b/test/llvmpasses/multiversioning-annotate-only.ll
@@ -0,0 +1,219 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s
+
+; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning
+; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars
+
+; COM: Copied from src/processor.h
+; COM:    JL_TARGET_VEC_CALL = 1 << 0,
+; COM:    // Clone all functions
+; COM:    JL_TARGET_CLONE_ALL = 1 << 1,
+; COM:    // Clone when there's scalar math operations that can benefit from target-specific
+; COM:    // optimizations. This includes `muladd`, `fma`, `fast`/`contract` flags.
+; COM:    JL_TARGET_CLONE_MATH = 1 << 2,
+; COM:    // Clone when the function has a loop
+; COM:    JL_TARGET_CLONE_LOOP = 1 << 3,
+; COM:    // Clone when the function uses any vectors
+; COM:    // When this is specified, the cloning pass should also record if any of the cloned functions
+; COM:    // used this in any function call (including the signature of the function itself)
+; COM:    JL_TARGET_CLONE_SIMD = 1 << 4,
+; COM:    // The CPU name is unknown
+; COM:    JL_TARGET_UNKNOWN_NAME = 1 << 5,
+; COM:    // Optimize for size for this target
+; COM:    JL_TARGET_OPTSIZE = 1 << 6,
+; COM:    // Only optimize for size for this target
+; COM:    JL_TARGET_MINSIZE = 1 << 7,
+; COM:    // Clone when the function queries CPU features
+; COM:    JL_TARGET_CLONE_CPU = 1 << 8,
+; COM:    // Clone when the function uses fp16
+; COM:    JL_TARGET_CLONE_FLOAT16 = 1 << 9,
+
+; COM: start with the basics, just one feature per function
+
+; COM: boring should only be cloned if clone_all is enabled on the target
+; CHECK: @boring{{.*}}#[[BORING_ATTRS:[0-9]+]]
+define noundef i32 @boring(i32 noundef %0) {
+  ret i32 %0
+}
+
+; CHECK: @fastmath_test{{.*}}#[[FASTMATH_TEST_ATTRS:[0-9]+]]
+define noundef float @fastmath_test(float noundef %0, float noundef %1) {
+  %3 = fadd fast float %0, %1
+  ret float %3
+}
+
+; CHECK: @loop_test{{.*}}#[[LOOP_TEST_ATTRS:[0-9]+]]
+define noundef i32 @loop_test(i32 noundef %0) {
+  %2 = icmp sgt i32 %0, 0
+  br i1 %2, label %5, label %3
+
+3:                                                ; preds = %5, %1
+  %4 = phi i32 [ 0, %1 ], [ %9, %5 ]
+  ret i32 %4
+
+5:                                                ; preds = %1, %5
+  %6 = phi i32 [ %10, %5 ], [ 0, %1 ]
+  %7 = phi i32 [ %9, %5 ], [ 0, %1 ]
+  %8 = lshr i32 %6, 1
+  %9 = add nuw nsw i32 %8, %7
+  %10 = add nuw nsw i32 %6, 1
+  %11 = icmp eq i32 %10, %0
+  br i1 %11, label %3, label %5, !llvm.loop !9
+}
+
+; CHECK: @simd_test{{.*}}#[[SIMD_TEST_ATTRS:[0-9]+]]
+define noundef i32 @simd_test(<4 x i32> noundef %0) {
+  %2 = extractelement <4 x i32> %0, i64 0
+  ret i32 %2
+}
+
+; COM: now check all the combinations
+
+; CHECK: @simd_fastmath_test{{.*}}#[[SIMD_FASTMATH_TEST_ATTRS:[0-9]+]]
+define noundef float @simd_fastmath_test(<4 x float> noundef %0) {
+  %2 = extractelement <4 x float> %0, i64 0
+  %3 = extractelement <4 x float> %0, i64 1
+  %4 = fadd fast float %2, %3
+  ret float %4
+}
+
+; CHECK: @loop_fastmath_test{{.*}}#[[LOOP_FASTMATH_TEST_ATTRS:[0-9]+]]
+define noundef i32 @loop_fastmath_test(i32 noundef %0) {
+  %2 = icmp sgt i32 %0, 0
+  br i1 %2, label %7, label %5
+
+3:                                                ; preds = %7
+  %4 = fptosi float %12 to i32
+  br label %5
+
+5:                                                ; preds = %3, %1
+  %6 = phi i32 [ 0, %1 ], [ %4, %3 ]
+  ret i32 %6
+
+7:                                                ; preds = %1, %7
+  %8 = phi i32 [ %13, %7 ], [ 0, %1 ]
+  %9 = phi float [ %12, %7 ], [ 0.000000e+00, %1 ]
+  %10 = lshr i32 %8, 1
+  %11 = sitofp i32 %10 to float
+  %12 = fadd fast float %9, %11
+  %13 = add nuw nsw i32 %8, 1
+  %14 = icmp eq i32 %13, %0
+  br i1 %14, label %3, label %7, !llvm.loop !9
+}
+
+; CHECK: @simd_loop_test{{.*}}#[[SIMD_LOOP_TEST_ATTRS:[0-9]+]]
+define dso_local noundef i32 @simd_loop_test(<4 x i32> noundef %0) {
+  %2 = extractelement <4 x i32> %0, i64 0
+  %3 = icmp sgt i32 %2, 0
+  br i1 %3, label %6, label %4
+
+4:                                                ; preds = %6, %1
+  %5 = phi i32 [ 0, %1 ], [ %10, %6 ]
+  ret i32 %5
+
+6:                                                ; preds = %1, %6
+  %7 = phi i32 [ %11, %6 ], [ 0, %1 ]
+  %8 = phi i32 [ %10, %6 ], [ 0, %1 ]
+  %9 = lshr i32 %7, 1
+  %10 = add nuw nsw i32 %9, %8
+  %11 = add nuw nsw i32 %7, 1
+  %12 = icmp eq i32 %11, %2
+  br i1 %12, label %4, label %6, !llvm.loop !9
+}
+
+; CHECK: @simd_loop_fastmath_test{{.*}}#[[SIMD_LOOP_FASTMATH_TEST_ATTRS:[0-9]+]]
+define noundef i32 @simd_loop_fastmath_test(<4 x i32> noundef %0) {
+  %2 = extractelement <4 x i32> %0, i64 0
+  %3 = icmp sgt i32 %2, 0
+  br i1 %3, label %8, label %6
+
+4:                                                ; preds = %8
+  %5 = fptosi float %13 to i32
+  br label %6
+
+6:                                                ; preds = %4, %1
+  %7 = phi i32 [ 0, %1 ], [ %5, %4 ]
+  ret i32 %7
+
+8:                                                ; preds = %1, %8
+  %9 = phi i32 [ %14, %8 ], [ 0, %1 ]
+  %10 = phi float [ %13, %8 ], [ 0.000000e+00, %1 ]
+  %11 = lshr i32 %9, 1
+  %12 = sitofp i32 %11 to float
+  %13 = fadd fast float %10, %12
+  %14 = add nuw nsw i32 %9, 1
+  %15 = icmp eq i32 %14, %2
+  br i1 %15, label %4, label %8, !llvm.loop !9
+}
+
+; COM: check for fvar and reloc annotations on functions used by other globals
+
+@func_gv = global i32 (i32)* @func_in_gv, align 8
+
+; CHECK: @func_in_gv{{.*}}#[[FUNC_IN_GV_ATTRS:[0-9]+]]
+define noundef i32 @func_in_gv(i32 noundef returned %0) {
+  ret i32 %0
+}
+
+@aliaser = alias i32 (i32)*, bitcast (i32 (i32)* @aliasee to i32 (i32)**)
+
+; CHECK: @aliasee{{.*}}#[[ALIASEE_ATTRS:[0-9]+]]
+define i32 @aliasee(i32 noundef returned %0) {
+  ret i32 %0
+}
+
+; COM: check for reloc annotations on functions used by other functions
+; CHECK: @cloned{{.*}}#[[CLONED_RELOC_ATTRS:[0-9]+]]
+define noundef float @cloned(float noundef %0, float noundef %1) {
+  %3 = fadd fast float %0, %1
+  ret float %3
+}
+
+define noundef i32 @uncloned(i32 noundef %0) {
+  %2 = sitofp i32 %0 to float
+  %3 = call noundef float @cloned(float noundef %2, float noundef %2)
+  %4 = fptosi float %3 to i32
+  ret i32 %4
+}
+
+; COM: Note that these strings are hex-encoded bits of the target indices that will be cloned
+; CHECK-DAG: attributes #[[BORING_ATTRS]] = { "julia.mv.clones"="2" }
+; CHECK-DAG: attributes #[[FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="6" }
+; CHECK-DAG: attributes #[[LOOP_TEST_ATTRS]] = { "julia.mv.clones"="A" }
+; CHECK-DAG: attributes #[[SIMD_TEST_ATTRS]] = { "julia.mv.clones"="12" }
+; CHECK-DAG: attributes #[[SIMD_FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="16" }
+; CHECK-DAG: attributes #[[LOOP_FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="E" }
+; CHECK-DAG: attributes #[[SIMD_LOOP_TEST_ATTRS]] = { "julia.mv.clones"="1A" }
+; CHECK-DAG: attributes #[[SIMD_LOOP_FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="1E" }
+; CHECK-DAG: attributes #[[FUNC_IN_GV_ATTRS]]
+; CHECK-SAME: "julia.mv.clones"="2"
+; CHECK-SAME: "julia.mv.fvar"
+; CHECK-DAG: attributes #[[ALIASEE_ATTRS]]
+; CHECK-SAME: "julia.mv.clones"="2"
+; CHECK-SAME: "julia.mv.reloc"
+; CHECK-DAG: attributes #[[CLONED_RELOC_ATTRS]]
+; CHECK-SAME: "julia.mv.clones"="6"
+; CHECK-SAME: "julia.mv.reloc"
+
+; CHECK-LABEL: !llvm.module.flags
+
+!llvm.module.flags = !{!0, !1, !2}
+
+; CHECK-DAG: julia.mv.enable
+; CHECK-DAG: julia.mv.skipcloning
+; CHECK-DAG: julia.mv.specs
+; CHECK-DAG: julia.mv.annotated
+; CHECK-DAG: julia.mv.veccall
+
+!0 = !{i32 1, !"julia.mv.enable", i32 1}
+!1 = !{i32 1, !"julia.mv.skipcloning", i32 1}
+!2 = !{i32 1, !"julia.mv.specs", !3}
+!3 = !{!4, !5, !6, !7, !8}
+!4 = !{!"cpubase", !"nofeatures", i32 0, i32 2}
+!5 = !{!"cpucloneall", !"cloneall", i32 0, i32 2}
+!6 = !{!"cpufastmath", !"fastmathclone", i32 0, i32 4}
+!7 = !{!"cpuloop", !"loopclone", i32 0, i32 8}
+!8 = !{!"cpusimd", !"simdclone", i32 0, i32 16}
+!9 = !{!9}
diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll
new file mode 100644
index 0000000000000..897652700c335
--- /dev/null
+++ b/test/llvmpasses/multiversioning-clone-only.ll
@@ -0,0 +1,215 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false
+
+; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer
+; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer
+; CHECK: @subtarget_cloned_gv = hidden global i64* null
+; CHECK: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null
+; CHECK: @jl_fvar_offsets = hidden constant [2 x i32] [i32 1, i32 0]
+; CHECK: @jl_gvar_base = hidden constant i64 0
+; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer
+; CHECK: @jl_clone_slots = hidden constant [5 x i32]
+; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_gvar_base
+; CHECK: @jl_clone_idxs = hidden constant [13 x i32]
+; COM: TODO actually check the clone idxs maybe?
+; CHECK: @jl_clone_offsets = hidden constant [4 x i32]
+; CHECK-SAME: sub
+; CHECK-SAME: @subtarget_cloned.1
+; CHECK-SAME: @subtarget_cloned
+; CHECK-SAME: sub
+; CHECK-SAME: @subtarget_cloned.2
+; CHECK-SAME: @subtarget_cloned
+; CHECK-SAME: sub
+
+@jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 16
+@jl_gvars = global [0 x i64*] zeroinitializer, align 16
+@jl_fvar_idxs = hidden constant [1 x i32] [i32 0], align 16
+@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16
+@subtarget_cloned_gv = hidden global i64* bitcast (i32 (i32)* @subtarget_cloned to i64*), align 16
+
+@subtarget_cloned_aliased = alias i32 (i32), i32 (i32)* @subtarget_cloned
+
+; CHECK: define{{.*}}@boring({{.*}}#[[BORING_DEFAULT_ATTRS:[0-9]+]]
+; CHECK-NEXT: ret i32 %0
+define noundef i32 @boring(i32 noundef %0) #0 {
+    ret i32 %0
+}
+
+; CHECK: declare{{.*}}@declaration({{.*}}#[[DECLARATION_DEFAULT_ATTRS:[0-9]+]]
+declare i32 @declaration(i32 %0) #1
+
+; CHECK: define{{.*}}@call_boring({{.*}}#[[BORING_DEFAULT_ATTRS]]
+; CHECK-NEXT: %2 = call noundef i32 @boring(i32 noundef %0)
+define noundef i32 @call_boring(i32 noundef %0) #0 {
+    %2 = call noundef i32 @boring(i32 noundef %0)
+    ret i32 %2
+}
+
+; CHECK: define{{.*}}@call_declaration({{.*}}#[[DECLARATION_DEFAULT_ATTRS]]
+; CHECK-NEXT: %2 = call noundef i32 @declaration(i32 noundef %0)
+define noundef i32 @call_declaration(i32 noundef %0) #1 {
+    %2 = call noundef i32 @declaration(i32 noundef %0)
+    ret i32 %2
+}
+
+; CHECK: define{{.*}}@subtarget_cloned({{.*}}#[[SUBTARGET_CLONED_DEFAULT_ATTRS:[0-9]+]]
+; CHECK-NEXT: ret i32 0
+define noundef i32 @subtarget_cloned(i32 noundef %0) #2 {
+    ret i32 0
+}
+
+; COM: should fixup this callsite since 2 is cloned for a subtarget
+; CHECK: define{{.*}}@call_subtarget_cloned({{.*}}#[[CALL_SUBTARGET_CLONED_DEFAULT_ATTRS:[0-9]+]]
+; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA:[0-9]+]], !invariant.load
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
+; CHECK: ret i32
+define noundef i32 @call_subtarget_cloned(i32 noundef %0) #3 {
+    %2 = call noundef i32 @subtarget_cloned(i32 noundef %0)
+    ret i32 %2
+}
+
+; CHECK: define{{.*}}@call_subtarget_cloned_but_not_cloned({{.*}}#[[BORING_DEFAULT_ATTRS]]
+; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
+; CHECK: ret i32
+define noundef i32 @call_subtarget_cloned_but_not_cloned(i32 noundef %0) #0 {
+    %2 = call noundef i32 @subtarget_cloned(i32 noundef %0)
+    ret i32 %2
+}
+
+; CHECK: define{{.*}}@boring.1({{.*}}#[[BORING_CLONEALL_ATTRS:[0-9]+]]
+; CHECK-NEXT: ret i32 %0
+
+; CHECK: declare{{.*}}@declaration.1({{.*}}#[[DECLARATION_CLONEALL_ATTRS:[0-9]+]]
+
+; COM: should not fixup this callsite since boring is not cloned for a subtarget
+; COM: also should call boring.1 instead of boring
+; CHECK: define{{.*}}@call_boring.1({{.*}}#[[BORING_CLONEALL_ATTRS]]
+; CHECK-NEXT: %2 = call noundef i32 @boring.1(i32 noundef %0)
+
+; CHECK: define{{.*}}@call_declaration.1({{.*}}#[[DECLARATION_CLONEALL_ATTRS]]
+; CHECK-NEXT: %2 = call noundef i32 @declaration.1(i32 noundef %0)
+
+; CHECK: define{{.*}}@subtarget_cloned.1({{.*}}#[[SUBTARGET_CLONED_CLONEALL_ATTRS:[0-9]+]]
+; CHECK-NEXT: ret i32 0
+
+; CHECK: define{{.*}}@subtarget_cloned.2({{.*}}#[[SUBTARGET_CLONED_FASTMATH_ATTRS:[0-9]+]]
+; CHECK-NEXT: ret i32 0
+
+; COM: should *NOT* fixup this callsite since subtarget_cloned is not cloned for a subtarget of the cloneall
+; CHECK: define{{.*}}@call_subtarget_cloned.1({{.*}}#[[CALL_SUBTARGET_CLONED_CLONEALL_ATTRS:[0-9]+]]
+; CHECK-NEXT: %2 = call noundef i32 @subtarget_cloned.1(i32 noundef %0)
+
+; CHECK: define {{.*}}@call_subtarget_cloned.2({{.*}}#[[CALL_SUBTARGET_CLONED_FASTMATH_ATTRS:[0-9]+]]
+; CHECK-NEXT: %2 = call noundef i32 @subtarget_cloned.2(i32 noundef %0)
+
+; CHECK: define{{.*}}@call_subtarget_cloned_but_not_cloned.1({{.*}}#[[BORING_CLONEALL_ATTRS]]
+; CHECK-NEXT: %2 = call noundef i32 @subtarget_cloned.1(i32 noundef %0)
+
+; COM: should not have cloned for fastmath
+; CHECK-NOT: @subtarget_cloned_but_not_cloned.2
+
+; COM: check for alias being rewritten to a function trampoline
+; CHECK: define{{.*}}@subtarget_cloned_aliased{{.*}}#[[SUBTARGET_ALIASED_ATTRS:[0-9]+]]
+; CHECK-NOT: }
+; CHECK: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load
+; CHECK-NEXT: call{{.*}}[[FUNC_PTR]]
+; CHECK: ret i32
+
+; CHECK: attributes #[[BORING_DEFAULT_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="2"
+; CHECK-DAG: "julia.mv.clone"="0"
+; CHECK-DAG: "target-cpu"="cpubase"
+; CHECK-DAG: "target-features"="nofeatures"
+; CHECK-SAME: }
+; CHECK: attributes #[[DECLARATION_DEFAULT_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="2"
+; CHECK-DAG: "julia.mv.clone"="0"
+; CHECK-DAG: "target-cpu"="cpubase"
+; CHECK-DAG: "target-features"="nofeatures"
+; CHECK-SAME: }
+; CHECK: attributes #[[SUBTARGET_CLONED_DEFAULT_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="6"
+; CHECK-DAG: "julia.mv.clone"="0"
+; CHECK-DAG: "target-cpu"="cpubase"
+; CHECK-DAG: "target-features"="nofeatures"
+; CHECK-DAG: "julia.mv.reloc"
+; CHECK-SAME: }
+; CHECK: attributes #[[CALL_SUBTARGET_CLONED_DEFAULT_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="6"
+; CHECK-DAG: "julia.mv.clone"="0"
+; CHECK-DAG: "target-cpu"="cpubase"
+; CHECK-DAG: "target-features"="nofeatures"
+; CHECK-SAME: }
+; CHECK: attributes #[[BORING_CLONEALL_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="2"
+; CHECK-DAG: "julia.mv.clone"="1"
+; CHECK-DAG: "target-cpu"="cpucloneall"
+; CHECK-DAG: "target-features"="cloneall"
+; CHECK-SAME: }
+; CHECK: attributes #[[DECLARATION_CLONEALL_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="2"
+; CHECK-DAG: "julia.mv.clone"="1"
+; CHECK-DAG: "target-cpu"="cpucloneall"
+; CHECK-DAG: "target-features"="cloneall"
+; CHECK-SAME: }
+; CHECK: attributes #[[SUBTARGET_CLONED_CLONEALL_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="6"
+; CHECK-DAG: "julia.mv.clone"="1"
+; CHECK-DAG: "target-cpu"="cpucloneall"
+; CHECK-DAG: "target-features"="cloneall"
+; CHECK-DAG: "julia.mv.reloc"
+; CHECK-SAME: }
+; CHECK: attributes #[[SUBTARGET_CLONED_FASTMATH_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="6"
+; CHECK-DAG: "julia.mv.clone"="2"
+; CHECK-DAG: "target-cpu"="cpufastmath"
+; CHECK-DAG: "target-features"="fastmathclone"
+; CHECK-DAG: "julia.mv.reloc"
+; CHECK-SAME: }
+; CHECK: attributes #[[CALL_SUBTARGET_CLONED_CLONEALL_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="6"
+; CHECK-DAG: "julia.mv.clone"="1"
+; CHECK-DAG: "target-cpu"="cpucloneall"
+; CHECK-DAG: "target-features"="cloneall"
+; CHECK-SAME: }
+; CHECK: attributes #[[CALL_SUBTARGET_CLONED_FASTMATH_ATTRS]]
+; CHECK-SAME: {
+; CHECK-DAG: "julia.mv.clones"="6"
+; CHECK-DAG: "julia.mv.clone"="2"
+; CHECK-DAG: "target-cpu"="cpufastmath"
+; CHECK-DAG: "target-features"="fastmathclone"
+; CHECK-SAME: }
+; CHECK: attributes #[[SUBTARGET_ALIASED_ATTRS]]
+; CHECK-SAME: {
+; CHECK-SAME: "julia.mv.alias"
+; CHECK-SAME: }
+attributes #0 = {"julia.mv.clones"="2"}
+attributes #1 = {"julia.mv.clones"="2" "test.unique"="1"}
+attributes #2 = {"julia.mv.clones"="6" "julia.mv.reloc"}
+attributes #3 = {"julia.mv.clones"="6"}
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"julia.mv.enable", i32 1}
+!1 = !{i32 1, !"julia.mv.annotated", i32 1}
+!2 = !{i32 1, !"julia.mv.specs", !3}
+!3 = !{!4, !5, !6, !7, !8}
+!4 = !{!"cpubase", !"nofeatures", i32 0, i32 2}
+!5 = !{!"cpucloneall", !"cloneall", i32 0, i32 2}
+!6 = !{!"cpufastmath", !"fastmathclone", i32 0, i32 4}
+!7 = !{!"cpuloop", !"loopclone", i32 0, i32 8}
+!8 = !{!"cpusimd", !"simdclone", i32 0, i32 16}
+; CHECK-DAG: ![[TBAA_CONST_METADATA]] = !{![[JTBAA_CONST_METADATA:[0-9]+]], ![[JTBAA_CONST_METADATA]]
+; CHECK-DAG: ![[JTBAA_CONST_METADATA]] = !{!"jtbaa_const"
diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl
index ff9cd0aace704..1b5d1df3c9f36 100644
--- a/test/llvmpasses/pipeline-o0.jl
+++ b/test/llvmpasses/pipeline-o0.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
diff --git a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl
index e7be976919344..86e1ded3f11e5 100644
--- a/test/llvmpasses/pipeline-o2-allocs.jl
+++ b/test/llvmpasses/pipeline-o2-allocs.jl
@@ -1,8 +1,12 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s
 
 include(joinpath("..", "testhelpers", "llvmpasses.jl"))
 
+# COM: This tests that simplifycfg is still hoisting allocations in different basic blocks
+# COM: into the parent basic block, and deduplicating them in the process
 # CHECK-LABEL: @julia_split
 # CHECK: alloc
 # CHECK-NOT: alloc
@@ -15,6 +19,8 @@ function split(maybe)
     end
 end
 
+# COM: This tests that irrespective of the condition outside the loop
+# COM: allocations inside the loop are hoisted and the loop is deleted
 # CHECK-LABEL: @julia_loop_alloc
 # CHECK: phi
 # CHECK-NOT: phi
@@ -27,6 +33,8 @@ function loop_alloc(N)
     ref
 end
 
+# COM: This tests that even with the allocation LLVM will recognize
+# COM: that the loop is meaningless and delete it
 # CHECK-LABEL: @julia_loop_const
 # CHECK-NOT: br
 function loop_const()
@@ -37,6 +45,8 @@ function loop_const()
     ref
 end
 
+# COM: This tests that the GC.@preserve macro is being ignored since ref
+# COM: is not used anywhere else
 # CHECK-LABEL: @julia_nopreserve
 # CHECK-NOT: alloc
 # CHECK-NOT: julia.gc_preserve_begin
diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl
new file mode 100644
index 0000000000000..584e8855f0f8c
--- /dev/null
+++ b/test/llvmpasses/pipeline-o2-broadcast.jl
@@ -0,0 +1,123 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
+# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s
+
+include(joinpath("..", "testhelpers", "llvmpasses.jl"))
+
+# COM: Check broadcasted outer product is vectorized
+
+# COM: Float32
+# CHECK: @japi1_prod_v_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
+# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float>
+# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float>
+
+# COM: Float64
+# CHECK: @japi1_prod_v_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
+# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double>
+# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double>
+
+# COM: Int32
+# CHECK: @japi1_prod_v_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
+# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32>
+# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32>
+
+# COM: Int64
+# CHECK: @japi1_prod_v_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
+# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64>
+# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64>
+
+function prod_v_vT(R, x, y)
+    R .= x .* y'
+end
+
+# COM: Check broadcasted inner product is vectorized
+
+# COM: Float32
+# CHECK: @japi1_prod_vT_v
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
+# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float>
+# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float>
+
+# COM: Float64
+# CHECK: @japi1_prod_vT_v
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
+# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double>
+# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double>
+
+# COM: Int32
+# CHECK: @japi1_prod_vT_v
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
+# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32>
+# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32>
+
+# COM: Int64
+# CHECK: @japi1_prod_vT_v
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
+# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64>
+# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
+# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64>
+
+function prod_vT_v(R, x, y)
+    R .= x' .* y
+end
+
+# COM: Check broadcasted multiplications are vectorized
+
+# COM: Float32
+# CHECK: @japi1_prod_v_M_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
+# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float>
+# XFAIL-CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float>
+# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float>
+
+# COM: Float64
+# CHECK: @japi1_prod_v_M_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
+# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double>
+# XFAIL-CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double>
+# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double>
+
+# COM: Int32
+# CHECK: @japi1_prod_v_M_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
+# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32>
+# XFAIL-CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32>
+# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32>
+
+# COM: Int64
+# CHECK: @japi1_prod_v_M_vT
+# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
+# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64>
+# XFAIL-CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64>
+# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64>
+
+function prod_v_M_vT(R, x, M, y)
+    R .= x .* M .* y'
+end
+
+emit(prod_v_vT, Matrix{Float32}, Vector{Float32}, Vector{Float32})
+emit(prod_v_vT, Matrix{Float64}, Vector{Float64}, Vector{Float64})
+emit(prod_v_vT, Matrix{Int32}, Vector{Int32}, Vector{Int32})
+emit(prod_v_vT, Matrix{Int64}, Vector{Int64}, Vector{Int64})
+
+emit(prod_vT_v, Matrix{Float32}, Vector{Float32}, Vector{Float32})
+emit(prod_vT_v, Matrix{Float64}, Vector{Float64}, Vector{Float64})
+emit(prod_vT_v, Matrix{Int32}, Vector{Int32}, Vector{Int32})
+emit(prod_vT_v, Matrix{Int64}, Vector{Int64}, Vector{Int64})
+
+emit(prod_v_M_vT, Matrix{Float32}, Vector{Float32}, Matrix{Float32}, Vector{Float32})
+emit(prod_v_M_vT, Matrix{Float64}, Vector{Float64}, Matrix{Float64}, Vector{Float64})
+emit(prod_v_M_vT, Matrix{Int32}, Vector{Int32}, Matrix{Int32}, Vector{Int32})
+emit(prod_v_M_vT, Matrix{Int64}, Vector{Int64}, Matrix{Int64}, Vector{Int64})
diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl
index 85f5035a3249d..fcb2161de7614 100644
--- a/test/llvmpasses/pipeline-o2.jl
+++ b/test/llvmpasses/pipeline-o2.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
 # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL
 
@@ -76,21 +78,21 @@ end
 # COM: memset checks
 
 # COM: INT64
-# ALL-LABEL: define nonnull {} addrspace(10)* @julia_zeros
+# ALL: define {{.*}} @julia_zeros
 # ALL-NOT: bounds_error
 # COM: memset is not used with bounds checks on (too late in the pipeline)
 # BC_OFF: llvm.memset
 # BC_AUTO: llvm.memset
 
 # COM: INT32
-# ALL-LABEL: define nonnull {} addrspace(10)* @julia_zeros
+# ALL: define {{.*}} @julia_zeros
 # ALL-NOT: bounds_error
 # COM: memset is not used with bounds checks on (too late in the pipeline)
 # BC_OFF: llvm.memset
 # BC_AUTO: llvm.memset
 
 # COM: INT16
-# ALL-LABEL: define nonnull {} addrspace(10)* @julia_zeros
+# ALL: define {{.*}} @julia_zeros
 # ALL-NOT: bounds_error
 # COM: memset is not used with bounds checks on (too late in the pipeline)
 # BC_OFF: llvm.memset
diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll
index b896850935e37..c1ba2069102ac 100644
--- a/test/llvmpasses/propagate-addrspace-non-zero.ll
+++ b/test/llvmpasses/propagate-addrspace-non-zero.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll
index 84ad33310ab3f..92bf68578477f 100644
--- a/test/llvmpasses/propagate-addrspace.ll
+++ b/test/llvmpasses/propagate-addrspace.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll
index 6c92bab06e357..3600fb76804ab 100644
--- a/test/llvmpasses/refinements.ll
+++ b/test/llvmpasses/refinements.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll
index 77a8a5e815057..4710f9bd6c4d6 100644
--- a/test/llvmpasses/remove-addrspaces.ll
+++ b/test/llvmpasses/remove-addrspaces.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s
 
@@ -47,7 +49,7 @@ top:
 %list = type { i64, %list* }
 
 ; COM: There's nothing to remove in this function; but remove-addrspaces shouldn't crash.
-define i64 @sum.linked.list() #0 {
+define i64 @sum.linked.list() {
 ; CHECK-LABEL: @sum.linked.list
 top:
   %a = alloca %list
@@ -109,3 +111,9 @@ define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} add
 ; CHECK: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0)
   ret void
 }
+
+
+; COM: check that function attributes are preserved on declarations too
+declare void @convergent_function() #0
+attributes #0 = { convergent }
+; CHECK: attributes #0 = { convergent }
diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll
index 17791d630d61a..404330ac3f7e1 100644
--- a/test/llvmpasses/returnstwicegc.ll
+++ b/test/llvmpasses/returnstwicegc.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s
 
diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll
index 894d3a1428a5c..142250212984e 100644
--- a/test/llvmpasses/simdloop.ll
+++ b/test/llvmpasses/simdloop.ll
@@ -1,3 +1,5 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s
 ; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s
 
diff --git a/test/loading.jl b/test/loading.jl
index 6b84bdc7bb3e1..ea544c2635dbc 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -42,6 +42,7 @@ thefname = "the fname!//\\&\1*"
 include_string_test_func = include_string(@__MODULE__, "include_string_test() = @__FILE__", thefname)
 @test include_string_test_func() == thefname
 @test include_string(@__MODULE__, "Base.source_path()", thefname) == Base.source_path()
+@test isdir(Base.source_dir())
 @test basename(@__FILE__) == "loading.jl"
 @test isabspath(@__FILE__)
 
@@ -982,6 +983,8 @@ end
             # Package in manifest in current env not present in depot
             @test Base.locate_package(pkg) !== nothing
 
+            @test Base.find_package("Baz") !== nothing  # coverage
+
             pushfirst!(LOAD_PATH, joinpath(tmp, "Env1"))
 
             @test Base.locate_package(pkg) === nothing
@@ -1003,23 +1006,29 @@ end
     try
         proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl")
 
-        function gen_extension_cmd(compile)
-            ```$(Base.julia_cmd()) $compile --startup-file=no -e '
-                begin
-                    push!(empty!(DEPOT_PATH), '$(repr(depot_path))')
-                    using HasExtensions
-                    # Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly got an extension")
-                    HasExtensions.ext_loaded && error("ext_loaded set")
-                    using HasDepWithExtensions
-                    # Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set")
-                    HasExtensions.ext_loaded || error("ext_loaded not set")
-                    HasExtensions.ext_folder_loaded && error("ext_folder_loaded set")
-                    HasDepWithExtensions.do_something() || error("do_something errored")
-                    using ExtDep2
-                    HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set")
-                end
-                '
-            ```
+        function gen_extension_cmd(compile, distr=false)
+            load_distr = distr ? "using Distributed; addprocs(1)" : ""
+            ew = distr ? "@everywhere" : ""
+            cmd = """
+            $load_distr
+            begin
+                $ew push!(empty!(DEPOT_PATH), $(repr(depot_path)))
+                using HasExtensions
+                $ew using HasExtensions
+                $ew Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly got an extension")
+                $ew HasExtensions.ext_loaded && error("ext_loaded set")
+                using HasDepWithExtensions
+                $ew using HasDepWithExtensions
+                $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set")
+                $ew HasExtensions.ext_loaded || error("ext_loaded not set")
+                $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set")
+                $ew HasDepWithExtensions.do_something() || error("do_something errored")
+                using ExtDep2
+                $ew using ExtDep2
+                $ew HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set")
+            end
+            """
+            return `$(Base.julia_cmd()) $compile --startup-file=no -e $cmd`
         end
 
         for compile in (`--compiled-modules=no`, ``, ``) # Once when requiring precompilation, once where it is already precompiled
@@ -1029,12 +1038,49 @@ end
             @test success(cmd)
         end
 
-        # 48351
         sep = Sys.iswindows() ? ';' : ':'
+
+        cmd = gen_extension_cmd(``, true)
+        cmd = addenv(cmd, "JULIA_LOAD_PATH" => join([proj, "@stdlib"], sep))
+        str = read(cmd, String)
+        @test !occursin("Error during loading of extension", str)
+        @test !occursin("ConcurrencyViolationError", str)
+
+        # 48351
         cmd = gen_extension_cmd(``)
         cmd = addenv(cmd, "JULIA_LOAD_PATH" => join([mktempdir(), proj], sep))
         cmd = pipeline(cmd; stdout, stderr)
         @test success(cmd)
+
+        # Only load env from where package is loaded
+        envs = [joinpath(@__DIR__, "project", "Extensions", "EnvWithHasExtensionsv2"), joinpath(@__DIR__, "project", "Extensions", "EnvWithHasExtensions")]
+        cmd = addenv(```$(Base.julia_cmd()) --startup-file=no -e '
+        begin
+            push!(empty!(DEPOT_PATH), '$(repr(depot_path))')
+            using HasExtensions
+            using ExtDep
+            Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly loaded ext from other env")
+            Base.get_extension(HasExtensions, :Extension2) === nothing && error("did not load ext from active env")
+        end
+        '
+        ```, "JULIA_LOAD_PATH" => join(envs, sep))
+        @test success(cmd)
+
+        test_ext_proj = """
+        begin
+            using HasExtensions
+            using ExtDep
+            Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load")
+            using ExtDep2
+            Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load")
+        end
+        """
+        for compile in (`--compiled-modules=no`, ``)
+            cmd_proj_ext = `$(Base.julia_cmd()) $compile --startup-file=no -e $test_ext_proj`
+            proj = joinpath(@__DIR__, "project", "Extensions")
+            cmd_proj_ext = addenv(cmd_proj_ext, "JULIA_LOAD_PATH" => join([joinpath(proj, "HasExtensions.jl"), joinpath(proj, "EnvWithDeps")], sep))
+            run(cmd_proj_ext)
+        end
     finally
         try
             rm(depot_path, force=true, recursive=true)
@@ -1069,21 +1115,69 @@ end
     for (P, D, C, I, O) in Iterators.product(0:1, 0:2, 0:2, 0:1, 0:3)
         julia = joinpath(Sys.BINDIR, Base.julia_exename())
         script = """
-        using Test
         let
             cf = Base.CacheFlags()
             opts = Base.JLOptions()
-            @test cf.use_pkgimages == opts.use_pkgimages == $P
-            @test cf.debug_level == opts.debug_level == $D
-            @test cf.check_bounds == opts.check_bounds == $C
-            @test cf.inline == opts.can_inline == $I
-            @test cf.opt_level == opts.opt_level == $O
+            cf.use_pkgimages == opts.use_pkgimages == $P || error("use_pkgimages")
+            cf.debug_level == opts.debug_level == $D || error("debug_level")
+            cf.check_bounds == opts.check_bounds == $C || error("check_bounds")
+            cf.inline == opts.can_inline == $I || error("inline")
+            cf.opt_level == opts.opt_level == $O || error("opt_level")
         end
         """
         cmd = `$julia $(pkgimage(P)) $(opt_level(O)) $(debug_level(D)) $(check_bounds(C)) $(inline(I)) -e $script`
         @test success(pipeline(cmd; stdout, stderr))
     end
+
+    cf = Base.CacheFlags(255)
+    @test cf.use_pkgimages
+    @test cf.debug_level == 3
+    @test cf.check_bounds == 3
+    @test cf.inline
+    @test cf.opt_level == 3
+
+    io = PipeBuffer()
+    show(io, cf)
+    @test read(io, String) == "use_pkgimages = true, debug_level = 3, check_bounds = 3, inline = true, opt_level = 3"
 end
 
 empty!(Base.DEPOT_PATH)
 append!(Base.DEPOT_PATH, original_depot_path)
+
+@testset "loading deadlock detector" begin
+    pkid1 = Base.PkgId("pkgid1")
+    pkid2 = Base.PkgId("pkgid2")
+    pkid3 = Base.PkgId("pkgid3")
+    pkid4 = Base.PkgId("pkgid4")
+    e = Base.Event()
+    @test nothing === @lock Base.require_lock Base.start_loading(pkid4)     # module pkgid4
+    @test nothing === @lock Base.require_lock Base.start_loading(pkid1)     # module pkgid1
+    t1 = @async begin
+        @test nothing === @lock Base.require_lock Base.start_loading(pkid2) # @async module pkgid2; using pkgid1; end
+        notify(e)
+        @test "loaded_pkgid1" == @lock Base.require_lock Base.start_loading(pkid1)
+        @lock Base.require_lock Base.end_loading(pkid2, "loaded_pkgid2")
+    end
+    wait(e)
+    reset(e)
+    t2 = @async begin
+        @test nothing === @lock Base.require_lock Base.start_loading(pkid3) # @async module pkgid3; using pkgid2; end
+        notify(e)
+        @test "loaded_pkgid2" == @lock Base.require_lock Base.start_loading(pkid2)
+        @lock Base.require_lock Base.end_loading(pkid3, "loaded_pkgid3")
+    end
+    wait(e)
+    reset(e)
+    @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid3 -> pkgid2 -> pkgid1 -> pkgid3 && pkgid4"),
+        @lock Base.require_lock Base.start_loading(pkid3)).value            # try using pkgid3
+    @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid4 -> pkgid4 && pkgid1"),
+        @lock Base.require_lock Base.start_loading(pkid4)).value            # try using pkgid4
+    @lock Base.require_lock Base.end_loading(pkid1, "loaded_pkgid1")        # end
+    @lock Base.require_lock Base.end_loading(pkid4, "loaded_pkgid4")        # end
+    wait(t2)
+    wait(t1)
+end
+
+@testset "Upgradable stdlibs" begin
+    @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using DelimitedFiles'`)
+end
diff --git a/test/math.jl b/test/math.jl
index f9af521de61ca..19d9f7893a496 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -69,8 +69,9 @@ end
     @test repr(Any[pi ℯ; ℯ pi]) == "Any[π ℯ; ℯ π]"
     @test string(pi) == "π"
 
-    @test sin(π) === sinpi(1) == tan(π) == sinpi(1 // 1) == 0
-    @test cos(π) === cospi(1) == sec(π) == cospi(1 // 1) == -1
+    @test sin(π) == sind(180) === sinpi(1) === sinpi(1//1) == tan(π) == 0
+    @test tan(π) == tand(180) === tanpi(1) === tanpi(1//1) === -0.0
+    @test cos(π) == cosd(180) === cospi(1) === cospi(1//1) == sec(π) == -1
     @test csc(π) == 1/0 && cot(π) == -1/0
     @test sincos(π) === sincospi(1) == (0, -1)
 end
@@ -179,8 +180,10 @@ end
             @test atan(x,y) ≈ atan(big(x),big(y))
             @test atanh(x) ≈ atanh(big(x))
             @test cbrt(x) ≈ cbrt(big(x))
+            @test fourthroot(x) ≈ fourthroot(big(x))
             @test cos(x) ≈ cos(big(x))
             @test cosh(x) ≈ cosh(big(x))
+            @test cospi(x) ≈ cospi(big(x))
             @test exp(x) ≈ exp(big(x))
             @test exp10(x) ≈ exp10(big(x))
             @test exp2(x) ≈ exp2(big(x))
@@ -194,9 +197,11 @@ end
             @test log2(x) ≈ log2(big(x))
             @test sin(x) ≈ sin(big(x))
             @test sinh(x) ≈ sinh(big(x))
+            @test sinpi(x) ≈ sinpi(big(x))
             @test sqrt(x) ≈ sqrt(big(x))
             @test tan(x) ≈ tan(big(x))
             @test tanh(x) ≈ tanh(big(x))
+            @test tanpi(x) ≈ tanpi(big(x))
             @test sec(x) ≈ sec(big(x))
             @test csc(x) ≈ csc(big(x))
             @test secd(x) ≈ secd(big(x))
@@ -215,6 +220,9 @@ end
             @test isequal(cbrt(T(0)), T(0))
             @test isequal(cbrt(T(1)), T(1))
             @test isequal(cbrt(T(1000000000))^3, T(1000)^3)
+            @test isequal(fourthroot(T(0)), T(0))
+            @test isequal(fourthroot(T(1)), T(1))
+            @test isequal(fourthroot(T(100000000))^4, T(100)^4)
             @test isequal(cos(T(0)), T(1))
             @test cos(T(pi)/2) ≈ T(0) atol=eps(T)
             @test isequal(cos(T(pi)), T(-1))
@@ -267,6 +275,8 @@ end
             @test asin(sin(x)) ≈ x
             @test cbrt(x)^3 ≈ x
             @test cbrt(x^3) ≈ x
+            @test fourthroot(x)^4 ≈ x
+            @test fourthroot(x^4) ≈ x
             @test asinh(sinh(x)) ≈ x
             @test atan(tan(x)) ≈ x
             @test atan(x,y) ≈ atan(x/y)
@@ -499,6 +509,22 @@ end
             @test cospi(convert(T,-1.5))::fT ⩲ zero(fT)
             @test_throws DomainError cospi(convert(T,Inf))
         end
+        @testset "trig pi functions accuracy" for numerator in -20:1:20
+            for func in (sinpi, cospi, tanpi,
+                         x -> sincospi(x)[1],
+                         x -> sincospi(x)[2])
+                x = numerator // 20
+                # Check that rational function works
+                @test func(x) ≈ func(BigFloat(x))
+                # Use short value so that wider values will be exactly equal
+                shortx = Float16(x)
+                # Compare to BigFloat value
+                bigvalue = func(BigFloat(shortx))
+                for T in (Float16,Float32,Float64)
+                    @test func(T(shortx)) ≈ T(bigvalue)
+                end
+            end
+        end
         @testset begin
             # If the machine supports fma (fused multiply add), we require exact equality.
             # Otherwise, we only require approximate equality.
@@ -529,14 +555,18 @@ end
     @test ismissing(scdm[2])
 end
 
-@testset "Integer and Inf args for sinpi/cospi/sinc/cosc" begin
+@testset "Integer and Inf args for sinpi/cospi/tanpi/sinc/cosc" begin
     for (sinpi, cospi) in ((sinpi, cospi), (x->sincospi(x)[1], x->sincospi(x)[2]))
-        @test sinpi(1) == 0
-        @test sinpi(-1) == -0
+        @test sinpi(1) === 0.0
+        @test sinpi(-1) === -0.0
         @test cospi(1) == -1
         @test cospi(2) == 1
     end
 
+    @test tanpi(1) === -0.0
+    @test tanpi(-1) === 0.0
+    @test tanpi(2) === 0.0
+    @test tanpi(-2) === -0.0
     @test sinc(1) == 0
     @test sinc(complex(1,0)) == 0
     @test sinc(0) == 1
@@ -589,7 +619,7 @@ end
     end
 end
 
-@testset "Irrational args to sinpi/cospi/sinc/cosc" begin
+@testset "Irrational args to sinpi/cospi/tanpi/sinc/cosc" begin
     for x in (pi, ℯ, Base.MathConstants.golden)
         for (sinpi, cospi) in ((sinpi, cospi), (x->sincospi(x)[1], x->sincospi(x)[2]))
             @test sinpi(x) ≈ Float64(sinpi(big(x)))
@@ -597,6 +627,7 @@ end
             @test sinpi(complex(x, x)) ≈ ComplexF64(sinpi(complex(big(x), big(x))))
             @test cospi(complex(x, x)) ≈ ComplexF64(cospi(complex(big(x), big(x))))
         end
+        @test tanpi(x) ≈ Float64(tanpi(big(x)))
         @test sinc(x)  ≈ Float64(sinc(big(x)))
         @test cosc(x)  ≈ Float64(cosc(big(x)))
         @test sinc(complex(x, x))  ≈ ComplexF64(sinc(complex(big(x),  big(x))))
@@ -626,7 +657,7 @@ end
 end
 
 @testset "trig function type stability" begin
-    @testset "$T $f" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16), f = (sind,cosd,sinpi,cospi)
+    @testset "$T $f" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16), f = (sind,cosd,sinpi,cospi,tanpi)
         @test Base.return_types(f,Tuple{T}) == [float(T)]
     end
     @testset "$T sincospi" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16)
@@ -1230,6 +1261,22 @@ end
     end
 end
 
+@testset "fourthroot" begin
+    for T in (Float32, Float64)
+        @test fourthroot(zero(T)) === zero(T)
+        @test fourthroot(one(T)) === one(T)
+        @test fourthroot(T(Inf)) === T(Inf)
+        @test isnan_type(T, fourthroot(T(NaN)))
+        for x in (pcnfloat(nextfloat(nextfloat(zero(T))))...,
+                  0.45, 0.6, 0.98,
+                  map(x->x^3, 1.0:1.0:1024.0)...,
+                  prevfloat(T(Inf)))
+            by = fourthroot(big(T(x)))
+            @test fourthroot(T(x)) ≈ by rtol=eps(T)
+        end
+    end
+end
+
 @testset "hypot" begin
     @test hypot(0, 0) == 0.0
     @test hypot(3, 4) == 5.0
@@ -1494,7 +1541,7 @@ end
 end
 
 # test constant-foldability
-for fn in (:sin, :cos, :tan, :log, :log2, :log10, :log1p, :exponent, :sqrt, :cbrt,
+for fn in (:sin, :cos, :tan, :log, :log2, :log10, :log1p, :exponent, :sqrt, :cbrt, :fourthroot,
            :asin, :atan, :acos, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh,
            :exp, :exp2, :exp10, :expm1
            )
diff --git a/test/misc.jl b/test/misc.jl
index 7c9fa3c1fbc41..79b684badf1e0 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -859,6 +859,10 @@ let buf = IOBuffer()
     printstyled(buf_color, "foo"; bold=true, color=:red)
     @test String(take!(buf)) == "\e[31m\e[1mfoo\e[22m\e[39m"
 
+    # Check that italic is turned off
+    printstyled(buf_color, "foo"; italic=true, color=:red)
+    @test String(take!(buf)) == "\e[31m\e[3mfoo\e[23m\e[39m"
+
     # Check that underline is turned off
     printstyled(buf_color, "foo"; color = :red, underline = true)
     @test String(take!(buf)) == "\e[31m\e[4mfoo\e[24m\e[39m"
@@ -876,8 +880,8 @@ let buf = IOBuffer()
     @test String(take!(buf)) == "\e[31m\e[8mfoo\e[28m\e[39m"
 
     # Check that all options can be turned on simultaneously
-    printstyled(buf_color, "foo"; color = :red, bold = true, underline = true, blink = true, reverse = true, hidden = true)
-    @test String(take!(buf)) == "\e[31m\e[1m\e[4m\e[5m\e[7m\e[8mfoo\e[28m\e[27m\e[25m\e[24m\e[22m\e[39m"
+    printstyled(buf_color, "foo"; color = :red, bold = true, italic = true, underline = true, blink = true, reverse = true, hidden = true)
+    @test String(take!(buf)) == "\e[31m\e[1m\e[3m\e[4m\e[5m\e[7m\e[8mfoo\e[28m\e[27m\e[25m\e[24m\e[22m\e[23m\e[39m"
 end
 
 abstract type DA_19281{T, N} <: AbstractArray{T, N} end
@@ -1380,5 +1384,21 @@ end
 
     # sanity check `@allocations` returns what we expect in some very simple cases
     @test (@allocations "a") == 0
-    @test (@allocations "a" * "b") == 1
+    @test (@allocations "a" * "b") == 0 # constant propagation
+    @test (@allocations "a" * Base.inferencebarrier("b")) == 1
+end
+
+@testset "in_finalizer" begin
+    @test !GC.in_finalizer()
+
+    in_fin = Ref{Any}()
+    wait(@async begin
+        r = Ref(1)
+        finalizer(r) do _
+            in_fin[] = GC.in_finalizer()
+        end
+        nothing
+    end)
+    GC.gc(true); yield()
+    @test in_fin[]
 end
diff --git a/test/missing.jl b/test/missing.jl
index 474e10620732f..f06d1aad7a6b1 100644
--- a/test/missing.jl
+++ b/test/missing.jl
@@ -21,8 +21,8 @@ end
     @test convert(Union{Nothing, Missing}, nothing) === nothing
     @test convert(Union{Missing, Nothing, Float64}, 1) === 1.0
 
-    @test_throws MethodError convert(Missing, 1)
-    @test_throws MethodError convert(Union{Nothing, Missing}, 1)
+    @test_throws ErrorException("cannot convert a value to missing for assignment") convert(Missing, 1)
+    @test_throws ErrorException("cannot convert a value to missing for assignment") convert(Union{Nothing, Missing}, 1)
     @test_throws MethodError convert(Union{Int, Missing}, "a")
 end
 
@@ -66,6 +66,7 @@ end
     @test isequal(missing, missing)
     @test !isequal(1, missing)
     @test !isequal(missing, 1)
+    @test !isequal('c', missing)
     @test (missing < missing) === missing
     @test (missing < 1) === missing
     @test (1 < missing) === missing
diff --git a/test/namedtuple.jl b/test/namedtuple.jl
index b2101944d423b..ea3a5cdbb8ee4 100644
--- a/test/namedtuple.jl
+++ b/test/namedtuple.jl
@@ -76,6 +76,26 @@ let NT = NamedTuple{(:a,:b),Tuple{Int8,Int16}}, nt = (x=3,y=4)
     @test_throws MethodError convert(NT, nt)
 end
 
+@testset "convert NamedTuple" begin
+    conv1 = convert(NamedTuple{(:a,),Tuple{I}} where I, (;a=1))
+    @test conv1 === (a = 1,)
+
+    conv2 = convert(NamedTuple{(:a,),Tuple{Any}}, (;a=1))
+    @test conv2 === NamedTuple{(:a,), Tuple{Any}}((1,))
+
+    conv3 = convert(NamedTuple{(:a,),}, (;a=1))
+    @test conv3 === (a = 1,)
+
+    conv4 = convert(NamedTuple{(:a,),Tuple{I}} where I<:Unsigned, (;a=1))
+    @test conv4 === NamedTuple{(:a,), Tuple{Unsigned}}((1,))
+
+    conv5 = convert(NamedTuple, (;a=1))
+    @test conv1 === (a = 1,)
+
+    conv_res = @test_throws MethodError convert(NamedTuple{(:a,),Tuple{I}} where I<:AbstractString, (;a=1))
+    @test conv_res.value.f === convert && conv_res.value.args === (AbstractString, 1)
+end
+
 @test NamedTuple{(:a,:c)}((b=1,z=2,c=3,aa=4,a=5)) === (a=5, c=3)
 @test NamedTuple{(:a,)}(NamedTuple{(:b, :a), Tuple{Int, Union{Int,Nothing}}}((1, 2))) ===
     NamedTuple{(:a,), Tuple{Union{Int,Nothing}}}((2,))
@@ -124,7 +144,7 @@ end
 let nt = merge(NamedTuple{(:a,:b),Tuple{Int32,Union{Int32,Nothing}}}((1,Int32(2))),
                NamedTuple{(:a,:c),Tuple{Union{Int8,Nothing},Float64}}((nothing,1.0)))
     @test typeof(nt) == NamedTuple{(:a,:b,:c),Tuple{Union{Int8,Nothing},Union{Int32,Nothing},Float64}}
-    @test repr(nt) == "NamedTuple{(:a, :b, :c), Tuple{Union{Nothing, Int8}, Union{Nothing, Int32}, Float64}}((nothing, 2, 1.0))"
+    @test repr(nt) == "@NamedTuple{a::Union{Nothing, Int8}, b::Union{Nothing, Int32}, c::Float64}((nothing, 2, 1.0))"
 end
 
 @test merge(NamedTuple(), [:a=>1, :b=>2, :c=>3, :a=>4, :c=>5]) == (a=4, b=2, c=5)
diff --git a/test/numbers.jl b/test/numbers.jl
index 870acd37c089c..efb2702aff1c2 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -2483,17 +2483,32 @@ Base.abs(x::TestNumber) = TestNumber(abs(x.inner))
             d == 0 && continue
             fastd = Base.multiplicativeinverse(d)
             for n in numrange
+                d == -1 && n == typemin(typeof(n)) && continue
                 @test div(n,d) == div(n,fastd)
             end
         end
     end
     testmi(-1000:1000, -100:100)
-    testmi(typemax(Int)-1000:typemax(Int), -100:100)
-    testmi(typemin(Int)+1:typemin(Int)+1000, -100:100)
     @test_throws ArgumentError Base.multiplicativeinverse(0)
-    testmi(map(UInt32, 0:1000), map(UInt32, 1:100))
-    testmi(typemax(UInt32)-UInt32(1000):typemax(UInt32), map(UInt32, 1:100))
+    for T in [Int8, Int16, Int32, Int64, Int128]
+        testmi(map(T, typemin(T)+1:typemin(T)+100), map(T, -50:50))
+    end
+    for T in [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128]
+        testmi(map(T, typemax(T)-50:typemax(T)), map(T, 1:50))
+        testmi(rand(T, 50), rand(T, 50))
+        @test_throws ArgumentError Base.multiplicativeinverse(T(0))
+    end
+
+    # Division overflow is not handled
+    T = Int8
+    fastd = Base.multiplicativeinverse(T(-1))
+    @test_throws DivideError div(typemin(T), T(-1))
+    # does not throw:
+    # @test_throws div(typemin(T), fastd)
+    # test broadcasting works.
+    @test div.(3, Base.multiplicativeinverse(3)) == 1
 end
+
 @testset "ndims/indices/size/length" begin
     @test ndims(1) == 0
     @test ndims(Integer) == 0
@@ -2676,13 +2691,35 @@ end
     @test rem2pi(T(-8), RoundNearest) ≈ -8+2pi
     @test rem2pi(T(-8), RoundDown)    ≈ -8+4pi
     @test rem2pi(T(-8), RoundUp)      ≈ -8+2pi
-end
-
-@testset "PR #36420 $T" for T in (Float16, Float32, Float64)
+    # to hit n is even and n % 4 == 2 condition
+    @test rem2pi(T(3), RoundToZero)  == 3
+    @test rem2pi(T(3), RoundNearest) == 3
+    @test rem2pi(T(3), RoundDown)    == 3
+    @test rem2pi(T(3), RoundUp)      ≈ 3 - 2π
+    @test rem2pi(T(-3), RoundToZero)  == -3
+    @test rem2pi(T(-3), RoundNearest) == -3
+    @test rem2pi(T(-3), RoundDown)    ≈ -3 + 2π
+    @test rem2pi(T(-3), RoundUp)      == -3
+    # to hit even n condition and n % 4 != 2 condition
+    @test rem2pi(T(13), RoundToZero)  ≈ 13-4π
+    @test rem2pi(T(13), RoundNearest) ≈ 13-4π
+    @test rem2pi(T(13), RoundDown)    ≈ 13-4π
+    @test rem2pi(T(13), RoundUp)      ≈ 13-6π
+    @test rem2pi(T(-13), RoundToZero)  ≈ -13+4π
+    @test rem2pi(T(-13), RoundNearest) ≈ -13+4π
+    @test rem2pi(T(-13), RoundDown)    ≈ -13+6π
+    @test rem2pi(T(-13), RoundUp)      ≈ -13+4π
+end
+
+@testset "PR #36420 $T" for T in (Float16, Float32, Float64, BigFloat)
+    nan = reinterpret(Float64, reinterpret(UInt64, NaN) | rand(UInt64))
     for r in (RoundToZero, RoundNearest, RoundDown, RoundUp)
-        for x in (Inf, -Inf, NaN, -NaN)
+        for x in (Inf, -Inf, NaN, -NaN, nan)
             @test isnan(rem2pi(T(x), r))
             @test rem2pi(T(x), r) isa T
+            if isnan(x) && T !== BigFloat
+                @test rem2pi(T(x), r) === T(x)
+            end
         end
     end
 end
@@ -2930,6 +2967,20 @@ end
     end
 end
 
+Base.@irrational irrational_1548_pi 4863.185427757 1548big(pi)
+Base.@irrational irrational_inv_1548_pi 1/big(irrational_1548_pi)
+@testset "@irrational" begin
+    @test irrational_1548_pi ≈ 1548big(pi)
+    @test Float64(irrational_1548_pi) == 1548π
+    @test irrational_1548_pi ≈ 1548pi
+    @test irrational_1548_pi != 1548pi
+
+    @test irrational_inv_1548_pi ≈ inv(1548big(pi))
+    @test Float64(irrational_inv_1548_pi) == 1/(1548π)
+    @test irrational_inv_1548_pi ≈ inv(1548pi)
+    @test irrational_inv_1548_pi != inv(1548pi)
+end
+
 @testset "modf" begin
     @testset "remd" begin
         denorm_min = nextfloat(0.0)
diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl
index 530c5b3a844eb..e6490f5e9d345 100644
--- a/test/opaque_closure.jl
+++ b/test/opaque_closure.jl
@@ -1,8 +1,10 @@
 using Test
 using InteractiveUtils
 using Core: OpaqueClosure
+using Base.Experimental: @opaque
 
 const_int() = 1
+const_int_barrier() = Base.inferencebarrier(1)::typeof(1)
 
 const lno = LineNumberNode(1, :none)
 
@@ -12,7 +14,7 @@ let ci = @code_lowered const_int()
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
     end
 end
-@test isa(oc_trivial(), Core.OpaqueClosure{Tuple{}, Any})
+@test isa(oc_trivial(), OpaqueClosure{Tuple{}, Any})
 @test oc_trivial()() == 1
 
 let ci = @code_lowered const_int()
@@ -21,7 +23,7 @@ let ci = @code_lowered const_int()
             Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
     end
 end
-@test isa(oc_simple_inf(), Core.OpaqueClosure{Tuple{}, Int})
+@test isa(oc_simple_inf(), OpaqueClosure{Tuple{}, Int})
 @test oc_simple_inf()() == 1
 
 struct OcClos2Int
@@ -72,8 +74,8 @@ let ci = @code_lowered OcClos1Any(1)()
             :x))
     end
 end
-@test isa(oc_infer_pass_clos(1), Core.OpaqueClosure{Tuple{}, typeof(1)})
-@test isa(oc_infer_pass_clos("a"), Core.OpaqueClosure{Tuple{}, typeof("a")})
+@test isa(oc_infer_pass_clos(1), OpaqueClosure{Tuple{}, typeof(1)})
+@test isa(oc_infer_pass_clos("a"), OpaqueClosure{Tuple{}, typeof("a")})
 @test oc_infer_pass_clos(1)() == 1
 @test oc_infer_pass_clos("a")() == "a"
 
@@ -115,8 +117,6 @@ let A = [1 2]
     end
 end
 
-using Base.Experimental: @opaque
-
 @test @opaque(x->2x)(8) == 16
 let f = @opaque (x::Int, y::Float64)->(2x, 3y)
     @test_throws TypeError f(1, 1)
@@ -128,18 +128,26 @@ end
 @test uses_frontend_opaque(10)(8) == 18
 
 # World age mechanism
+module test_world_age
+
+using Test
+using Core: OpaqueClosure
+using Base.Experimental: @opaque
+
 function test_oc_world_age end
 mk_oc_world_age() = @opaque ()->test_oc_world_age()
 g_world_age = @opaque ()->test_oc_world_age()
 h_world_age = mk_oc_world_age()
-@test isa(h_world_age, Core.OpaqueClosure{Tuple{}, Union{}})
+@test isa(h_world_age, OpaqueClosure{Tuple{}, Union{}})
 test_oc_world_age() = 1
 @test_throws MethodError g_world_age()
 @test_throws MethodError h_world_age()
 @test mk_oc_world_age()() == 1
 g_world_age = @opaque ()->test_oc_world_age()
 @test g_world_age() == 1
-@test isa(mk_oc_world_age(), Core.OpaqueClosure{Tuple{}, Int})
+@test isa(mk_oc_world_age(), OpaqueClosure{Tuple{}, Int})
+
+end # module test_world_age
 
 function maybe_vararg(isva::Bool)
     T = isva ? Vararg{Int} : Int
@@ -170,33 +178,28 @@ mk_va_opaque() = @opaque (x...)->x
 @test mk_va_opaque()(1,2) == (1,2)
 
 # OpaqueClosure show method
-@test repr(@opaque x->1) == "(::Any)::Any->◌"
+@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)::Any->◌"
 
 # Opaque closure in CodeInfo returned from generated functions
-function mk_ocg(args...)
-    ci = @code_lowered const_int()
-    cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any,
-        Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1]
-    cig.slotnames = Symbol[Symbol("#self#")]
-    cig.slottypes = Any[Any]
-    cig.slotflags = UInt8[0x00]
-    cig
+let ci = @code_lowered const_int()
+    global function mk_ocg(world::UInt, source, args...)
+        @nospecialize
+        cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1]
+        cig.slotnames = Symbol[Symbol("#self#")]
+        cig.slottypes = Any[Any]
+        cig.slotflags = UInt8[0x00]
+        @assert cig.min_world == UInt(1)
+        @assert cig.max_world == typemax(UInt)
+        return cig
+    end
 end
 
 @eval function oc_trivial_generated()
     $(Expr(:meta, :generated_only))
-    $(Expr(:meta,
-            :generated,
-            Expr(:new,
-                Core.GeneratedFunctionStub,
-                :mk_ocg,
-                Any[:oc_trivial_generated],
-                Any[],
-                @__LINE__,
-                QuoteNode(Symbol(@__FILE__)),
-                true)))
+    $(Expr(:meta, :generated, mk_ocg))
 end
-@test isa(oc_trivial_generated(), Core.OpaqueClosure{Tuple{}, Any})
+@test isa(oc_trivial_generated(), OpaqueClosure{Tuple{}, Any})
 @test oc_trivial_generated()() == 1
 
 # Constprop through varargs OpaqueClosure
@@ -242,31 +245,55 @@ let oc = @opaque a->sin(a)
 end
 
 # constructing an opaque closure from IRCode
-let ci = code_typed(+, (Int, Int))[1][1]
-    ir = Core.Compiler.inflate_ir(ci)
-    @test OpaqueClosure(ir; nargs=2, isva=false)(40, 2) == 42
-    @test OpaqueClosure(ci)(40, 2) == 42
-
-    ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Int])
-    @test OpaqueClosure(ir; nargs=2, isva=false)(40, 2) == 42
-    @test isa(OpaqueClosure(ir; nargs=2, isva=false), Core.OpaqueClosure{Tuple{Int, Int}, Int})
-    @test_throws TypeError OpaqueClosure(ir; nargs=2, isva=false)(40.0, 2)
+let src = first(only(code_typed(+, (Int, Int))))
+    ir = Core.Compiler.inflate_ir(src)
+    @test OpaqueClosure(src)(40, 2) == 42
+    oc = OpaqueClosure(ir)
+    @test oc(40, 2) == 42
+    @test isa(oc, OpaqueClosure{Tuple{Int,Int}, Int})
+    @test_throws TypeError oc("40", 2)
+    @test OpaqueClosure(ir)(40, 2) == 42 # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
+end
+let ir = first(only(Base.code_ircode(sin, (Int,))))
+    @test OpaqueClosure(ir)(42) == sin(42)
+    @test OpaqueClosure(ir)(42) == sin(42) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
+    ir = first(only(Base.code_ircode(sin, (Float64,))))
+    @test OpaqueClosure(ir)(42.) == sin(42.)
+    @test OpaqueClosure(ir)(42.) == sin(42.) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive
 end
 
-let ci = code_typed((x, y...)->(x, y), (Int, Int))[1][1]
-    ir = Core.Compiler.inflate_ir(ci)
-    let oc = OpaqueClosure(ir; nargs=2, isva=true)
-        @test oc(40, 2) === (40, (2,))
+# variadic arguments
+let src = code_typed((Int,Int)) do x, y...
+        return (x, y)
+    end |> only |> first
+    let oc = OpaqueClosure(src)
+        @test oc(1,2) === (1,(2,))
         @test_throws MethodError oc(1,2,3)
     end
-    let oc = OpaqueClosure(ci)
-        @test oc(40, 2) === (40, (2,))
+    ir = Core.Compiler.inflate_ir(src)
+    let oc = OpaqueClosure(ir; isva=true)
+        @test oc(1,2) === (1,(2,))
         @test_throws MethodError oc(1,2,3)
     end
+end
 
-    ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Tuple{Int}])
-    let oc = OpaqueClosure(ir; nargs=2, isva=true)
-        @test oc(40, 2) === (40, (2,))
-        @test_throws MethodError oc(1,2,3)
+# Check for correct handling in case of broken return type.
+eval_oc_dyn(oc) = Base.inferencebarrier(oc)()
+eval_oc_spec(oc) = oc()
+for f in (const_int, const_int_barrier)
+    ci = code_lowered(f, Tuple{})[1]
+    for compiled in (true, false)
+        oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci))
+        oc_mismatch = let ci = code_lowered(f, Tuple{})[1]
+            if compiled
+                eval(:((()->$oc_expr)()))
+            else
+                eval(oc_expr)
+            end
+        end
+        @test isa(oc_mismatch, OpaqueClosure{Tuple{}, Union{}})
+        @test_throws TypeError eval_oc_dyn(oc_mismatch)
+        @test_throws TypeError eval_oc_spec(oc_mismatch)
     end
 end
diff --git a/test/operators.jl b/test/operators.jl
index 6a93f70cc21f0..46cf6c7526299 100644
--- a/test/operators.jl
+++ b/test/operators.jl
@@ -93,6 +93,23 @@ end
 
 @test isless('a','b')
 
+@testset "isless on pairs of integers (because there is a fastpath)" begin
+    @test isless((1,2), (1,3))
+    @test isless((0,-2), (0,2))
+    @test isless((-1,2), (1,2))
+    @test isless((-1,-2), (1,2))
+    @test !isless((1,3), (1,2))
+    @test !isless((0,2), (0,-2))
+    @test !isless((1,2), (-1,2))
+    @test !isless((1,2), (-1,-2))
+    @test !isless((-1,-2), (-1,-2))
+
+    @test isless((typemin(Int), typemin(Int)), (0,0))
+    @test isless((1, 1), (Int8(2), Int8(2)))
+    @test !isless((UInt8(200),Int8(-1)), (UInt8(200),Int8(-1)))
+    @test isless((1, 1), (1, unsigned(2)))
+end
+
 @testset "isgreater" begin
     # isgreater should be compatible with min.
     min1(a, b) = Base.isgreater(a, b) ? b : a
diff --git a/test/osutils.jl b/test/osutils.jl
index 36f2878017129..5e72675279cbc 100644
--- a/test/osutils.jl
+++ b/test/osutils.jl
@@ -51,7 +51,7 @@ end
 if Sys.iswindows()
     @testset "path variables use correct path delimiters on windows" begin
         for path in (Base.SYSCONFDIR, Base.DATAROOTDIR, Base.DOCDIR,
-                     Base.LIBDIR, Base.PRIVATE_LIBDIR, Base.INCLUDEDIR, Base.LIBEXECDIR)
+                     Base.LIBDIR, Base.PRIVATE_LIBDIR, Base.INCLUDEDIR, Base.LIBEXECDIR, Base.PRIVATE_LIBEXECDIR)
             @test !occursin("/", path)
             @test !occursin("\\\\", path)
         end
diff --git a/test/path.jl b/test/path.jl
index 4a4caa6b0b115..2f4f2d0983a58 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -171,6 +171,9 @@
         @test string(splitdrive(S(homedir()))...) == homedir()
         @test splitdrive("a\nb") == ("", "a\nb")
 
+        @test splitdir("a/\xfe/\n/b/c.ext") == ("a/\xfe/\n/b", "c.ext")
+        @test splitext("a/\xfe/\n/b/c.ext") == ("a/\xfe/\n/b/c", ".ext")
+
         if Sys.iswindows()
             @test splitdrive(S("\\\\servername\\hello.world\\filename.ext")) ==
                 ("\\\\servername\\hello.world","\\filename.ext")
diff --git a/test/precompile.jl b/test/precompile.jl
index 0febfecb78b69..606ee1087e51e 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -395,11 +395,11 @@ precompile_test_harness(false) do dir
                      Base.PkgId(m) => Base.module_build_id(m)
                  end for s in
                 [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates,
-                 :Distributed, :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll,
-                 :LazyArtifacts, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra,
+                 :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll,
+                 :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra,
                  :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf,
-                 :Profile, :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :SharedArrays, :Sockets,
-                 :SparseArrays, :TOML, :Tar, :Test, :UUIDs, :Unicode,
+                 :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :Sockets,
+                 :TOML, :Tar, :Test, :UUIDs, :Unicode,
                  :nghttp2_jll]
             ),
         )
@@ -662,13 +662,13 @@ precompile_test_harness("code caching") do dir
         @test all(i -> root_provenance(m, i) == Mid, 1:length(m.roots))
     end
     # Check that we can cache external CodeInstances:
-    # size(::Vector) has an inferred specialization for Vector{X}
-    msize = which(size, (Vector{<:Any},))
+    # length(::Vector) has an inferred specialization for `Vector{X}`
+    msize = which(length, (Vector{<:Any},))
     hasspec = false
-    for i = 1:length(msize.specializations)
-        mi = msize.specializations[i]
-        if isa(mi, Core.MethodInstance) && mi.specTypes == Tuple{typeof(size),Vector{Cacheb8321416e8a3e2f1.X}}
-            if isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) && mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing
+    for mi in Base.specializations(msize)
+        if mi.specTypes == Tuple{typeof(length),Vector{Cacheb8321416e8a3e2f1.X}}
+            if (isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) &&
+                mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing)
                 hasspec = true
                 break
             end
@@ -689,7 +689,7 @@ precompile_test_harness("code caching") do dir
     @test !isempty(groups[Bid])
     # Check that internal methods and their roots are accounted appropriately
     minternal = which(M.getelsize, (Vector,))
-    mi = minternal.specializations[1]
+    mi = minternal.specializations::Core.MethodInstance
     @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}}
     ci = mi.cache
     @test ci.relocatability == 1
@@ -698,7 +698,9 @@ precompile_test_harness("code caching") do dir
     Base.invokelatest() do
         M.getelsize(M.X2[])
     end
-    mi = minternal.specializations[2]
+    mispecs = minternal.specializations::Core.SimpleVector
+    @test mispecs[1] === mi
+    mi = mispecs[2]::Core.MethodInstance
     ci = mi.cache
     @test ci.relocatability == 0
     # PkgA loads PkgB, and both add roots to the same `push!` method (both before and after loading B)
@@ -783,8 +785,9 @@ precompile_test_harness("code caching") do dir
     MB = getfield(@__MODULE__, RootB)
     M = getfield(MA, RootModule)
     m = which(M.f, (Any,))
-    for mi in m.specializations
+    for mi in Base.specializations(m)
         mi === nothing && continue
+        mi = mi::Core.MethodInstance
         if mi.specTypes.parameters[2] === Int8
             # external callers
             mods = Module[]
@@ -854,9 +857,13 @@ precompile_test_harness("code caching") do dir
 
         # This will be invalidated if StaleC is loaded
         useA() = $StaleA.stale("hello")
+        useA2() = useA()
 
         # force precompilation
-        useA()
+        begin
+            Base.Experimental.@force_compile
+            useA2()
+        end
 
         ## Reporting tests
         call_nbits(x::Integer) = $StaleA.nbits(x)
@@ -894,12 +901,13 @@ precompile_test_harness("code caching") do dir
     MC = getfield(@__MODULE__, StaleC)
     world = Base.get_world_counter()
     m = only(methods(MA.use_stale))
-    mi = m.specializations[1]
+    mi = m.specializations::Core.MethodInstance
     @test hasvalid(mi, world)   # it was re-inferred by StaleC
     m = only(methods(MA.build_stale))
-    mis = filter(!isnothing, collect(m.specializations))
+    mis = filter(!isnothing, collect(m.specializations::Core.SimpleVector))
     @test length(mis) == 2
     for mi in mis
+        mi = mi::Core.MethodInstance
         if mi.specTypes.parameters[2] == Int
             @test mi.cache.max_world < world
         else
@@ -909,17 +917,16 @@ precompile_test_harness("code caching") do dir
         end
     end
     m = only(methods(MB.useA))
-    mi = m.specializations[1]
+    mi = m.specializations::Core.MethodInstance
     @test !hasvalid(mi, world)      # invalidated by the stale(x::String) method in StaleC
     m = only(methods(MC.call_buildstale))
-    mi = m.specializations[1]
+    mi = m.specializations::Core.MethodInstance
     @test hasvalid(mi, world)       # was compiled with the new method
 
-    # Reporting test
+    # Reporting test (ensure SnoopCompile works)
     @test all(i -> isassigned(invalidations, i), eachindex(invalidations))
     m = only(methods(MB.call_nbits))
-    for mi in m.specializations
-        mi === nothing && continue
+    for mi in Base.specializations(m)
         hv = hasvalid(mi, world)
         @test mi.specTypes.parameters[end] === Integer ? !hv : hv
     end
@@ -937,9 +944,13 @@ precompile_test_harness("code caching") do dir
     @test invalidations[j-1] == "insert_backedges_callee"
     @test isa(invalidations[j-2], Type)
     @test isa(invalidations[j+1], Vector{Any}) # [nbits(::UInt8)]
+    m = only(methods(MB.useA2))
+    mi = only(Base.specializations(m))
+    @test !hasvalid(mi, world)
+    @test mi ∈ invalidations
 
     m = only(methods(MB.map_nbits))
-    @test !hasvalid(m.specializations[1], world+1) # insert_backedges invalidations also trigger their backedges
+    @test !hasvalid(m.specializations::Core.MethodInstance, world+1) # insert_backedges invalidations also trigger their backedges
 end
 
 precompile_test_harness("invoke") do dir
@@ -1065,7 +1076,7 @@ precompile_test_harness("invoke") do dir
 
     for func in (M.f, M.g, M.internal, M.fnc, M.gnc, M.internalnc)
         m = get_method_for_type(func, Real)
-        mi = m.specializations[1]
+        mi = m.specializations::Core.MethodInstance
         @test length(mi.backedges) == 2
         @test mi.backedges[1] === Tuple{typeof(func), Real}
         @test isa(mi.backedges[2], Core.MethodInstance)
@@ -1073,7 +1084,7 @@ precompile_test_harness("invoke") do dir
     end
     for func in (M.q, M.qnc)
         m = get_method_for_type(func, Integer)
-        mi = m.specializations[1]
+        mi = m.specializations::Core.MethodInstance
         @test length(mi.backedges) == 2
         @test mi.backedges[1] === Tuple{typeof(func), Integer}
         @test isa(mi.backedges[2], Core.MethodInstance)
@@ -1081,31 +1092,31 @@ precompile_test_harness("invoke") do dir
     end
 
     m = get_method_for_type(M.h, Real)
-    @test isempty(m.specializations)
+    @test isempty(Base.specializations(m))
     m = get_method_for_type(M.hnc, Real)
-    @test isempty(m.specializations)
+    @test isempty(Base.specializations(m))
     m = only(methods(M.callq))
-    @test isempty(m.specializations) || nvalid(m.specializations[1]) == 0
+    @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0
     m = only(methods(M.callqnc))
-    @test isempty(m.specializations) || nvalid(m.specializations[1]) == 0
+    @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0
     m = only(methods(M.callqi))
-    @test m.specializations[1].specTypes == Tuple{typeof(M.callqi), Int}
+    @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqi), Int}
     m = only(methods(M.callqnci))
-    @test m.specializations[1].specTypes == Tuple{typeof(M.callqnci), Int}
+    @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqnci), Int}
 
     m = only(methods(M.g44320))
-    @test m.specializations[1].cache.max_world == typemax(UInt)
+    @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt)
 
     m = which(MI.getlast, (Any,))
-    @test m.specializations[1].cache.max_world == typemax(UInt)
+    @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt)
 
     # Precompile specific methods for arbitrary arg types
     invokeme(x) = 1
     invokeme(::Int) = 2
     m_any, m_int = sort(collect(methods(invokeme)); by=m->(m.file,m.line))
     @test precompile(invokeme, (Int,), m_any)
-    @test m_any.specializations[1].specTypes === Tuple{typeof(invokeme), Int}
-    @test isempty(m_int.specializations)
+    @test (m_any.specializations::Core.MethodInstance).specTypes === Tuple{typeof(invokeme), Int}
+    @test isempty(Base.specializations(m_int))
 end
 
 # test --compiled-modules=no command line option
@@ -1510,10 +1521,10 @@ precompile_test_harness("No external edges") do load_path
     Base.compilecache(Base.PkgId("NoExternalEdges"))
     @eval begin
         using NoExternalEdges
-        @test only(methods(NoExternalEdges.foo1)).specializations[1].cache.max_world != 0
-        @test only(methods(NoExternalEdges.foo2)).specializations[1].cache.max_world != 0
-        @test only(methods(NoExternalEdges.foo3)).specializations[1].cache.max_world != 0
-        @test only(methods(NoExternalEdges.foo4)).specializations[1].cache.max_world != 0
+        @test (only(methods(NoExternalEdges.foo1)).specializations::Core.MethodInstance).cache.max_world != 0
+        @test (only(methods(NoExternalEdges.foo2)).specializations::Core.MethodInstance).cache.max_world != 0
+        @test (only(methods(NoExternalEdges.foo3)).specializations::Core.MethodInstance).cache.max_world != 0
+        @test (only(methods(NoExternalEdges.foo4)).specializations::Core.MethodInstance).cache.max_world != 0
     end
 end
 
@@ -1527,7 +1538,7 @@ end
     @test precompile(M.f, (Int, Any))
     @test precompile(M.f, (AbstractFloat, Any))
     mis = map(methods(M.f)) do m
-        m.specializations[1]
+        m.specializations::Core.MethodInstance
     end
     @test any(mi -> mi.specTypes.parameters[2] === Any, mis)
     @test all(mi -> isa(mi.cache, Core.CodeInstance), mis)
@@ -1574,7 +1585,7 @@ precompile_test_harness("issue #46296") do load_path
         """
         module CodeInstancePrecompile
 
-        mi = first(methods(identity)).specializations[1]
+        mi = first(Base.specializations(first(methods(identity))))
         ci = Core.CodeInstance(mi, Any, nothing, nothing, zero(Int32), typemin(UInt),
                                typemax(UInt), zero(UInt32), zero(UInt32), nothing, 0x00)
 
@@ -1608,7 +1619,7 @@ end
     f46778(::Any, ::Type{Int}) = 1
     f46778(::Any, ::DataType) = 2
     @test precompile(Tuple{typeof(f46778), Int, DataType})
-    @test which(f46778, Tuple{Any,DataType}).specializations[1].cache.invoke != C_NULL
+    @test (which(f46778, Tuple{Any,DataType}).specializations::Core.MethodInstance).cache.invoke != C_NULL
 end
 
 
@@ -1669,9 +1680,9 @@ precompile_test_harness("PkgCacheInspector") do load_path
     end
 
     if ocachefile !== nothing
-        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint), ocachefile, depmods, true)
+        sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring), ocachefile, depmods, true, "PCI")
     else
-        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), cachefile, depmods, true)
+        sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), cachefile, depmods, true, "PCI")
     end
 
     modules, init_order, external_methods, new_specializations, new_method_roots, external_targets, edges = sv
@@ -1686,64 +1697,73 @@ end
 precompile_test_harness("DynamicExpressions") do load_path
     # https://github.com/JuliaLang/julia/pull/47184#issuecomment-1364716312
     write(joinpath(load_path, "Float16MWE.jl"),
-    """
-    module Float16MWE
-    struct Node{T}
-        val::T
-    end
-    doconvert(::Type{<:Node}, val) = convert(Float16, val)
-    precompile(Tuple{typeof(doconvert), Type{Node{Float16}}, Float64})
-    end # module Float16MWE
-    """)
+        """
+        module Float16MWE
+        struct Node{T}
+            val::T
+        end
+        doconvert(::Type{<:Node}, val) = convert(Float16, val)
+        precompile(Tuple{typeof(doconvert), Type{Node{Float16}}, Float64})
+        end # module Float16MWE
+        """)
     Base.compilecache(Base.PkgId("Float16MWE"))
-    (@eval (using Float16MWE))
-    Base.invokelatest() do
-        @test Float16MWE.doconvert(Float16MWE.Node{Float16}, -1.2) === Float16(-1.2)
-    end
+    @eval using Float16MWE
+    @test @invokelatest(Float16MWE.doconvert(Float16MWE.Node{Float16}, -1.2)) === Float16(-1.2)
 end
 
 precompile_test_harness("BadInvalidations") do load_path
     write(joinpath(load_path, "BadInvalidations.jl"),
-    """
-    module BadInvalidations
+        """
+        module BadInvalidations
         Base.Experimental.@compiler_options compile=min optimize=1
         getval() = Base.a_method_to_overwrite_in_test()
         getval()
-    end # module BadInvalidations
-    """)
+        end # module BadInvalidations
+        """)
     Base.compilecache(Base.PkgId("BadInvalidations"))
-    (@eval Base a_method_to_overwrite_in_test() = inferencebarrier(2))
-    (@eval (using BadInvalidations))
-    Base.invokelatest() do
-        @test BadInvalidations.getval() === 2
-    end
+    @eval Base a_method_to_overwrite_in_test() = inferencebarrier(2)
+    @eval using BadInvalidations
+    @test Base.invokelatest(BadInvalidations.getval) === 2
 end
 
 # https://github.com/JuliaLang/julia/issues/48074
 precompile_test_harness("WindowsCacheOverwrite") do load_path
     # https://github.com/JuliaLang/julia/pull/47184#issuecomment-1364716312
     write(joinpath(load_path, "WindowsCacheOverwrite.jl"),
-    """
-    module WindowsCacheOverwrite
-
-    end # module
-    """)
+        """
+        module WindowsCacheOverwrite
+        end # module
+        """)
     ji, ofile = Base.compilecache(Base.PkgId("WindowsCacheOverwrite"))
-    (@eval (using WindowsCacheOverwrite))
+    @eval using WindowsCacheOverwrite
 
     write(joinpath(load_path, "WindowsCacheOverwrite.jl"),
-    """
-    module WindowsCacheOverwrite
-
-    f() = "something new"
-
-    end # module
-    """)
+        """
+        module WindowsCacheOverwrite
+        f() = "something new"
+        end # module
+        """)
 
     ji_2, ofile_2 = Base.compilecache(Base.PkgId("WindowsCacheOverwrite"))
     @test ofile_2 == Base.ocachefile_from_cachefile(ji_2)
 end
 
+precompile_test_harness("Issue #48391") do load_path
+    write(joinpath(load_path, "I48391.jl"),
+        """
+        module I48391
+        struct SurrealFinite <: Real end
+        precompile(Tuple{typeof(Base.isless), SurrealFinite, SurrealFinite})
+        Base.:(<)(x::SurrealFinite, y::SurrealFinite) = "good"
+        end
+        """)
+    ji, ofile = Base.compilecache(Base.PkgId("I48391"))
+    @eval using I48391
+    x = Base.invokelatest(I48391.SurrealFinite)
+    @test Base.invokelatest(isless, x, x) === "good"
+    @test_throws ErrorException isless(x, x)
+end
+
 empty!(Base.DEPOT_PATH)
 append!(Base.DEPOT_PATH, original_depot_path)
 empty!(Base.LOAD_PATH)
diff --git a/test/project/Extensions/EnvWithDeps/Manifest.toml b/test/project/Extensions/EnvWithDeps/Manifest.toml
new file mode 100644
index 0000000000000..85ff259f0a4d5
--- /dev/null
+++ b/test/project/Extensions/EnvWithDeps/Manifest.toml
@@ -0,0 +1,21 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.9.0-rc3"
+manifest_format = "2.0"
+project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36"
+
+[[deps.ExtDep]]
+deps = ["SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.ExtDep2]]
+path = "../ExtDep2"
+uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+version = "0.1.0"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/EnvWithDeps/Project.toml b/test/project/Extensions/EnvWithDeps/Project.toml
new file mode 100644
index 0000000000000..cf020b56fc2e8
--- /dev/null
+++ b/test/project/Extensions/EnvWithDeps/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
diff --git a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
new file mode 100644
index 0000000000000..8ac961fa1a9a9
--- /dev/null
+++ b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml
@@ -0,0 +1,29 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.9.0-beta4"
+manifest_format = "2.0"
+project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef"
+
+[[deps.ExtDep]]
+deps = ["SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.HasExtensions]]
+path = "../HasExtensions.jl"
+uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
+version = "0.1.0"
+
+    [deps.HasExtensions.extensions]
+    Extension = "ExtDep"
+    ExtensionFolder = ["ExtDep", "ExtDep2"]
+
+    [deps.HasExtensions.weakdeps]
+    ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+    ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/EnvWithHasExtensions/Project.toml b/test/project/Extensions/EnvWithHasExtensions/Project.toml
new file mode 100644
index 0000000000000..8639881ae95c0
--- /dev/null
+++ b/test/project/Extensions/EnvWithHasExtensions/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
diff --git a/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml
new file mode 100644
index 0000000000000..66781a5701363
--- /dev/null
+++ b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml
@@ -0,0 +1,25 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.10.0-DEV"
+manifest_format = "2.0"
+project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef"
+
+[[deps.ExtDep]]
+deps = ["SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.HasExtensions]]
+path = "../HasExtensions_v2.jl"
+uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
+version = "0.2.0"
+weakdeps = ["ExtDep"]
+
+    [deps.HasExtensions.extensions]
+    Extension2 = "ExtDep"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/EnvWithHasExtensionsv2/Project.toml b/test/project/Extensions/EnvWithHasExtensionsv2/Project.toml
new file mode 100644
index 0000000000000..8639881ae95c0
--- /dev/null
+++ b/test/project/Extensions/EnvWithHasExtensionsv2/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
diff --git a/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl b/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl
index d64cbc680e3a5..5c1f2d1f301aa 100644
--- a/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl
+++ b/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl
@@ -4,10 +4,18 @@ using HasExtensions: HasExtensions, HasExtensionsStruct
 using ExtDep: ExtDepStruct
 # Loading ExtDep makes the extension "Extension" load
 
+const m = Base.get_extension(HasExtensions, :Extension)
+m isa Module || error("extension not loaded during precompilation")
+
 function do_something()
     HasExtensions.foo(HasExtensionsStruct()) == 1 || error()
     HasExtensions.foo(ExtDepStruct()) == 2 || error()
     return true
 end
 
+function __init__()
+    m = Base.get_extension(HasExtensions, :Extension)
+    m isa Module || error("extension not loaded during __init__")
+end
+
 end # module
diff --git a/test/project/Extensions/HasExtensions_v2.jl/Project.toml b/test/project/Extensions/HasExtensions_v2.jl/Project.toml
new file mode 100644
index 0000000000000..5d92a4b138058
--- /dev/null
+++ b/test/project/Extensions/HasExtensions_v2.jl/Project.toml
@@ -0,0 +1,9 @@
+name = "HasExtensions"
+uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8"
+version = "0.2.0"
+
+[weakdeps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+
+[extensions]
+Extension2 = "ExtDep"
diff --git a/test/project/Extensions/HasExtensions_v2.jl/ext/Extension2.jl b/test/project/Extensions/HasExtensions_v2.jl/ext/Extension2.jl
new file mode 100644
index 0000000000000..d027adec9c223
--- /dev/null
+++ b/test/project/Extensions/HasExtensions_v2.jl/ext/Extension2.jl
@@ -0,0 +1,3 @@
+module Extension2
+
+end
diff --git a/test/project/Extensions/HasExtensions_v2.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions_v2.jl/src/HasExtensions.jl
new file mode 100644
index 0000000000000..dbfaeec4f8812
--- /dev/null
+++ b/test/project/Extensions/HasExtensions_v2.jl/src/HasExtensions.jl
@@ -0,0 +1,10 @@
+module HasExtensions
+
+struct HasExtensionsStruct end
+
+foo(::HasExtensionsStruct) = 1
+
+ext_loaded = false
+ext_folder_loaded = false
+
+end # module
diff --git a/test/ranges.jl b/test/ranges.jl
index bef600338a61d..ec69c57fc0a8f 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Base.Checked: checked_length
+using InteractiveUtils: code_llvm
 
 @testset "range construction" begin
     @test_throws ArgumentError range(start=1, step=1, stop=2, length=10)
@@ -254,6 +255,45 @@ end
     @test x.hi/2 === PhysQuantity{1}(2.0)
     @test_throws ErrorException("Int is incommensurate with PhysQuantity") x/2
     @test zero(typeof(x)) === Base.TwicePrecision(PhysQuantity{1}(0.0))
+
+    function twiceprecision_roundtrip_is_not_lossy(
+        ::Type{S},
+        x::T,
+    ) where {S<:Number, T<:Union{Number,Base.TwicePrecision}}
+        tw = Base.TwicePrecision{S}(x)
+        @test x == T(tw)
+    end
+
+    function twiceprecision_is_normalized(tw::Tw) where {Tw<:Base.TwicePrecision}
+        (hi, lo) = (tw.hi, tw.lo)
+        normalized = Tw(Base.canonicalize2(hi, lo)...)
+        @test (abs(lo) ≤ abs(hi)) & (tw == normalized)
+    end
+
+    rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T)))
+
+    rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo)
+
+    # For this test the `BigFloat` mantissa needs to be just a bit
+    # larger than the `Float64` mantissa
+    setprecision(BigFloat, 70) do
+        n = 10
+        @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n
+            rand_twiceprecision_is_ok(T)
+        end
+        @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n
+            twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat))
+        end
+        @testset "twiceprecision roundtrip is not lossy 2" for i ∈ 1:n
+            twiceprecision_roundtrip_is_not_lossy(Float64, rand_twiceprecision(Float32))
+        end
+        @testset "twiceprecision normalization 1: Float64 to Float32" for i ∈ 1:n
+            twiceprecision_is_normalized(Base.TwicePrecision{Float32}(rand_twiceprecision(Float64)))
+        end
+        @testset "twiceprecision normalization 2: Float32 to Float64" for i ∈ 1:n
+            twiceprecision_is_normalized(Base.TwicePrecision{Float64}(rand_twiceprecision(Float32)))
+        end
+    end
 end
 @testset "ranges" begin
     @test size(10:1:0) == (0,)
@@ -887,7 +927,15 @@ function range_fuzztests(::Type{T}, niter, nrange) where {T}
         @test m == length(r)
         @test strt == first(r)
         @test Δ == step(r)
-        @test stop ≈ last(r) atol = eps((n-1)*Δ) + eps(stop) # account for intermediate rounding in computation of stop
+        # potential floating point error:
+        #   stop = strt + (n-1)*Δ
+        #      *          error <= eps((n-1)*Δ)/2 <= abs((n-1)*Δ)/2 * eps(T)
+        #      +          error <= eps(stop)/2    <= abs(stop)/2    * eps(T)
+        #   last(r)
+        #     rat(strt)   error <= eps(strt)/2    <= abs(strt)/2    * eps(T)
+        #     rat(Δ)      error <= (n-1)*eps(Δ)/2 <= abs((n-1)*Δ)/2 * eps(T)
+        #     T(...)      error <= eps(last(r))/2 <= abs(stop)/2    * eps(T)
+        @test stop ≈ last(r) atol = (abs(strt)/2 + (n-1)*abs(Δ) + abs(stop)) * eps(T)
         l = range(strt, stop=stop, length=n)
         @test n == length(l)
         @test strt == first(l)
@@ -2392,3 +2440,40 @@ end
     @test test_firstindex(StepRange{Union{Int64,Int128},Int}(Int64(1), 1, Int128(1)))
     @test test_firstindex(StepRange{Union{Int64,Int128},Int}(Int64(1), 1, Int128(0)))
 end
+
+@testset "PR 49516" begin
+    struct PR49516 <: Signed
+        n::Int
+    end
+    PR49516(f::PR49516) = f
+    Base.:*(x::Integer, f::PR49516) = PR49516(*(x, f.n))
+    Base.:+(f1::PR49516, f2::PR49516) = PR49516(+(f1.n, f2.n))
+    Base.show(io::IO, f::PR49516) = print(io, "PR49516(", f.n, ")")
+
+    srl = StepRangeLen(PR49516(1), PR49516(2), 10)
+    @test sprint(show, srl) == "PR49516(1):PR49516(2):PR49516(19)"
+end
+
+@testset "Inline StepRange Construction #49270" begin
+    x = rand(Float32, 80)
+    a = rand(round(Int, length(x) / 2):length(x), 10^6)
+
+    function test(x, a)
+        c = zero(Float32)
+
+        @inbounds for j in a
+            for i in 1:8:j
+                c += x[i]
+            end
+        end
+
+        return c
+    end
+
+    llvm_ir(f, args) = sprint((io, args...) -> code_llvm(io, args...; debuginfo=:none), f, Base.typesof(args...))
+
+    ir = llvm_ir(test, (x, a))
+    @test !occursin("steprange_last", ir)
+    @test !occursin("_colon", ir)
+    @test !occursin("StepRange", ir)
+end
diff --git a/test/rational.jl b/test/rational.jl
index 9f47f2cb9dd16..a1af6eda64516 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -253,6 +253,10 @@ end
     rational2 = Rational(-4500, 9000)
     @test sprint(show, rational1) == "1465//8593"
     @test sprint(show, rational2) == "-1//2"
+    @test sprint(show, -2//2) == "-1//1"
+    @test sprint(show, [-2//2,]) == "Rational{$Int}[-1]"
+    @test sprint(show, MIME"text/plain"(), Union{Int, Rational{Int}}[7 3//6; 6//3 2]) ==
+        "2×2 Matrix{Union{Rational{$Int}, $Int}}:\n  7    1//2\n 2//1   2"
     let
         io1 = IOBuffer()
         write(io1, rational1)
@@ -265,6 +269,9 @@ end
         @test read(io2, typeof(rational2)) == rational2
     end
 end
+@testset "abs overflow for Rational" begin
+    @test_throws OverflowError abs(typemin(Int) // 1)
+end
 @testset "parse" begin
     # Non-negative Int in which parsing is expected to work
     @test parse(Rational{Int}, string(10)) == 10 // 1
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 5402376744e82..daa0a3fbe1f92 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -6,7 +6,16 @@ using Random
 
 # issue #35800
 # tested very early since it can be state-dependent
-@test @inferred(mapreduce(x->count(!iszero,x), +, [rand(1)]; init = 0.)) == 1.0
+
+function my_simple_count(pred, g::Vector{T}) where {T}
+    n::T = zero(T)
+    for x in g
+        n += pred(x)
+    end
+    return n
+end
+
+@test @inferred(mapreduce(x->my_simple_count(!iszero,x), +, [rand(1)]; init = 0.)) == 1.0
 
 function safe_mapslices(op, A, region)
     newregion = intersect(region, 1:ndims(A))
diff --git a/test/reflection.jl b/test/reflection.jl
index 1f49cd7d0be02..0ae8cb3f9d393 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -84,7 +84,6 @@ end # module ReflectionTest
 @test isconcretetype(DataType)
 @test isconcretetype(Union)
 @test !isconcretetype(Union{})
-@test isconcretetype(Tuple{Union{}})
 @test !isconcretetype(Complex)
 @test !isconcretetype(Complex.body)
 @test !isconcretetype(AbstractArray{Int,1})
@@ -532,7 +531,7 @@ let
     ft = typeof(f18888)
 
     code_typed(f18888, Tuple{}; optimize=false)
-    @test !isempty(m.specializations) # uncached, but creates the specializations entry
+    @test m.specializations !== Core.svec() # uncached, but creates the specializations entry
     mi = Core.Compiler.specialize_method(m, Tuple{ft}, Core.svec())
     interp = Core.Compiler.NativeInterpreter(world)
     @test !Core.Compiler.haskey(Core.Compiler.code_cache(interp), mi)
@@ -548,7 +547,7 @@ let
 end
 
 # code_typed_by_type
-@test Base.code_typed_by_type(Tuple{Type{<:Val}})[1][2] == Val
+@test Base.code_typed_by_type(Tuple{Type{<:Val}})[2][2] == Val
 @test Base.code_typed_by_type(Tuple{typeof(sin), Float64})[1][2] === Float64
 
 # New reflection methods in 0.6
@@ -648,7 +647,7 @@ let
     world = Core.Compiler.get_world_counter()
     match = Base._methods_by_ftype(T22979, -1, world)[1]
     instance = Core.Compiler.specialize_method(match)
-    cinfo_generated = Core.Compiler.get_staged(instance)
+    cinfo_generated = Core.Compiler.get_staged(instance, world)
     @test_throws ErrorException Base.uncompressed_ir(match.method)
 
     test_similar_codeinfo(code_lowered(f22979, typeof(x22979))[1], cinfo_generated)
@@ -726,6 +725,31 @@ Base.delete_method(m)
 @test faz4(1) == 1
 @test faz4(1.0) == 1
 
+# Deletion & invoke (issue #48802)
+function f48802!(log, x::Integer)
+    log[] = "default"
+    return x + 1
+end
+function addmethod_48802()
+    @eval function f48802!(log, x::Int)
+        ret = invoke(f48802!, Tuple{Any, Integer}, log, x)
+        log[] = "specialized"
+        return ret
+    end
+end
+log = Ref{String}()
+@test f48802!(log, 1) == 2
+@test log[] == "default"
+addmethod_48802()
+@test f48802!(log, 1) == 2
+@test log[] == "specialized"
+Base.delete_method(which(f48802!, Tuple{Any, Int}))
+@test f48802!(log, 1) == 2
+@test log[] == "default"
+addmethod_48802()
+@test f48802!(log, 1) == 2
+@test log[] == "specialized"
+
 # Methods with keyword arguments
 fookw(x; direction=:up) = direction
 fookw(y::Int) = 2
@@ -1007,10 +1031,30 @@ ambig_effects_test(a, b) = 1
     @test Base.infer_effects(ambig_effects_test, (Int,Int)) |> !Core.Compiler.is_nothrow # ambiguity error
     @test Base.infer_effects(ambig_effects_test, (Int,Any)) |> !Core.Compiler.is_nothrow # ambiguity error
     # builtins
-    @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_total
-    @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_total
+    @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_foldable_nothrow
+    @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_foldable_nothrow
     @test (Base.infer_effects(setfield!, ()); true) # `builtin_effects` shouldn't throw on empty `argtypes`
     @test (Base.infer_effects(Core.Intrinsics.arraylen, ()); true) # `intrinsic_effects` shouldn't throw on empty `argtypes`
 end
 
 @test Base._methods_by_ftype(Tuple{}, -1, Base.get_world_counter()) == Any[]
+@test length(methods(Base.Broadcast.broadcasted, Tuple{Any, Any, Vararg})) >
+      length(methods(Base.Broadcast.broadcasted, Tuple{Base.Broadcast.BroadcastStyle, Any, Vararg})) >=
+      length(methods(Base.Broadcast.broadcasted, Tuple{Base.Broadcast.DefaultArrayStyle{1}, Any, Vararg})) >=
+      10
+
+@testset "specializations" begin
+    f(x) = 1
+    f(1)
+    f("hello")
+    @test length(Base.specializations(only(methods(f)))) == 2
+end
+
+# https://github.com/JuliaLang/julia/issues/48856
+@test !Base.ismutationfree(Vector{Any})
+@test !Base.ismutationfree(Vector{Symbol})
+@test !Base.ismutationfree(Vector{UInt8})
+@test !Base.ismutationfree(Vector{Int32})
+@test !Base.ismutationfree(Vector{UInt64})
+
+@test Base.ismutationfree(Type{Union{}})
diff --git a/test/regex.jl b/test/regex.jl
index 70f620cad7141..e5f1428527512 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -59,6 +59,11 @@
     @test repr(r"\\\"") == raw"r\"\\\\\\\"\""
     @test repr(s"\\\"\\") == raw"s\"\\\\\\\"\\\\\""
 
+    @test repr(r""a) == "r\"\"a"
+    @test repr(r""imsxa) == "r\"\"imsxa"
+    @test repr(Regex("", Base.DEFAULT_COMPILER_OPTS, UInt32(0))) == """Regex("", $(repr(Base.DEFAULT_COMPILER_OPTS)), $(repr(UInt32(0))))"""
+    @test repr(Regex("", UInt32(0), Base.DEFAULT_MATCH_OPTS)) == """Regex("", $(repr(UInt32(0))), $(repr(Base.DEFAULT_MATCH_OPTS)))"""
+
     # findall
     @test findall(r"\w+", "foo bar") == [1:3, 5:7]
     @test findall(r"\w+", "foo bar", overlap=true) == [1:3, 2:3, 3:3, 5:7, 6:7, 7:7]
@@ -122,18 +127,24 @@
 
     # Backcapture reference in substitution string
     @test replace("abcde", r"(..)(?P<byname>d)" => s"\g<byname>xy\\\1") == "adxy\\bce"
-    @test_throws ErrorException replace("a", r"(?P<x>)" => s"\g<y>")
+    @test_throws(ErrorException("Bad replacement string: Group y not found in regex r\"(?P<x>)\""),
+        replace("a", r"(?P<x>)" => s"\g<y>"))
     # test replace with invalid substitution group pattern
-    @test_throws ErrorException replace("s", r"(?<g1>.)" => s"\gg1>")
+    @test_throws(ErrorException("Bad replacement string: \\gg1>"),
+        replace("s", r"(?<g1>.)" => s"\gg1>"))
     # test replace with 2-digit substitution group
     @test replace(("0" ^ 9) * "1", Regex(("(0)" ^ 9) * "(1)") => s"10th group: \10") == "10th group: 1"
 
     # Proper unicode handling
     @test  match(r"∀∀", "∀x∀∀∀").match == "∀∀"
 
-    # 'a' flag to disable UCP
+    # 'a' flag to disable UCP and UTF
     @test match(r"\w+", "Düsseldorf").match == "Düsseldorf"
     @test match(r"\w+"a, "Düsseldorf").match == "D"
+    @test match(r".+"a, "Düsseldorf").match == "Düsseldorf"
+    @test match(r".+"a, "Dü\xefsseldorf").match == "Dü\xefsseldorf"
+    @test_throws(ErrorException("PCRE.exec error: $(Base.PCRE.err_message(Base.PCRE.ERROR_UTF8_ERR6))"),
+        match(r"(*UTF).+"a, "Dü\xefsseldorf"))
 
     # Regex behaves like a scalar in broadcasting
     @test occursin.(r"Hello", ["Hello", "World"]) == [true, false]
@@ -211,8 +222,7 @@
     end
 
     # Test that PCRE throws the correct kind of error
-    # TODO: Uncomment this once the corresponding change has propagated to CI
-    #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
+    @test_throws ErrorException("PCRE error: NULL regex object") Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
 
     # test that we can get the error message of negative error codes
     @test Base.PCRE.err_message(Base.PCRE.ERROR_NOMEMORY) isa String
diff --git a/test/runtests.jl b/test/runtests.jl
index 91f3a67490315..16f60ddcf6764 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -75,6 +75,7 @@ move_to_node1("precompile")
 move_to_node1("SharedArrays")
 move_to_node1("threads")
 move_to_node1("Distributed")
+move_to_node1("gc")
 # Ensure things like consuming all kernel pipe memory doesn't interfere with other tests
 move_to_node1("stress")
 
diff --git a/test/show.jl b/test/show.jl
index 5e5583135915b..76772c649a838 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -268,7 +268,6 @@ end
 @test repr(Expr(:import, :Foo)) == ":(\$(Expr(:import, :Foo)))"
 @test repr(Expr(:import, Expr(:(.), ))) == ":(\$(Expr(:import, :(\$(Expr(:.))))))"
 
-
 @test repr(Expr(:using, Expr(:(.), :A))) == ":(using A)"
 @test repr(Expr(:using, Expr(:(.), :A),
                         Expr(:(.), :B))) == ":(using A, B)"
@@ -286,6 +285,10 @@ end
 @test repr(Expr(:import, Expr(:(.), :A, :B),
                          Expr(:(.), :C, :D))) == ":(import A.B, C.D)"
 
+# https://github.com/JuliaLang/julia/issues/49168
+@test repr(:(using A: (..))) == ":(using A: (..))"
+@test repr(:(using A: (..) as twodots)) == ":(using A: (..) as twodots)"
+
 # range syntax
 @test_repr "1:2"
 @test_repr "3:4:5"
@@ -788,6 +791,14 @@ let ms = methods(S45879)
     @test sprint(show, Base.MethodList(Method[], typeof(S45879).name.mt)) isa String
 end
 
+function f49475(a=12.0; b) end
+let ms = methods(f49475)
+    @test length(ms) == 2
+    repr1 = sprint(show, "text/plain", ms[1])
+    repr2 = sprint(show, "text/plain", ms[2])
+    @test occursin("f49475(; ...)", repr1) || occursin("f49475(; ...)", repr2)
+end
+
 if isempty(Base.GIT_VERSION_INFO.commit)
     @test occursin("https://github.com/JuliaLang/julia/tree/v$VERSION/base/special/trig.jl#L", Base.url(which(sin, (Float64,))))
 else
@@ -1348,6 +1359,14 @@ test_repr("(:).a")
 @test repr(Tuple{String, Int64, Int64, Int64}) == "Tuple{String, Int64, Int64, Int64}"
 @test repr(Tuple{String, Int64, Int64, Int64, Int64}) == "Tuple{String, Vararg{Int64, 4}}"
 
+# Test printing of NamedTuples using the macro syntax
+@test repr(@NamedTuple{kw::Int64}) == "@NamedTuple{kw::Int64}"
+@test repr(@NamedTuple{kw::Union{Float64, Int64}, kw2::Int64}) == "@NamedTuple{kw::Union{Float64, Int64}, kw2::Int64}"
+@test repr(@NamedTuple{kw::@NamedTuple{kw2::Int64}}) == "@NamedTuple{kw::@NamedTuple{kw2::Int64}}"
+@test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}"
+@test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}"
+
+
 @testset "issue #42931" begin
     @test repr(NTuple{4, :A}) == "NTuple{4, :A}"
     @test repr(NTuple{3, :A}) == "Tuple{:A, :A, :A}"
@@ -1827,8 +1846,8 @@ end
     # issue #27747
     let t = (x = Integer[1, 2],)
         v = [t, t]
-        @test showstr(v) == "NamedTuple{(:x,), Tuple{Vector{Integer}}}[(x = [1, 2],), (x = [1, 2],)]"
-        @test replstr(v) == "2-element Vector{NamedTuple{(:x,), Tuple{Vector{Integer}}}}:\n (x = [1, 2],)\n (x = [1, 2],)"
+        @test showstr(v) == "@NamedTuple{x::Vector{Integer}}[(x = [1, 2],), (x = [1, 2],)]"
+        @test replstr(v) == "2-element Vector{@NamedTuple{x::Vector{Integer}}}:\n (x = [1, 2],)\n (x = [1, 2],)"
     end
 
     # issue #25857
@@ -1867,6 +1886,10 @@ end
     @test replstr((; var"#var#"=1)) == """(var"#var#" = 1,)"""
     @test replstr((; var"a"=1, b=2)) == "(a = 1, b = 2)"
     @test replstr((; a=1, b=2)) == "(a = 1, b = 2)"
+
+    # issue 48828, typeinfo missing for arrays with >2 dimensions
+    @test showstr(Float16[1.0 3.0; 2.0 4.0;;; 5.0 7.0; 6.0 8.0]) ==
+                 "Float16[1.0 3.0; 2.0 4.0;;; 5.0 7.0; 6.0 8.0]"
 end
 
 @testset "#14684: `display` should print associative types in full" begin
@@ -2054,6 +2077,13 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     Base.IRShow.show_ir(io, ir, Base.IRShow.default_config(ir; verbose_linetable=true))
     seekstart(io)
     @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), eachline(io)) == 10
+
+    # Test that a bad :invoke doesn't cause an error during printing
+    Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(Expr(:invoke, nothing, sin), Any), false)
+    io = IOBuffer()
+    Base.IRShow.show_ir(io, ir)
+    seekstart(io)
+    @test contains(String(take!(io)), "Expr(:invoke, nothing")
 end
 
 # Verify that extra instructions at the end of the IR
diff --git a/test/some.jl b/test/some.jl
index 27d50ca354a49..e49fc586a3a6e 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -33,7 +33,7 @@
 @test convert(Union{Int, Nothing}, 1) === 1
 @test convert(Union{Int, Nothing}, 1.0) === 1
 @test convert(Nothing, nothing) === nothing
-@test_throws MethodError convert(Nothing, 1)
+@test_throws ErrorException("cannot convert a value to nothing for assignment") convert(Nothing, 1)
 
 ## show()
 
diff --git a/test/sorting.jl b/test/sorting.jl
index 691f0a0e2bc39..ec1666dabb2fb 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -84,6 +84,8 @@ end
         @test issorted(sort(1:2000, alg=Alg, by=x->0))
         @test issorted(sort(1:2000, alg=Alg, by=x->x÷100))
     end
+    @test sort(1:2000, by=x->x÷100, rev=true) == sort(1:2000, by=x->-x÷100) ==
+        vcat(2000, (x:x+99 for x in 1900:-100:100)..., 1:99)
 end
 
 @testset "partialsort" begin
@@ -560,6 +562,13 @@ end
     end
 end
 
+@testset "Offset with missing (#48862)" begin
+    v = [-1.0, missing, 1.0, 0.0, missing, -0.5, 0.5, 1.0, -0.5, missing, 0.5, -0.8, 1.5, NaN]
+    vo = OffsetArray(v, (firstindex(v):lastindex(v)).+100)
+    @test issorted(sort!(vo))
+    @test issorted(v)
+end
+
 @testset "searchsortedfirst/last with generalized indexing" begin
     o = OffsetVector(1:3, -2)
     @test searchsortedfirst(o, 4) == lastindex(o) + 1
@@ -764,6 +773,18 @@ end
 @testset "Unions with missing" begin
     @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100)))))
     @test issorted(sort(vcat(rand(Int8, 600), [missing])))
+
+    # Because we define defalg(::AbstractArray{Missing})
+    @test all(fill(missing, 10) .=== sort(fill(missing, 10)))
+
+    # Unit tests for WithoutMissingVector
+    a = [1,7,missing,4]
+    @test_throws ArgumentError Base.Sort.WithoutMissingVector(a)
+    @test eltype(a[[1,2,4]]) == eltype(a)
+    @test eltype(Base.Sort.WithoutMissingVector(a[[1,2,4]])) == Int
+    am = Base.Sort.WithoutMissingVector(a, unsafe=true)
+    @test am[2] == 7
+    @test eltype(am) == Int
 end
 
 @testset "Specific algorithms" begin
@@ -948,6 +969,17 @@ end
     test_allocs()
 end
 
+@testset "Presorted and reverse-presorted" begin
+    for len in [7, 92, 412, 780]
+        x = sort(randn(len))
+        for _ in 1:2
+            @test issorted(sort(x))
+            @test issorted(sort(x), by=x -> x+7)
+            reverse!(x)
+        end
+    end
+end
+
 # This testset is at the end of the file because it is slow.
 @testset "searchsorted" begin
     numTypes = [ Int8,  Int16,  Int32,  Int64,  Int128,
diff --git a/test/specificity.jl b/test/specificity.jl
index 1a5c117ce5d9d..9b605444bad42 100644
--- a/test/specificity.jl
+++ b/test/specificity.jl
@@ -214,7 +214,7 @@ f27361(::M) where M <: Tuple{3} = nothing
 @test length(methods(f27361)) == 2
 
 # specificity of TypeofBottom
-@test args_morespecific(Tuple{Core.TypeofBottom}, Tuple{DataType})
+@test !args_morespecific(Tuple{DataType}, Tuple{Core.TypeofBottom})
 @test args_morespecific(Tuple{Core.TypeofBottom}, Tuple{Type{<:Tuple}})
 
 @test  args_morespecific(Tuple{Type{Any}, Type}, Tuple{Type{T}, Type{T}} where T)
@@ -311,3 +311,8 @@ let A = Tuple{Type{SubString{S}},AbstractString} where S<:AbstractString,
     @test  args_morespecific(B, C)
     @test  args_morespecific(A, C)
 end
+
+@test args_morespecific(Tuple{Type{Union{}}, Any}, Tuple{Any, Type{Union{}}})
+@test args_morespecific(Tuple{typeof(Union{}), Any}, Tuple{Any, Type{Union{}}})
+@test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any}, Tuple{Type{Union{}}, Any, Type{Union{}}})
+@test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any, Type{Union{}}}, Tuple{Type{Union{}}, Any, Type{Union{}}, Type{Union{}}})
diff --git a/test/stack_overflow.jl b/test/stack_overflow.jl
index 9f4bae6f3f5b3..297186c8a4d3a 100644
--- a/test/stack_overflow.jl
+++ b/test/stack_overflow.jl
@@ -17,3 +17,20 @@ let exename = Base.julia_cmd()
     @show readchomperrors(`$exename -e "f() = f(); f()"`)
     @show readchomperrors(`$exename -e "f() = f(); fetch(@async f())"`)
 end
+
+# Issue #49507: stackoverflow in type inference caused by close(::Channel, ::Exception)
+@testset "close(::Channel, ::StackOverflowError)" begin
+    ch = let result = Channel()
+        foo() = try
+            foo()
+        catch e;
+            close(result, e)
+        end
+
+        foo()  # This shouldn't fail with an internal stackoverflow error in inference.
+
+        result
+    end
+
+    @test (try take!(ch) catch e; e; end) isa StackOverflowError
+end
diff --git a/test/stacktraces.jl b/test/stacktraces.jl
index fb873c1a5cfb7..96393b124f70e 100644
--- a/test/stacktraces.jl
+++ b/test/stacktraces.jl
@@ -91,8 +91,9 @@ trace = (try; f(3); catch; stacktrace(catch_backtrace()); end)[1:3]
 can_inline = Bool(Base.JLOptions().can_inline)
 for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false))
     @test frame.func === typeof(func).name.mt.name
-    #@test get(frame.linfo).def === which(func, (Any,)).func
-    #@test get(frame.linfo).specTypes === Tuple{typeof(func), Int}
+    @test frame.linfo.def.module === which(func, (Any,)).module
+    @test frame.linfo.def === which(func, (Any,))
+    @test frame.linfo.specTypes === Tuple{typeof(func), Int}
     # line
     @test frame.file === Symbol(@__FILE__)
     @test !frame.from_c
diff --git a/test/staged.jl b/test/staged.jl
index 4a7fa3d7f4c84..0fa8ecb182cff 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -196,12 +196,11 @@ let gf_err2
         return nothing
     end
     Expected = ErrorException("code reflection cannot be used from generated functions")
+    @test_throws Expected gf_err2(code_lowered)
     @test_throws Expected gf_err2(code_typed)
     @test_throws Expected gf_err2(code_llvm)
     @test_throws Expected gf_err2(code_native)
-    @test gf_err_ref[] == 66
-    @test gf_err2(code_lowered) === nothing
-    @test gf_err_ref[] == 1077
+    @test gf_err_ref[] == 88
 end
 
 # issue #15043
@@ -246,12 +245,18 @@ f22440kernel(x::AbstractFloat) = x * x
 f22440kernel(::Type{T}) where {T} = one(T)
 f22440kernel(::Type{T}) where {T<:AbstractFloat} = zero(T)
 
-@generated function f22440(y)
-    match = Base._methods_by_ftype(Tuple{typeof(f22440kernel),y}, -1, typemax(UInt))[1]
+function f22440_gen(world::UInt, source, _, y)
+    match = only(Base._methods_by_ftype(Tuple{typeof(f22440kernel),y}, -1, world))
     code_info = Base.uncompressed_ir(match.method)
     Meta.partially_inline!(code_info.code, Any[], match.spec_types, Any[match.sparams...], 0, 0, :propagate)
+    # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[])
+    # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[])
     return code_info
 end
+@eval function f22440(y)
+    $(Expr(:meta, :generated, f22440_gen))
+    $(Expr(:meta, :generated_only))
+end
 
 @test f22440(Int) === f22440kernel(Int)
 @test f22440(Float64) === f22440kernel(Float64)
@@ -309,26 +314,33 @@ end
 # https://github.com/JuliaDebug/CassetteOverlay.jl/issues/12
 # generated function with varargs and unfortunately placed unused slot
 @generated function f_vararg_generated(args...)
+    local unusedslot4
+    local unusedslot5
+    local unusedslot6
     :($args)
 end
 g_vararg_generated() = f_vararg_generated((;), (;), Base.inferencebarrier((;)))
 let tup = g_vararg_generated()
     @test all(==(typeof((;))), tup)
-    # This is just to make sure that the test is actually testing what we want -
-    # the test only works if there's an unused that matches the position of the
-    # inferencebarrier argument above (N.B. the generator function itself
+    # This is just to make sure that the test is actually testing what we want:
+    # the test only works if there is an unused that matches the position of
+    # the inferencebarrier argument above (N.B. the generator function itself
     # shifts everything over by 1)
-    @test only(code_lowered(only(methods(f_vararg_generated)).generator.gen)).slotflags[5] == UInt8(0x00)
+    @test_broken only(code_lowered(only(methods(f_vararg_generated)).generator.gen)).slotflags[5] == 0x00
 end
 
 # respect a given linetable in code generation
 # https://github.com/JuliaLang/julia/pull/47750
-let match = Base._which(Tuple{typeof(sin),Int})
+let world = Base.get_world_counter()
+    match = Base._which(Tuple{typeof(sin), Int}; world)
     mi = Core.Compiler.specialize_method(match)
-    lwr = Core.Compiler.retrieve_code_info(mi)
-    @test all(lin->lin.method===:sin, lwr.linetable)
-    @generated sin_generated(a) = lwr
+    lwr = Core.Compiler.retrieve_code_info(mi, world)
+    @test all(lin->lin.method === :sin, lwr.linetable)
+    @eval function sin_generated(a)
+        $(Expr(:meta, :generated, Returns(lwr)))
+        $(Expr(:meta, :generated_only))
+    end
     src = only(code_lowered(sin_generated, (Int,)))
-    @test all(lin->lin.method===:sin, src.linetable)
+    @test all(lin->lin.method === :sin, src.linetable)
     @test sin_generated(42) == sin(42)
 end
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 33c64410454ef..602c38551f6d8 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -164,6 +164,12 @@ end
     @test endswith(y)(y)
     @test endswith(z, z)
     @test endswith(z)(z)
+    #40616 startswith for IO objects
+    let s = "JuliaLang", io = IOBuffer(s)
+        for prefix in ("Julia", "July", s^2, "Ju", 'J', 'x', ('j','J'))
+            @test startswith(io, prefix) == startswith(s, prefix)
+        end
+    end
 end
 
 @testset "SubStrings and Views" begin
@@ -938,6 +944,21 @@ end
     end
 end
 
+@testset "Conversion to Type{Union{String, SubString{String}}}" begin
+    str = "abc"
+    substr = SubString(str)
+    for T in [String, SubString{String}]
+        conv_str = convert(T, str)
+        conv_substr = convert(T, substr)
+
+        if T == String
+            @test conv_str === conv_substr === str
+        elseif T == SubString{String}
+            @test conv_str === conv_substr === substr
+        end
+    end
+end
+
 @test unsafe_wrap(Vector{UInt8},"\xcc\xdd\xee\xff\x80") == [0xcc,0xdd,0xee,0xff,0x80]
 
 @test iterate("a", 1)[2] == 2
@@ -1104,6 +1125,32 @@ end
     @test sprint(summary, "") == "empty String"
 end
 
+@testset "isascii" begin
+    N = 1
+    @test isascii("S"^N) == true
+    @test isascii("S"^(N - 1)) == true
+    @test isascii("S"^(N + 1)) == true
+
+    @test isascii("λ" * ("S"^(N))) == false
+    @test isascii(("S"^(N)) * "λ") == false
+
+    for p = 1:16
+        N = 2^p
+        @test isascii("S"^N) == true
+        @test isascii("S"^(N - 1)) == true
+        @test isascii("S"^(N + 1)) == true
+
+        @test isascii("λ" * ("S"^(N))) == false
+        @test isascii(("S"^(N)) * "λ") == false
+        @test isascii("λ"*("S"^(N - 1))) == false
+        @test isascii(("S"^(N - 1)) * "λ") == false
+        if N > 4
+            @test isascii("λ" * ("S"^(N - 3))) == false
+            @test isascii(("S"^(N - 3)) * "λ") == false
+        end
+    end
+end
+
 @testset "Plug holes in test coverage" begin
     @test_throws MethodError checkbounds(Bool, "abc", [1.0, 2.0])
 
@@ -1144,4 +1191,198 @@ end
         end
         return a
     end |> Core.Compiler.is_foldable
+    let i=49248
+        @test String(lazy"PR n°$i") == "PR n°49248"
+    end
+end
+
+@testset "String Effects" begin
+    for (f, Ts) in [(*, (String, String)),
+                   (*, (Char, String)),
+                   (*, (Char, Char)),
+                   (string, (Symbol, String, Char)),
+                   (==, (String, String)),
+                   (cmp, (String, String)),
+                   (==, (Symbol, Symbol)),
+                   (cmp, (Symbol, Symbol)),
+                   (String, (Symbol,)),
+                   (length, (String,)),
+                   (hash, (String,UInt)),
+                   (hash, (Char,UInt)),]
+        e = Base.infer_effects(f, Ts)
+        @test Core.Compiler.is_foldable(e) || (f, Ts)
+        @test Core.Compiler.is_removable_if_unused(e) || (f, Ts)
+    end
+    for (f, Ts) in [(^, (String, Int)),
+                   (^, (Char, Int)),
+                   (codeunit, (String, Int)),
+                   ]
+        e = Base.infer_effects(f, Ts)
+        @test Core.Compiler.is_foldable(e) || (f, Ts)
+        @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
+    end
+    # Substrings don't have any nice effects because the compiler can
+    # invent fake indices leading to out of bounds
+    for (f, Ts) in [(^, (SubString{String}, Int)),
+                   (string, (String, SubString{String})),
+                   (string, (Symbol, SubString{String})),
+                   (hash, (SubString{String},UInt)),
+                   ]
+        e = Base.infer_effects(f, Ts)
+        @test !Core.Compiler.is_foldable(e) || (f, Ts)
+        @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
+    end
+    @test_throws ArgumentError Symbol("a\0a")
+end
+
+@testset "Ensure UTF-8 DFA can never leave invalid state" begin
+    for b = typemin(UInt8):typemax(UInt8)
+        @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_INVALID,[b],1,1) == Base._UTF8_DFA_INVALID
+    end
+end
+@testset "Ensure  UTF-8 DFA stays in ASCII State for all ASCII" begin
+    for b = 0x00:0x7F
+        @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) == Base._UTF8_DFA_ASCII
+    end
+end
+
+@testset "Validate UTF-8 DFA" begin
+    # Unicode 15
+    # Table 3-7. Well-Formed UTF-8 Byte Sequences
+
+    table_rows = [  [0x00:0x7F],
+                    [0xC2:0xDF,0x80:0xBF],
+                    [0xE0:0xE0,0xA0:0xBF,0x80:0xBF],
+                    [0xE1:0xEC,0x80:0xBF,0x80:0xBF],
+                    [0xED:0xED,0x80:0x9F,0x80:0xBF],
+                    [0xEE:0xEF,0x80:0xBF,0x80:0xBF],
+                    [0xF0:0xF0,0x90:0xBF,0x80:0xBF,0x80:0xBF],
+                    [0xF1:0xF3,0x80:0xBF,0x80:0xBF,0x80:0xBF],
+                    [0xF4:0xF4,0x80:0x8F,0x80:0xBF,0x80:0xBF]]
+    invalid_first_bytes = union(0xC0:0xC1,0xF5:0xFF,0x80:0xBF)
+
+    valid_first_bytes = union(collect(first(r) for r in table_rows)...)
+
+
+
+    # Prove that the first byte sets in the table & invalid cover all bytes
+    @test length(union(valid_first_bytes,invalid_first_bytes)) == 256
+    @test length(intersect(valid_first_bytes,invalid_first_bytes)) == 0
+
+    #Check the ASCII range
+    for b = 0x00:0x7F
+        #Test from both UTF-8 state and ascii state
+        @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b],1,1) == Base._UTF8_DFA_ACCEPT
+        @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) == Base._UTF8_DFA_ASCII
+    end
+
+    #Check the remaining first bytes
+    for b = 0x80:0xFF
+        if b ∈ invalid_first_bytes
+            @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b],1,1) == Base._UTF8_DFA_INVALID
+            @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) == Base._UTF8_DFA_INVALID
+        else
+            @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b],1,1) != Base._UTF8_DFA_INVALID
+            @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) != Base._UTF8_DFA_INVALID
+        end
+    end
+
+    # Check two byte Sequences
+    for table_row in [table_rows[2]]
+        b1 = first(table_row[1])
+        state1 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1)
+        state2 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1)
+        @test state1 == state2
+        #Prove that all the first bytes in a row give same state
+        for b1 in table_row[1]
+            @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1)
+            @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1)
+        end
+        b1 = first(table_row[1])
+        #Prove that all valid second bytes return correct state
+        for b2 = table_row[2]
+            @test Base._UTF8_DFA_ACCEPT == Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        end
+        for b2 = setdiff(0x00:0xFF,table_row[2])
+            @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        end
+    end
+
+    # Check three byte Sequences
+    for table_row in table_rows[3:6]
+        b1 = first(table_row[1])
+        state1 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1)
+        state2 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1)
+        @test state1 == state2
+        #Prove that all the first bytes in a row give same state
+        for b1 in table_row[1]
+            @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1)
+            @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1)
+        end
+
+        b1 = first(table_row[1])
+        b2 = first(table_row[2])
+        #Prove that all valid second bytes return same state
+        state2 = Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        for b2 = table_row[2]
+            @test state2 == Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        end
+        for b2 = setdiff(0x00:0xFF,table_row[2])
+            @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        end
+
+        b2 = first(table_row[2])
+        #Prove that all valid third bytes return correct state
+        for b3 = table_row[3]
+            @test Base._UTF8_DFA_ACCEPT == Base._isvalid_utf8_dfa(state2,[b3],1,1)
+        end
+        for b3 = setdiff(0x00:0xFF,table_row[3])
+            @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state2,[b3],1,1)
+        end
+    end
+
+    # Check Four byte Sequences
+    for table_row in table_rows[7:9]
+        b1 = first(table_row[1])
+        state1 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1)
+        state2 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1)
+        @test state1 == state2
+        #Prove that all the first bytes in a row give same state
+        for b1 in table_row[1]
+            @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1)
+            @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1)
+        end
+
+        b1 = first(table_row[1])
+        b2 = first(table_row[2])
+        #Prove that all valid second bytes return same state
+        state2 = Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        for b2 = table_row[2]
+            @test state2 == Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        end
+        for b2 = setdiff(0x00:0xFF,table_row[2])
+            @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state1,[b2],1,1)
+        end
+
+
+        b2 = first(table_row[2])
+        b3 = first(table_row[3])
+        state3 = Base._isvalid_utf8_dfa(state2,[b3],1,1)
+        #Prove that all valid third bytes return same state
+        for b3 = table_row[3]
+            @test state3 == Base._isvalid_utf8_dfa(state2,[b3],1,1)
+        end
+        for b3 = setdiff(0x00:0xFF,table_row[3])
+            @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state2,[b3],1,1)
+        end
+
+        b3 = first(table_row[3])
+        #Prove that all valid forth bytes return correct state
+        for b4 = table_row[4]
+            @test Base._UTF8_DFA_ACCEPT == Base._isvalid_utf8_dfa(state3,[b4],1,1)
+        end
+        for b4 = setdiff(0x00:0xFF,table_row[4])
+            @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state3,[b4],1,1)
+        end
+    end
 end
diff --git a/test/subtype.jl b/test/subtype.jl
index 40c60670110fb..4a3e55c039e94 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -588,7 +588,7 @@ function test_old()
 end
 
 const easy_menagerie =
-    Any[Bottom, Any, Int, Int8, Integer, Real,
+    Any[Any, Int, Int8, Integer, Real,
         Array{Int,1}, AbstractArray{Int,1},
         Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{},
         Union{Int,Int8},
@@ -627,6 +627,10 @@ end
 
 add_variants!(easy_menagerie)
 add_variants!(hard_menagerie)
+push!(easy_menagerie, Bottom)
+push!(easy_menagerie, Ref{Bottom})
+push!(easy_menagerie, @UnionAll N NTuple{N,Bottom})
+push!(easy_menagerie, @UnionAll S<:Bottom Ref{S})
 
 const menagerie = [easy_menagerie; hard_menagerie]
 
@@ -673,9 +677,11 @@ function test_properties()
             @test isequal_type(T, S) == isequal_type(Ref{T}, Ref{S})
 
             # covariance
-            @test issubTS == issub(Tuple{T}, Tuple{S})
-            @test issubTS == issub(Tuple{Vararg{T}}, Tuple{Vararg{S}})
-            @test issubTS == issub(Tuple{T}, Tuple{Vararg{S}})
+            if T !== Bottom && S !== Bottom
+                @test issubTS == issub(Tuple{T}, Tuple{S})
+                @test issubTS == issub(Tuple{Vararg{T}}, Tuple{Vararg{S}})
+                @test issubTS == issub(Tuple{T}, Tuple{Vararg{S}})
+            end
 
             # pseudo-contravariance
             @test issubTS == issub(¬S, ¬T)
@@ -753,8 +759,11 @@ function test_intersection()
     @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Int, Array{Number,1}},
                    Tuple{Int, Array{Number,1}})
 
+    # TODO: improve this result
+    #@testintersect((@UnionAll S Tuple{S,Vector{S}}), (@UnionAll T<:Real Tuple{T,AbstractVector{T}}),
+    #               (@UnionAll S<:Real Tuple{S,Vector{S}}))
     @testintersect((@UnionAll S Tuple{S,Vector{S}}), (@UnionAll T<:Real Tuple{T,AbstractVector{T}}),
-                   (@UnionAll S<:Real Tuple{S,Vector{S}}))
+                   (@UnionAll S<:Real Tuple{Real,Vector{S}}))
 
     # typevar corresponding to a type it will end up being neither greater than nor
     # less than
@@ -813,9 +822,9 @@ function test_intersection()
                    Tuple{Tuple{Vararg{Integer}}, Tuple{Integer,Integer}},
                    Tuple{Tuple{Integer,Integer}, Tuple{Integer,Integer}})
 
-    #@test isequal_type(typeintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}),
-    #                                 Tuple{Tuple{Int,Vararg{Int}},Array}),
-    #                   Tuple{Tuple{Int,Vararg{Int}},Array{Int,N}})
+    @test isequal_type(typeintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}),
+                                     Tuple{Tuple{Int,Vararg{Int}},Array}),
+                       @UnionAll N Tuple{Tuple{Int,Vararg{Int}},Array{Int,N}})
 
     @testintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}),
                    Tuple{Tuple{Int,Vararg{Int}},Array{Int,2}},
@@ -904,11 +913,11 @@ function test_intersection()
     # both of these answers seem acceptable
     #@testintersect(Tuple{T,T} where T<:Union{UpperTriangular, UnitUpperTriangular},
     #               Tuple{AbstractArray{T,N}, AbstractArray{T,N}} where N where T,
-    #               Union{Tuple{T,T} where T<:UpperTriangular,
-    #                     Tuple{T,T} where T<:UnitUpperTriangular})
+    #               Union{Tuple{T,T} where T<:UpperTriangular{T1},
+    #                     Tuple{T,T} where T<:UnitUpperTriangular{T1}} where T)
     @testintersect(Tuple{T,T} where T<:Union{UpperTriangular, UnitUpperTriangular},
                    Tuple{AbstractArray{T,N}, AbstractArray{T,N}} where N where T,
-                   Tuple{T,T} where T<:Union{UpperTriangular, UnitUpperTriangular})
+                   Tuple{T,T} where {T1, T<:Union{UpperTriangular{T1}, UnitUpperTriangular{T1}}})
 
     @testintersect(DataType, Type, DataType)
     @testintersect(DataType, Type{T} where T<:Integer, Type{T} where T<:Integer)
@@ -924,9 +933,10 @@ function test_intersection()
     # since this T is inside the invariant ctor Type{}, we allow T == Any here
     @testintersect((Type{Tuple{Vararg{T}}} where T), Type{Tuple}, Type{Tuple})
 
+    # TODO: improve this
     @testintersect(Tuple{Type{S}, Tuple{Any, Vararg{Any}}} where S<:Tuple{Any, Vararg{Any}},
                    Tuple{Type{T}, T} where T,
-                   Tuple{Type{S},S} where S<:Tuple{Any,Vararg{Any}})
+                   Tuple{Type{S}, Tuple{Any, Vararg{Any}}} where S<:Tuple{Any, Vararg{Any}})
 
     # part of issue #20450
     @testintersect(Tuple{Array{Ref{T}, 1}, Array{Pair{M, V}, 1}} where V where T where M,
@@ -1043,11 +1053,7 @@ function test_intersection()
                    Type{Tuple{Int,T}} where T<:Integer)
     @testintersect(Type{<:Tuple{Any,Vararg{Any}}},
                    Type{Tuple{Vararg{Int,N}}} where N,
-                   !Union{})
-
-    @test typeintersect(Type{<:Tuple{Any,Vararg{Any}}}, Type{Tuple{Vararg{Int,N}}} where N) != Type{Tuple{Int,Vararg{Int}}}
-    @test_broken typeintersect(Type{<:Tuple{Any,Vararg{Any}}}, Type{Tuple{Vararg{Int,N}}} where N) == Type{Tuple{Int,Vararg{Int,N}}} where N
-    @test_broken typeintersect(Type{<:Tuple{Any,Vararg{Any}}}, Type{Tuple{Vararg{Int,N}}} where N) != Type{<:Tuple{Int,Vararg{Int}}}
+                   Type{Tuple{Int,Vararg{Int,N}}} where N)
 
     @testintersect(Type{<:Array},
                    Type{AbstractArray{T}} where T,
@@ -1077,8 +1083,7 @@ function test_intersection_properties()
             I2 = _type_intersect(S,T)
             @test isequal_type(I, I2)
             if i > length(easy_menagerie) || j > length(easy_menagerie)
-                # TODO: these cases give a conservative answer
-                @test issub(I, T) || issub(I, S)
+                # @test issub(I, T) || issub(I, S)
             else
                 @test issub(I, T) && issub(I, S)
             end
@@ -1209,12 +1214,12 @@ let a = Tuple{Float64,T3,T4} where T4 where T3,
     b = Tuple{S2,Tuple{S3},S3} where S2 where S3
     I1 = typeintersect(a, b)
     I2 = typeintersect(b, a)
-    @test I1 <: I2
+    @test_broken I1 <: I2
     @test I2 <: I1
     @test I1 <: a
     @test I2 <: a
     @test_broken I1 <: b
-    @test_broken I2 <: b
+    @test I2 <: b
 end
 let a = Tuple{T1,Tuple{T1}} where T1,
     b = Tuple{Float64,S3} where S3
@@ -1231,12 +1236,12 @@ let a = Tuple{5,T4,T5} where T4 where T5,
     b = Tuple{S2,S3,Tuple{S3}} where S2 where S3
     I1 = typeintersect(a, b)
     I2 = typeintersect(b, a)
-    @test I1 <: I2
+    @test_broken I1 <: I2
     @test I2 <: I1
     @test I1 <: a
     @test I2 <: a
     @test_broken I1 <: b
-    @test_broken I2 <: b
+    @test I2 <: b
 end
 let a = Tuple{T2,Tuple{T4,T2}} where T4 where T2,
     b = Tuple{Float64,Tuple{Tuple{S3},S3}} where S3
@@ -1246,12 +1251,12 @@ let a = Tuple{Tuple{T2,4},T6} where T2 where T6,
     b = Tuple{Tuple{S2,S3},Tuple{S2}} where S2 where S3
     I1 = typeintersect(a, b)
     I2 = typeintersect(b, a)
-    @test I1 <: I2
+    @test_broken I1 <: I2
     @test I2 <: I1
     @test I1 <: a
     @test I2 <: a
     @test_broken I1 <: b
-    @test_broken I2 <: b
+    @test I2 <: b
 end
 let a = Tuple{T3,Int64,Tuple{T3}} where T3,
     b = Tuple{S3,S3,S4} where S4 where S3
@@ -1484,6 +1489,8 @@ f24521(::Type{T}, ::Type{T}) where {T} = T
 @test !(Ref{Union{Int64, Val{Number}}} <: Ref{Union{Val{T}, T}} where T)
 @test !(Ref{Union{Ref{Number}, Int64}} <: Ref{Union{Ref{T}, T}} where T)
 @test !(Ref{Union{Val{Number}, Int64}} <: Ref{Union{Val{T}, T}} where T)
+@test !(Val{Ref{Union{Int64, Ref{Number}}}} <: Val{S} where {S<:Ref{Union{Ref{T}, T}} where T})
+@test !(Tuple{Ref{Union{Int64, Ref{Number}}}} <: Tuple{S} where {S<:Ref{Union{Ref{T}, T}} where T})
 
 # issue #26180
 @test !(Ref{Union{Ref{Int64}, Ref{Number}}} <: Ref{Ref{T}} where T)
@@ -1597,7 +1604,7 @@ end
                Tuple{Type{A29955{T,TV,TM}},
                      TM} where {T,TV<:AbstractVector{T},TM<:M29955{T,TV}},
                Tuple{Type{A29955{Float64,Array{Float64,1},TM}},
-                     TM} where TM<:M29955{Float64,Array{Float64,1}})
+                   M29955{Float64,Vector{Float64}}} where TM<:M29955{Float64,Array{Float64,1}})
 let M = M29955{T,Vector{Float64}} where T
     @test M == (M29955{T,Vector{Float64}} where T)
     @test M{Float64} == M29955{Float64,Vector{Float64}}
@@ -1615,9 +1622,9 @@ end
                Tuple{LT,R,I} where LT<:Union{I, R} where R<:Rational{I} where I<:Integer,
                Tuple{LT,Rational{Int},Int} where LT<:Union{Rational{Int},Int})
 
-#@testintersect(Tuple{Any,Tuple{Int},Int},
-#               Tuple{LT,R,I} where LT<:Union{I, R} where R<:Tuple{I} where I<:Integer,
-#               Tuple{LT,Tuple{Int},Int} where LT<:Union{Tuple{Int},Int})
+@testintersect(Tuple{Any,Tuple{Int},Int},
+               Tuple{LT,R,I} where LT<:Union{I, R} where R<:Tuple{I} where I<:Integer,
+               Tuple{LT,Tuple{Int},Int} where LT<:Union{Tuple{Int},Int})
 # fails due to this:
 let U = Tuple{Union{LT, LT1},Union{R, R1},Int} where LT1<:R1 where R1<:Tuple{Int} where LT<:Int where R<:Tuple{Int},
     U2 = Union{Tuple{LT,R,Int} where LT<:Int where R<:Tuple{Int}, Tuple{LT,R,Int} where LT<:R where R<:Tuple{Int}},
@@ -1634,9 +1641,10 @@ end
 # issue #31082 and #30741
 @test typeintersect(Tuple{T, Ref{T}, T} where T,
                     Tuple{Ref{S}, S, S} where S) != Union{}
+# TODO: improve this bound
 @testintersect(Tuple{Pair{B,C},Union{C,Pair{B,C}},Union{B,Real}} where {B,C},
                Tuple{Pair{B,C},C,C} where {B,C},
-               Tuple{Pair{B,C},C,C} where C<:Union{Real, B} where B)
+               Tuple{Pair{B,C}, Union{Pair{B,C},C},Union{Real,B}} where {B,C})
 f31082(::Pair{B, C}, ::Union{C, Pair{B, C}}, ::Union{B, Real}) where {B, C} = 0
 f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1
 @test f31082(""=>1, 2, 3) == 1
@@ -1802,8 +1810,18 @@ end
 #end
 
 # issue #32386
-@test typeintersect(Type{S} where S<:(Vector{Pair{_A,N} where N} where _A),
-                    Type{Vector{T}} where T) == Type{Vector{Pair{_A,N} where N}} where _A
+@testintersect(Type{S} where S<:(Vector{Pair{_A,N} where N} where _A),
+               Type{Vector{T}} where T,
+               Type{Vector{Pair{_A,N} where N}} where _A)
+
+# pr #49049
+@testintersect(Tuple{Type{Pair{T, A} where {T, A<:Array{T}}}, Int, Any},
+               Tuple{Type{F}, Any, Int} where {F<:(Pair{T, A} where {T, A<:Array{T}})},
+               Tuple{Type{Pair{T, A} where {T, A<:(Array{T})}}, Int, Int})
+
+@testintersect(Type{Ref{Union{Int, Tuple{S,S} where S<:T}}} where T,
+              Type{F} where F<:(Base.RefValue{Union{Int, Tuple{S,S} where S<:T}} where T),
+              Union{})
 
 # issue #32488
 struct S32488{S <: Tuple, T, N, L}
@@ -1862,8 +1880,11 @@ s26065 = Ref{Tuple{T,Ref{Union{Ref{Tuple{Ref{Union{Ref{Ref{Tuple{Ref{Tuple{Union
              Tuple{Type{Tuple{Vararg{V}}}, Tuple{Vararg{V}}} where V)
 
 # issue 36100
-@test NamedTuple{(:a, :b), Tuple{Missing, Union{}}} == NamedTuple{(:a, :b), Tuple{Missing, Union{}}}
-@test Val{Tuple{Missing, Union{}}} === Val{Tuple{Missing, Union{}}}
+@test Pair{(:a, :b), Tuple{Missing, Vararg{Union{},N}} where N} ===
+      Pair{(:a, :b), Tuple{Missing, Vararg{Union{},N}} where N} !=
+      Pair{(:a, :b), Tuple{Missing, Vararg{Union{}}}} === Pair{(:a, :b), Tuple{Missing}}
+@test Val{Tuple{Missing, Vararg{Union{},N}} where N} === Val{Tuple{Missing, Vararg{Union{},N}} where N} !=
+      Val{Tuple{Missing, Vararg{Union{}}}} === Val{Tuple{Missing}}
 
 # issue #36869
 struct F36869{T, V} <: AbstractArray{Union{T, V}, 1}
@@ -1881,28 +1902,26 @@ end
 # issue #38081
 struct AlmostLU{T, S<:AbstractMatrix{T}}
 end
-let X1 = Tuple{AlmostLU, Vector{T}} where T,
-    X2 = Tuple{AlmostLU{S, X} where X<:Matrix, Vector{S}} where S<:Union{Float32, Float64},
-    I = Tuple{AlmostLU{T, S} where S<:Matrix{T}, Vector{T}} where T<:Union{Float32, Float64}
-    @testintersect(X1, X2, I)
-end
-
-let
-    # issue #22787
-    @testintersect(Tuple{Type{Q}, Q, Ref{Q}} where Q<:Ref,
-                   Tuple{Type{S}, Union{Ref{S}, Ref{R}}, R} where R where S,
-                   !Union{})
+@testintersect(Tuple{AlmostLU, Vector{T}} where T,
+               Tuple{AlmostLU{S, X} where X<:Matrix, Vector{S}} where S<:Union{Float32, Float64},
+               Tuple{AlmostLU{T, X} where X<:Matrix{T}, Vector{T}} where T<:Union{Float32, Float64})
 
-    t = typeintersect(Tuple{Type{T}, T, Ref{T}} where T,
-                      Tuple{Type{S}, Ref{S}, S} where S)
-    @test_broken t != Union{}
+# issue #22787
+@testintersect(Tuple{Type{Q}, Q, Ref{Q}} where Q<:Ref,
+               Tuple{Type{S}, Union{Ref{S}, Ref{R}}, R} where R where S,
+               Tuple{Type{Q}, Union{Ref{Q}, Ref{R}}, Ref{Q}} where {Q<:Ref, R}) # likely suboptimal
 
-    # issue #38279
-    t = typeintersect(Tuple{<:Array{T, N}, Val{T}} where {T<:Real, N},
-                      Tuple{<:Array{T, N}, Val{<:AbstractString}}  where {T<:Real, N})
-    @test t == Tuple{<:Array{Union{}, N}, Val{Union{}}} where N
+let t = typeintersect(Tuple{Type{T}, T, Ref{T}} where T,
+                  Tuple{Type{S}, Ref{S}, S} where S)
+    @test_broken t == Tuple{Type{T}, Ref{T}, Ref{T}} where T>:Ref
+    @test t == Tuple{Type{T}, Ref{T}, Ref{T}} where T
 end
 
+# issue #38279
+t = typeintersect(Tuple{<:Array{T, N}, Val{T}} where {T<:Real, N},
+                  Tuple{<:Array{T, N}, Val{<:AbstractString}}  where {T<:Real, N})
+@test t == Tuple{<:Array{Union{}, N}, Val{Union{}}} where N
+
 # issue #36951
 @testintersect(Type{T} where T>:Missing,
                Type{Some{T}} where T,
@@ -1939,10 +1958,25 @@ end
 # issue #34170
 let A = Tuple{Type{T} where T<:Ref, Ref, Union{T, Union{Ref{T}, T}} where T<:Ref},
     B = Tuple{Type{T}, Ref{T}, Union{Int, Ref{T}, T}} where T
-    I = typeintersect(A,B)
     # this was a case where <: disagreed with === (due to a badly-normalized type)
-    @test I == typeintersect(A,B)
-    @test I == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
+    I = _type_intersect(B, A)
+    @test_broken I == Union{Tuple{Type{T}, Ref{T}, Ref{T}} where T<:Ref, Tuple{Type{T}, Ref{T}, T} where T<:Ref}
+    @test I == _type_intersect(B, A) == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
+    I = typeintersect(B, A)
+    @test_broken I == Tuple{Type{T}, Ref{T}, Union{Ref{T}, T}} where T<:Ref
+    @test I == typeintersect(B, A) <: Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
+
+    I = _type_intersect(A, B)
+    @test !Base.has_free_typevars(I)
+    J = Tuple{Type{T1}, Ref{T1}, Ref} where {T, T1<:Union{Ref, Ref{T}}}
+    @test I == _type_intersect(A, B) == J
+    @test_broken I == Tuple{Type{T}, Ref{T}, T1} where {T<:Ref, T1<:Union{T, Ref{T}}} # a better result, == to the result with arguments switched
+
+    I = typeintersect(A, B)
+    @test !Base.has_free_typevars(I)
+    J = Tuple{Type{T1}, Ref{T1}, Ref} where {T, T1<:Union{Ref, Ref{T}}}
+    @test I == typeintersect(A, B) == J
+
 end
 
 # issue #39218
@@ -1971,20 +2005,14 @@ let A = Tuple{Type{<:Union{Number, T}}, Ref{T}} where T,
 end
 
 # issue #39698
-let T = Type{T} where T<:(AbstractArray{I}) where I<:(Base.IteratorsMD.CartesianIndex),
-    S = Type{S} where S<:(Base.IteratorsMD.CartesianIndices{A, B} where B<:Tuple{Vararg{Any, A}} where A)
-    I = typeintersect(T, S)
-    @test_broken I <: T
-    @test I <: S
-    @test_broken I == typeintersect(S, T)
-end
+@testintersect(Type{T} where T<:(AbstractArray{I}) where I<:(Base.IteratorsMD.CartesianIndex),
+    Type{S} where S<:(Base.IteratorsMD.CartesianIndices{A, B} where B<:Tuple{Vararg{Any, A}} where A),
+    Type{S} where {N, S<:(Base.IteratorsMD.CartesianIndices{N, B} where B<:Tuple{Vararg{Any, N}})})
 
 # issue #39948
-let A = Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
-    I = typeintersect(A, Tuple{Vararg{Vector{T}}} where T)
-    @test I <: A
-    @test !Base.has_free_typevars(I)
-end
+@testintersect(Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
+    Tuple{Vararg{Vector{T}}} where T,
+    Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1}, Array{Pair{T, JT} where JT<:Ref{T}, 1}} where T)
 
 # issue #8915
 struct D8915{T<:Union{Float32,Float64}}
@@ -2006,8 +2034,8 @@ let A = Tuple{Ref{T}, Vararg{T}} where T,
     I = typeintersect(A, B)
     Ts = (Tuple{Ref{Int}, Int, Int}, Tuple{Ref{Ref{Int}}, Ref{Int}, Ref{Int}})
     @test I != Union{}
-    @test I <: A
-    @test_broken I <: B
+    @test_broken I <: A
+    @test I <: B
     for T in Ts
         if T <: A && T <: B
             @test T <: I
@@ -2015,8 +2043,8 @@ let A = Tuple{Ref{T}, Vararg{T}} where T,
     end
     J = typeintersect(A, C)
     @test J != Union{}
-    @test J <: A
-    @test_broken J <: C
+    @test_broken J <: A
+    @test J <: C
     for T in Ts
         if T <: A && T <: C
             @test T <: J
@@ -2025,9 +2053,13 @@ let A = Tuple{Ref{T}, Vararg{T}} where T,
 end
 
 let A = Tuple{Dict{I,T}, I, T} where T where I,
-    B = Tuple{AbstractDict{I,T}, T, I} where T where I
-    # TODO: we should probably have I == T here
-    @test typeintersect(A, B) == Tuple{Dict{I,T}, I, T} where {I, T}
+    B = Tuple{AbstractDict{I,T}, T, I} where T where I,
+    I = typeintersect(A, B)
+    # TODO: we should probably have something approaching I == T here,
+    # though note something more complex is needed since the intersection must also include types such as;
+    # Tuple{Dict{Integer,Any}, Integer, Int}
+    @test_broken I <: A && I <: B
+    @test I == typeintersect(B, A) == Tuple{Dict{I, T}, Any, Any} where {I, T}
 end
 
 let A = Tuple{UnionAll, Vector{Any}},
@@ -2194,26 +2226,36 @@ let A = Pair{NTuple{N, Int}, NTuple{N, Int}} where N,
     Bs = (Pair{<:Tuple{Int, Vararg{Int}}, <:Tuple{Int, Int, Vararg{Int}}},
           Pair{Tuple{Int, Vararg{Int,N1}}, Tuple{Int, Int, Vararg{Int,N2}}} where {N1,N2},
           Pair{<:Tuple{Int, Vararg{Int,N}} where {N}, <:Tuple{Int, Int, Vararg{Int,N}} where {N}})
-    Cerr = Pair{Tuple{Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
-    for B in Bs
-        C = typeintersect(A, B)
-        @test C == typeintersect(B, A) != Union{}
-        @test C != Cerr
-        @test_broken C != B
+    Cs = (Bs[2], Bs[2], Bs[3])
+    for (B, C) in zip(Bs, Cs)
+        # TODO: The ideal result is Pair{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N}
+        @testintersect(A, B, C)
     end
 end
 
 # Example from pr#39098
 @testintersect(NTuple, Tuple{Any,Vararg}, Tuple{T, Vararg{T}} where {T})
 
+@testintersect(Val{T} where T<:Tuple{Tuple{Any, Vararg{Any}}},
+               Val{Tuple{Tuple{Vararg{Any, N}}}} where {N},
+               Val{Tuple{Tuple{Any, Vararg{Any, N}}}} where {N})
+
 let A = Pair{NTuple{N, Int}, Val{N}} where N,
-    Bs = (Pair{<:Tuple{Int, Vararg{Int}}, <:Val},
-          Pair{Tuple{Int, Vararg{Int,N1}}, Val{N2}} where {N1,N2})
-    Cerr = Pair{Tuple{Int, Vararg{Int,N}}, Val{N}} where N
-    for B in Bs
-        @testintersect(A, B, !Cerr)
-        @testintersect(A, B, !Union{})
-    end
+    C = Pair{Tuple{Int, Vararg{Int,N1}}, Val{N2}} where {N1,N2},
+    B = Pair{<:Tuple{Int, Vararg{Int}}, <:Val}
+    @testintersect A B C
+    @testintersect A C C
+end
+
+# issue #49484
+let S = Tuple{Integer, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}}
+    T = Tuple{Int, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}}
+    @testintersect(S, Tuple{Int, U} where {U<:Tuple{Vararg{Any}}}, T)
+    @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Vararg{Any,N}}}, T)
+    @testintersect(S, Tuple{Int, U} where {U<:Tuple{Any,Vararg{Any}}}, T)
+    @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Vararg{Any,N}}}, T)
+    @testintersect(S, Tuple{Int, U} where {U<:Tuple{Any,Any,Vararg{Any}}}, Union{})
+    @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Any,Vararg{Any,N}}}, Union{})
 end
 
 # issue #43064
@@ -2280,7 +2322,7 @@ struct Z38497{T>:Int} <: Y38497{T} end
 @test only(intersection_env(Union{S, Matrix{Int}} where S<:Matrix, Matrix)[2]) isa TypeVar
 T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{AbstractMatrix{B}, AbstractMatrix{<:Vector{<:B}}}}
 @testintersect(T46784{T,S} where {T,S}, T46784, !Union{})
-@test_broken T46784 <: T46784{T,S} where {T,S}
+@test T46784 <: T46784{T,S} where {T,S}
 
 #issue 36185
 let S = Tuple{Type{T},Array{Union{T,Missing},N}} where {T,N},
@@ -2348,7 +2390,7 @@ let S = Tuple{Type{T1}, T1, Val{T1}} where T1<:(Val{S1} where S1<:Val),
     @test I1 !== Union{} && I2 !== Union{}
     @test_broken I1 <: S
     @test_broken I2 <: T
-    @test I2 <: S
+    @test_broken I2 <: S
     @test_broken I2 <: T
 end
 
@@ -2374,19 +2416,17 @@ abstract type P47654{A} end
 
 @testset "known subtype/intersect issue" begin
     #issue 45874
-    # Causes a hang due to jl_critical_error calling back into malloc...
-    # let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P,
-    #     T = Pair{Val{R}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where {P,R}
-    #     @test_broken S <: T
-    #     @test_broken typeintersect(S,T) === S
-    # end
+    let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P,
+        T = Pair{Val{R}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where {P,R}
+        @test S <: T
+    end
 
     #issue 41561
     @test_broken typeintersect(Tuple{Vector{VT}, Vector{VT}} where {N1, VT<:AbstractVector{N1}},
                 Tuple{Vector{VN} where {N, VN<:AbstractVector{N}}, Vector{Vector{Float64}}}) !== Union{}
     #issue 40865
-    @test_broken Tuple{Set{Ref{Int}}, Set{Ref{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Ref{K}}}
-    @test_broken Tuple{Set{Val{Int}}, Set{Val{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Val{K}}}
+    @test Tuple{Set{Ref{Int}}, Set{Ref{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Ref{K}}}
+    @test Tuple{Set{Val{Int}}, Set{Val{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Val{K}}}
 
     #issue 39099
     A = Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Vararg{Int, N}}, Tuple{Vararg{Int, N}}} where N
@@ -2405,7 +2445,7 @@ abstract type P47654{A} end
     # issue 22123
     t1 = Ref{Ref{Ref{Union{Int64, T}}} where T}
     t2 = Ref{Ref{Ref{Union{T, S}}} where T} where S
-    @test_broken t1 <: t2
+    @test t1 <: t2
 
     # issue 21153
     @test_broken (Tuple{T1,T1} where T1<:(Val{T2} where T2)) <: (Tuple{Val{S},Val{S}} where S)
@@ -2420,18 +2460,90 @@ end
 
 # try to fool a greedy algorithm that picks X=Int, Y=String here
 @test Tuple{Ref{Union{Int,String}}, Ref{Union{Int,String}}} <: Tuple{Ref{Union{X,Y}}, Ref{X}} where {X,Y}
-# this slightly more complex case has been broken since 1.0 (worked in 0.6)
-@test_broken Tuple{Ref{Union{Int,String,Missing}}, Ref{Union{Int,String}}} <: Tuple{Ref{Union{X,Y}}, Ref{X}} where {X,Y}
+@test Tuple{Ref{Union{Int,String,Missing}}, Ref{Union{Int,String}}} <: Tuple{Ref{Union{X,Y}}, Ref{X}} where {X,Y}
 
 @test !(Tuple{Any, Any, Any} <: Tuple{Any, Vararg{T}} where T)
 
+# issue #39967
+@test (NTuple{27, T} where {S, T<:Union{Array, Array{S}}}) <: Tuple{Array, Array, Vararg{AbstractArray, 25}}
+
 abstract type MyAbstract47877{C}; end
 struct MyType47877{A,B} <: MyAbstract47877{A} end
 let A = Tuple{Type{T}, T} where T,
     B = Tuple{Type{MyType47877{W, V} where V<:Union{Base.BitInteger, MyAbstract47877{W}}}, MyAbstract47877{<:Base.BitInteger}} where W
-    C = Tuple{Type{MyType47877{W, V} where V<:Union{MyAbstract47877{W1}, Base.BitInteger}}, MyType47877{W, V} where V<:Union{MyAbstract47877{W1}, Base.BitInteger}} where {W<:Base.BitInteger, W1<:Base.BitInteger}
-    # ensure that merge_env for innervars does not blow up (the large Unions ensure this will take excessive memory if it does)
-    @test typeintersect(A, B) == C # suboptimal, but acceptable
     C = Tuple{Type{MyType47877{W, V} where V<:Union{MyAbstract47877{W}, Base.BitInteger}}, MyType47877{W, V} where V<:Union{MyAbstract47877{W}, Base.BitInteger}} where W<:Base.BitInteger
-    @test typeintersect(B, A) == C
+    # ensure that merge_env for innervars does not blow up (the large Unions ensure this will take excessive memory if it does)
+    @testintersect(A, B, C)
+end
+
+let
+    a = (isodd(i) ? Pair{Char, String} : Pair{String, String} for i in 1:2000)
+    @test Tuple{Type{Pair{Union{Char, String}, String}}, a...} <: Tuple{Type{Pair{K, V}}, Vararg{Pair{A, B} where B where A}} where V where K
+    a = (isodd(i) ? Matrix{Int} : Vector{Int} for i in 1:4000)
+    @test Tuple{Type{Pair{Union{Char, String}, String}}, a...,} <: Tuple{Type{Pair{K, V}}, Vararg{Array}} where V where K
+end
+
+#issue 48582
+@test !<:(Tuple{Pair{<:T,<:T}, Val{S} where {S}} where {T<:Base.BitInteger},
+          Tuple{Pair{<:T,<:T}, Val{Int}} where {T<:Base.BitInteger})
+
+struct T48695{T, N, H<:AbstractArray} <: AbstractArray{Union{Missing, T}, N} end
+struct S48695{T, N, H<:AbstractArray{T, N}} <: AbstractArray{T, N} end
+let S = Tuple{Type{S48695{T, 2, T48695{B, 2, C}}} where {T<:(Union{Missing, A} where A), B, C}, T48695{T, 2} where T},
+    T = Tuple{Type{S48695{T, N, H}}, H} where {T, N, H<:AbstractArray{T, N}}
+    V = typeintersect(S, T)
+    vars_in_unionall(s) = s isa UnionAll ? (s.var, vars_in_unionall(s.body)...) : ()
+    @test V != Union{}
+    @test allunique(vars_in_unionall(V))
+    @test typeintersect(V, T) != Union{}
+end
+
+#issue 48961
+@test !<:(Type{Union{Missing, Int}}, Type{Union{Missing, Nothing, Int}})
+
+#issue 49127
+struct F49127{m,n} <: Function end
+let a = [TypeVar(:V, Union{}, Function) for i in 1:32]
+    b = a[1:end-1]
+    S = foldr((v, d) -> UnionAll(v, d), a; init = foldl((i, j) -> F49127{i, j}, a))
+    T = foldr((v, d) -> UnionAll(v, d), b; init = foldl((i, j) -> F49127{i, j}, b))
+    @test S <: T
+end
+
+# requires assertions enabled (to test union-split in `obviously_disjoint`)
+@test !<:(Tuple{Type{Int}, Int}, Tuple{Type{Union{Int, T}}, T} where T<:Union{Int8,Int16})
+@test <:(Tuple{Type{Int}, Int}, Tuple{Type{Union{Int, T}}, T} where T<:Union{Int8,Int})
+
+#issue #49354 (requires assertions enabled)
+@test !<:(Tuple{Type{Union{Int, Val{1}}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Val)
+@test !<:(Tuple{Type{Union{Int, Val{1}}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Union{Val,Pair})
+@test <:(Tuple{Type{Union{Int, Val{1}}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Union{Integer,Val})
+@test <:(Tuple{Type{Union{Int, Int8}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Integer)
+@test !<:(Tuple{Type{Union{Pair{Int, Any}, Pair{Int, Int}}}, Pair{Int, Any}},
+          Tuple{Type{Union{Pair{Int, Any}, T1}}, T1} where T1<:(Pair{T,T} where {T}))
+
+let A = Tuple{Type{T}, T, Val{T}} where T,
+    B = Tuple{Type{S}, Val{S}, Val{S}} where S
+    @test_broken typeintersect(A, B) == Tuple{Type{T}, Val{T}, Val{T}} where T>:Val
+    @test typeintersect(A, B) <: Tuple{Type{T}, Val{T}, Val{T}} where T
+end
+let A = Tuple{Type{T}, T, Val{T}} where T<:Val,
+    B = Tuple{Type{S}, Val{S}, Val{S}} where S<:Val
+    @test_broken typeintersect(A, B) == Tuple{Type{Val}, Val{Val}, Val{Val}}
+    @test typeintersect(A, B) <: Tuple{Type{T}, Val{T}, Val{T}} where T<:Val
+end
+let A = Tuple{Type{T}, T, Val{T}} where T<:Val,
+    B = Tuple{Type{S}, Val{S}, Val{S}} where S<:Val{A} where A
+    @test typeintersect(A, B) == Union{}
+end
+let A = Tuple{Type{T}, T, Val{T}} where T<:Val{<:Val},
+    B = Tuple{Type{S}, Val{S}, Val{S}} where S<:Val
+    @test_broken typeintersect(A, B) == Tuple{Type{Val{<:Val}}, Val{Val{<:Val}}, Val{Val{<:Val}}}
+    @test typeintersect(A, B) <: Tuple{Type{T}, Val{T}, Val{T}} where T<:(Val{<:Val})
+end
+let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} where T<:Val{A} where A where S<:Val,
+    S = Tuple{Type{T}, T, Val{T}} where T<:(Val{S} where S<:Val)
+    # optimal = Union{}?
+    @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val)
+    @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)}
 end
diff --git a/test/syntax.jl b/test/syntax.jl
index 32f343f4a392e..8bba5f9205613 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -839,6 +839,8 @@ let ε=1, μ=2, x=3, î=4, ⋅=5, (-)=6
     @test Meta.parse("100.0f\u22122") === Meta.parse("100.0f-2")
     @test Meta.parse("0x100p\u22128") === Meta.parse("0x100P\u22128") === Meta.parse("0x100p-8")
     @test (−) == (-) == 6
+    # hbar ℏ to ħ - (#48870)
+    @test :ℏ === :ħ
 end
 
 # issue #8925
@@ -2007,7 +2009,7 @@ end
 @test Meta.parse("import Base.Foo.:(==).bar") == :(import Base.Foo.==.bar)
 
 # issue #33135
-function f33135(x::T) where {C1, T}
+@test_warn "declares type variable C1 but does not use it" @eval function f33135(x::T) where {C1, T}
     let C1 = 1, C2 = 2
         C1
     end
@@ -2226,6 +2228,14 @@ end
     @test Meta.parse("a ⫫ b") == Expr(:call, :⫫, :a, :b)
 end
 
+# issue 45962
+@testset "binary ⭄, ⥺, ⭃, and ⥷" begin
+    @test Meta.parse("a ⭄ b") == Expr(:call, :⭄, :a, :b)
+    @test Meta.parse("a ⥺ b") == Expr(:call, :⥺, :a, :b)
+    @test Meta.parse("a ⭃ b") == Expr(:call, :⭃, :a, :b)
+    @test Meta.parse("a ⥷ b") == Expr(:call, :⥷, :a, :b)
+end
+
 # only allow certain characters after interpolated vars (#25231)
 @test Meta.parse("\"\$x෴  \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
 @test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) === :string
@@ -2879,7 +2889,7 @@ end
 @test eval(:(x = $(QuoteNode(Core.SSAValue(1))))) == Core.SSAValue(1)
 @test eval(:(x = $(QuoteNode(Core.SlotNumber(1))))) == Core.SlotNumber(1)
 @test_throws ErrorException("syntax: SSAValue objects should not occur in an AST") eval(:(x = $(Core.SSAValue(1))))
-@test_throws ErrorException("syntax: Slot objects should not occur in an AST") eval(:(x = $(Core.SlotNumber(1))))
+@test_throws ErrorException("syntax: SlotNumber objects should not occur in an AST") eval(:(x = $(Core.SlotNumber(1))))
 
 # juxtaposition of radical symbols (#40094)
 @test Meta.parse("2√3") == Expr(:call, :*, 2, Expr(:call, :√, 3))
@@ -3053,9 +3063,6 @@ end
 end
 
 # issue 25678
-@generated f25678(x::T) where {T} = code_lowered(sin, Tuple{x})[]
-@test f25678(pi/6) === sin(pi/6)
-
 @generated g25678(x) = return :x
 @test g25678(7) === 7
 
diff --git a/test/sysinfo.jl b/test/sysinfo.jl
index e423f6071c9e0..3a16dc73b4f6a 100644
--- a/test/sysinfo.jl
+++ b/test/sysinfo.jl
@@ -9,3 +9,32 @@ Base.Sys.loadavg()
 
 @test Base.libllvm_path() isa Symbol
 @test contains(String(Base.libllvm_path()), "LLVM")
+
+if Sys.isunix()
+    mktempdir() do tempdir
+        firstdir = joinpath(tempdir, "first")
+        seconddir = joinpath(tempdir, "second")
+
+        mkpath(firstdir)
+        mkpath(seconddir)
+
+        touch(joinpath(firstdir, "foo"))
+        touch(joinpath(seconddir, "foo"))
+
+        chmod(joinpath(firstdir, "foo"), 0o777)
+        chmod(joinpath(seconddir, "foo"), 0o777)
+
+        # zero permissions on first directory
+        chmod(firstdir, 0o000)
+
+        original_path = ENV["PATH"]
+        ENV["PATH"] = string(firstdir, ":", seconddir, ":", original_path)
+        try
+            @test abspath(Base.Sys.which("foo")) == abspath(joinpath(seconddir, "foo"))
+        finally
+            # clean up
+            chmod(firstdir, 0o777)
+            ENV["PATH"] = original_path
+        end
+    end
+end
diff --git a/test/testhelpers/DualNumbers.jl b/test/testhelpers/DualNumbers.jl
new file mode 100644
index 0000000000000..9f62e3bf0d429
--- /dev/null
+++ b/test/testhelpers/DualNumbers.jl
@@ -0,0 +1,46 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module DualNumbers
+
+export Dual
+
+# Dual numbers type with minimal interface
+# example of a (real) number type that subtypes Number, but not Real.
+# Can be used to test generic linear algebra functions.
+
+struct Dual{T<:Real} <: Number
+    val::T
+    eps::T
+end
+Base.:+(x::Dual, y::Dual) = Dual(x.val + y.val, x.eps + y.eps)
+Base.:-(x::Dual, y::Dual) = Dual(x.val - y.val, x.eps - y.eps)
+Base.:*(x::Dual, y::Dual) = Dual(x.val * y.val, x.eps * y.val + y.eps * x.val)
+Base.:*(x::Number, y::Dual) = Dual(x*y.val, x*y.eps)
+Base.:*(x::Dual, y::Number) = Dual(x.val*y, x.eps*y)
+Base.:/(x::Dual, y::Dual) = Dual(x.val / y.val, (x.eps*y.val - x.val*y.eps)/(y.val*y.val))
+
+Base.:(==)(x::Dual, y::Dual) = x.val == y.val && x.eps == y.eps
+
+Base.promote_rule(::Type{Dual{T}}, ::Type{T}) where {T} = Dual{T}
+Base.promote_rule(::Type{Dual{T}}, ::Type{S}) where {T,S<:Real} = Dual{promote_type(T, S)}
+Base.promote_rule(::Type{Dual{T}}, ::Type{Dual{S}}) where {T,S} = Dual{promote_type(T, S)}
+
+Base.convert(::Type{Dual{T}}, x::Dual{T}) where {T} = x
+Base.convert(::Type{Dual{T}}, x::Dual) where {T} = Dual(convert(T, x.val), convert(T, x.eps))
+Base.convert(::Type{Dual{T}}, x::Real) where {T} = Dual(convert(T, x), zero(T))
+
+Base.float(x::Dual) = Dual(float(x.val), float(x.eps))
+# the following two methods are needed for normalize (to check for potential overflow)
+Base.typemax(x::Dual) = Dual(typemax(x.val), zero(x.eps))
+Base.prevfloat(x::Dual{<:AbstractFloat}) = prevfloat(x.val)
+
+Base.abs2(x::Dual) = x*x
+Base.abs(x::Dual) = sqrt(abs2(x))
+Base.sqrt(x::Dual) = Dual(sqrt(x.val), x.eps/(2sqrt(x.val)))
+
+Base.isless(x::Dual, y::Dual) = x.val < y.val
+Base.isless(x::Real, y::Dual) = x < y.val
+Base.isinf(x::Dual) = isinf(x.val) & isfinite(x.eps)
+Base.real(x::Dual) = x # since we curently only consider Dual{<:Real}
+
+end # module
diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl
index 17dd270cd2424..c592699440ee0 100644
--- a/test/testhelpers/FakePTYs.jl
+++ b/test/testhelpers/FakePTYs.jl
@@ -39,7 +39,7 @@ function open_fake_pty()
         rc = ccall(:unlockpt, Cint, (Cint,), fdm)
         rc != 0 && error("unlockpt")
 
-        fds = ccall(:open, Cint, (Ptr{UInt8}, Cint),
+        fds = ccall(:open, Cint, (Ptr{UInt8}, Cint, UInt32...),
             ccall(:ptsname, Ptr{UInt8}, (Cint,), fdm), O_RDWR | O_NOCTTY)
         pts = RawFD(fds)
 
diff --git a/test/testhelpers/FillArrays.jl b/test/testhelpers/FillArrays.jl
new file mode 100644
index 0000000000000..1f36a77bf8c12
--- /dev/null
+++ b/test/testhelpers/FillArrays.jl
@@ -0,0 +1,33 @@
+module FillArrays
+
+struct Fill{T, N, S<:NTuple{N,Integer}} <: AbstractArray{T,N}
+    value::T
+    size::S
+end
+
+Fill(v, size::Vararg{Integer}) = Fill(v, size)
+
+Base.size(F::Fill) = F.size
+
+@inline getindex_value(F::Fill) = F.value
+
+@inline function Base.getindex(F::Fill{<:Any,N}, i::Vararg{Int,N}) where {N}
+    @boundscheck checkbounds(F, i...)
+    getindex_value(F)
+end
+
+@inline function Base.setindex!(F::Fill, v, k::Integer)
+    @boundscheck checkbounds(F, k)
+    v == getindex_value(F) || throw(ArgumentError("Cannot setindex! to $v for a Fill with value $(getindex_value(F))."))
+    F
+end
+
+@inline function Base.fill!(F::Fill, v)
+    v == getindex_value(F) || throw(ArgumentError("Cannot fill! with $v a Fill with value $(getindex_value(F))."))
+    F
+end
+
+Base.show(io::IO, F::Fill) = print(io, "Fill($(F.value), $(F.size))")
+Base.show(io::IO, ::MIME"text/plain", F::Fill) = show(io, F)
+
+end
diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl
index 15950a9f9ca4b..17970f0b0572e 100644
--- a/test/testhelpers/Furlongs.jl
+++ b/test/testhelpers/Furlongs.jl
@@ -21,9 +21,10 @@ Furlong{p}(v::Number) where {p} = Furlong{p,typeof(v)}(v)
 Furlong{p}(x::Furlong{q}) where {p,q} = (typeassert(x, Furlong{p}); Furlong{p,typeof(x.val)}(x.val))
 Furlong{p,T}(x::Furlong{q}) where {T,p,q} = (typeassert(x, Furlong{p}); Furlong{p,T}(T(x.val)))
 
-Base.promote_type(::Type{Furlong{p,T}}, ::Type{Furlong{p,S}}) where {p,T,S} =
+Base.promote_rule(::Type{Furlong{p,T}}, ::Type{Furlong{p,S}}) where {p,T,S} =
     Furlong{p,promote_type(T,S)}
-
+Base.promote_rule(::Type{Furlong{0,T}}, ::Type{S}) where {T,S<:Union{Real,Complex}} =
+    Furlong{0,promote_type(T,S)}
 # only Furlong{0} forms a ring and isa Number
 Base.convert(::Type{T}, y::Number) where {T<:Furlong{0}} = T(y)::T
 Base.convert(::Type{Furlong}, y::Number) = Furlong{0}(y)
@@ -35,11 +36,11 @@ Base.convert(::Type{Furlong}, y::Furlong) = y
 Base.convert(::Type{Furlong{<:Any,T}}, y::Furlong{p}) where {p,T<:Number} = Furlong{p,T}(y)
 Base.convert(::Type{T}, y::Furlong) where {T<:Furlong} = T(y)::T
 
-Base.one(x::Furlong{p,T}) where {p,T} = one(T)
+Base.one(::Furlong{p,T}) where {p,T} = one(T)
 Base.one(::Type{Furlong{p,T}}) where {p,T} = one(T)
-Base.oneunit(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(one(T))
-Base.oneunit(x::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(one(T))
-Base.zero(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(zero(T))
+Base.oneunit(::Furlong{p,T}) where {p,T} = Furlong{p,T}(one(T))
+Base.oneunit(::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(one(T))
+Base.zero(::Furlong{p,T}) where {p,T} = Furlong{p,T}(zero(T))
 Base.zero(::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(zero(T))
 Base.iszero(x::Furlong) = iszero(x.val)
 Base.float(x::Furlong{p}) where {p} = Furlong{p}(float(x.val))
diff --git a/test/testhelpers/InfiniteArrays.jl b/test/testhelpers/InfiniteArrays.jl
index d69130f4d726a..14b2e56daf1c6 100644
--- a/test/testhelpers/InfiniteArrays.jl
+++ b/test/testhelpers/InfiniteArrays.jl
@@ -21,11 +21,14 @@ Base.:(==)(::Infinity, ::Int) = false
 Base.:(==)(::Int, ::Infinity) = false
 Base.:(<)(::Int, ::Infinity) = true
 Base.:(≤)(::Int, ::Infinity) = true
+Base.:(<)(::Infinity, ::Int) = false
 Base.:(≤)(::Infinity, ::Int) = false
 Base.:(≤)(::Infinity, ::Infinity) = true
 Base.:(-)(::Infinity, ::Int) = Infinity()
 Base.:(+)(::Infinity, ::Int) = Infinity()
 Base.:(:)(::Infinity, ::Infinity) = 1:0
+Base.max(::Infinity, ::Int) = Infinity()
+Base.max(::Int, ::Infinity) = Infinity()
 
 """
     OneToInf(n)
diff --git a/test/threadpool_use.jl b/test/threadpool_use.jl
index 47c45bdd71eb8..e5ea5f95cf4ff 100644
--- a/test/threadpool_use.jl
+++ b/test/threadpool_use.jl
@@ -4,13 +4,10 @@ using Test
 using Base.Threads
 
 @test nthreadpools() == 2
-@test threadpool() === :default
-@test threadpool(2) === :interactive
-dtask() = @test threadpool(current_task()) === :default
-itask() = @test threadpool(current_task()) === :interactive
-dt1 = @spawn dtask()
-dt2 = @spawn :default dtask()
-it = @spawn :interactive itask()
-wait(dt1)
-wait(dt2)
-wait(it)
+@test threadpool() === :interactive
+@test threadpool(2) === :default
+@test fetch(Threads.@spawn Threads.threadpool()) === :default
+@test fetch(Threads.@spawn :default Threads.threadpool()) === :default
+@test fetch(Threads.@spawn :interactive Threads.threadpool()) === :interactive
+@test Threads.threadpooltids(:interactive) == [1]
+@test Threads.threadpooltids(:default) == [2]
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index 68ba9377cf955..9c7c524febeff 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -27,6 +27,8 @@ end
 # (expected test duration is about 18-180 seconds)
 Timer(t -> killjob("KILLING BY THREAD TEST WATCHDOG\n"), 1200)
 
+@testset """threads_exec.jl with JULIA_NUM_THREADS == $(ENV["JULIA_NUM_THREADS"])""" begin
+
 @test Threads.threadid() == 1
 @test 1 <= threadpoolsize() <= Threads.maxthreadid()
 
@@ -232,7 +234,7 @@ end
 # Make sure that eval'ing in a different module doesn't mess up other threads
 orig_curmodule14726 = @__MODULE__
 main_var14726 = 1
-module M14726
+@eval Main module M14726
 module_var14726 = 1
 end
 
@@ -252,7 +254,7 @@ end
     @test @__MODULE__() == orig_curmodule14726
 end
 
-module M14726_2
+@eval Main module M14726_2
 using Test
 using Base.Threads
 @threads for i in 1:100
@@ -1067,3 +1069,25 @@ end
         popfirst!(LOAD_PATH)
     end
 end
+
+# issue #49746, thread safety in `atexit(f)`
+@testset "atexit thread safety" begin
+    f = () -> nothing
+    before_len = length(Base.atexit_hooks)
+    @sync begin
+        for _ in 1:1_000_000
+            Threads.@spawn begin
+                atexit(f)
+            end
+        end
+    end
+    @test length(Base.atexit_hooks) == before_len + 1_000_000
+    @test all(hook -> hook === f, Base.atexit_hooks[1 : 1_000_000])
+
+    # cleanup
+    Base.@lock Base._atexit_hooks_lock begin
+        deleteat!(Base.atexit_hooks, 1:1_000_000)
+    end
+end
+
+end # main testset
diff --git a/test/tuple.jl b/test/tuple.jl
index ae764bd05481b..71770b6a553c2 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -265,8 +265,10 @@ end
         @test map(foo, (1,2,3,4), (1,2,3,4)) === (2,4,6,8)
         @test map(foo, longtuple, longtuple) === ntuple(i->2i,20)
         @test map(foo, vlongtuple, vlongtuple) === ntuple(i->2i,33)
-        @test_throws BoundsError map(foo, (), (1,))
-        @test_throws BoundsError map(foo, (1,), ())
+        @test map(foo, longtuple, vlongtuple) === ntuple(i->2i,20)
+        @test map(foo, vlongtuple, longtuple) === ntuple(i->2i,20)
+        @test map(foo, (), (1,)) === ()
+        @test map(foo, (1,), ()) === ()
     end
 
     @testset "n arguments" begin
@@ -276,8 +278,11 @@ end
         @test map(foo, (1,2,3,4), (1,2,3,4), (1,2,3,4)) === (3,6,9,12)
         @test map(foo, longtuple, longtuple, longtuple) === ntuple(i->3i,20)
         @test map(foo, vlongtuple, vlongtuple, vlongtuple) === ntuple(i->3i,33)
-        @test_throws BoundsError map(foo, (), (1,), (1,))
-        @test_throws BoundsError map(foo, (1,), (1,), ())
+        @test map(foo, vlongtuple, longtuple, longtuple) === ntuple(i->3i,20)
+        @test map(foo, longtuple, vlongtuple, longtuple) === ntuple(i->3i,20)
+        @test map(foo, longtuple, vlongtuple, vlongtuple) === ntuple(i->3i,20)
+        @test map(foo, (), (1,), (1,)) === ()
+        @test map(foo, (1,), (1,), ()) === ()
     end
 end
 
@@ -783,3 +788,11 @@ namedtup = (;a=1, b=2, c=3)
 # Make sure that tuple iteration is foldable
 @test Core.Compiler.is_foldable(Base.infer_effects(iterate, Tuple{NTuple{4, Float64}, Int}))
 @test Core.Compiler.is_foldable(Base.infer_effects(eltype, Tuple{Tuple}))
+
+# some basic equivalence handling tests for Union{} appearing in Tuple Vararg parameters
+@test Tuple{} <: Tuple{Vararg{Union{}}}
+@test Tuple{Int} <: Tuple{Int, Vararg{Union{}}}
+@test_throws ErrorException("Tuple field type cannot be Union{}") Tuple{Int, Vararg{Union{},1}}
+@test_throws ErrorException("Tuple field type cannot be Union{}") Tuple{Vararg{Union{},1}}
+@test Tuple{} <: Tuple{Vararg{Union{},N}} where N
+@test !(Tuple{} >: Tuple{Vararg{Union{},N}} where N)
diff --git a/test/worlds.jl b/test/worlds.jl
index 39a9dc4d9a788..b5a8f1c5449ac 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -233,23 +233,10 @@ function method_instance(f, types=Base.default_tt(f))
     m = which(f, types)
     inst = nothing
     tt = Base.signature_type(f, types)
-    specs = m.specializations
-    if isa(specs, Nothing)
-    elseif isa(specs, Core.SimpleVector)
-        for i = 1:length(specs)
-            mi = specs[i]
-            if mi isa Core.MethodInstance
-                if mi.specTypes <: tt && tt <: mi.specTypes
-                    inst = mi
-                    break
-                end
-            end
-        end
-    else
-        Base.visit(specs) do mi
-            if mi.specTypes === tt
-                inst = mi
-            end
+    for mi in Base.specializations(m)
+        if mi.specTypes <: tt && tt <: mi.specTypes
+            inst = mi
+            break
         end
     end
     return inst
@@ -408,3 +395,27 @@ wc_aiw2 = get_world_counter()
 @test Base.invoke_in_world(wc_aiw2, f_inworld, 2) == "world two; x=2"
 @test Base.invoke_in_world(wc_aiw1, g_inworld, 2, y=3) == "world one; x=2, y=3"
 @test Base.invoke_in_world(wc_aiw2, g_inworld, 2, y=3) == "world two; x=2, y=3"
+
+# logging
+mc48954(x, y) = false
+mc48954(x::Int, y::Int) = x == y
+mc48954(x::Symbol, y::Symbol) = x == y
+function mcc48954(container, y)
+    x = container[1]
+    return mc48954(x, y)
+end
+
+mcc48954(Any[1], 1)
+mc48954i = method_instance(mc48954, (Any, Int))
+mcc48954i = method_instance(mcc48954, (Vector{Any}, Int))
+list48954 = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1)
+mc48954(x::AbstractFloat, y::Int) = x == y
+ccall(:jl_debug_method_invalidation, Any, (Cint,), 0)
+@test list48954 == [
+    mcc48954i,
+    1,
+    mc48954i,
+    "jl_method_table_insert",
+    which(mc48954, (AbstractFloat, Int)),
+    "jl_method_table_insert"
+]