From 2f72704bcd1db87da0caaf62945c75d4e1be7323 Mon Sep 17 00:00:00 2001 From: Rebecca Turner Date: Thu, 29 Aug 2024 10:42:02 -0700 Subject: [PATCH] Use response files for `ghc` invocations Before this change, `cabal` could fail with the following error message when building very large Haskell packages: ``` ghc: createProcess: posix_spawnp: resource exhausted (Argument list too long) ``` This is because when the number of modules or dependencies grows large enough, then the `ghc` command line can potentially exceed the `ARG_MAX` command line length limit. However, `ghc` supports response files in order to work around these sorts of command line length limitations, so this change enables the use of those response files. Note that this requires taking a special precaution to not pass RTS options to the response file because there's no way that `ghc` can support RTS options via the response file. The reason why is because the Haskell runtime processes these options (not `ghc`), so if you store the RTS options in the response file then `ghc`'s command line parser won't know what to do with them. This means that `ghc` commands can still potentially fail if the RTS options get long enough, but this is less likely to occur in practice since RTS options tend to be significantly smaller than non-RTS options. This also requires skipping the response file if the first argument is `--interactive`. See the corresponding code comment which explains why in more detail. Co-Authored-By: Gabriella Gonzales --- Cabal/src/Distribution/Simple/GHC.hs | 2 + .../Simple/GHC/Build/ExtraSources.hs | 16 ++- .../src/Distribution/Simple/GHC/Build/Link.hs | 33 ++++- .../Distribution/Simple/GHC/Build/Modules.hs | 31 +++-- Cabal/src/Distribution/Simple/Program/GHC.hs | 117 ++++++++++++++++-- changelog.d/pr-9367 | 21 ++++ 6 files changed, 191 insertions(+), 29 deletions(-) create mode 100644 changelog.d/pr-9367 diff --git a/Cabal/src/Distribution/Simple/GHC.hs b/Cabal/src/Distribution/Simple/GHC.hs index db475a01dd5..cb1bd207b16 100644 --- a/Cabal/src/Distribution/Simple/GHC.hs +++ b/Cabal/src/Distribution/Simple/GHC.hs @@ -627,6 +627,8 @@ startInterpreter verbosity progdb comp platform packageDBs = do } checkPackageDbStack verbosity comp packageDBs (ghcProg, _) <- requireProgram verbosity ghcProgram progdb + -- This doesn't pass source file arguments to GHC, so we don't have to worry + -- about using a response file here. runGHC verbosity ghcProg comp platform Nothing replOpts -- ----------------------------------------------------------------------------- diff --git a/Cabal/src/Distribution/Simple/GHC/Build/ExtraSources.hs b/Cabal/src/Distribution/Simple/GHC/Build/ExtraSources.hs index 31aa92a3b2a..f2ca9aba02f 100644 --- a/Cabal/src/Distribution/Simple/GHC/Build/ExtraSources.hs +++ b/Cabal/src/Distribution/Simple/GHC/Build/ExtraSources.hs @@ -23,6 +23,7 @@ import Distribution.Simple.GHC.Build.Modules import Distribution.Simple.GHC.Build.Utils import Distribution.Simple.LocalBuildInfo import Distribution.Simple.Program.Types +import Distribution.Simple.Setup.Common (commonSetupTempFileOptions) import Distribution.System (Arch (JavaScript), Platform (..)) import Distribution.Types.ComponentLocalBuildInfo import Distribution.Utils.Path @@ -176,7 +177,17 @@ buildExtraSources sources = viewSources (targetComponent targetInfo) comp = compiler lbi platform = hostPlatform lbi - runGhcProg = runGHC verbosity ghcProg comp platform + tempFileOptions = commonSetupTempFileOptions $ buildingWhatCommonFlags buildingWhat + runGhcProg = + runGHCWithResponseFile + "ghc.rsp" + Nothing + tempFileOptions + verbosity + ghcProg + comp + platform + mbWorkDir buildAction :: SymbolicPath Pkg File -> IO () buildAction sourceFile = do @@ -219,7 +230,7 @@ buildExtraSources compileIfNeeded :: GhcOptions -> IO () compileIfNeeded opts = do needsRecomp <- checkNeedsRecompilation mbWorkDir sourceFile opts - when needsRecomp $ runGhcProg mbWorkDir opts + when needsRecomp $ runGhcProg opts createDirectoryIfMissingVerbose verbosity True (i odir) case targetComponent targetInfo of @@ -251,6 +262,7 @@ buildExtraSources DynWay -> compileIfNeeded sharedSrcOpts ProfWay -> compileIfNeeded profSrcOpts ProfDynWay -> compileIfNeeded profSharedSrcOpts + -- build any sources if (null sources || componentIsIndefinite clbi) then return mempty diff --git a/Cabal/src/Distribution/Simple/GHC/Build/Link.hs b/Cabal/src/Distribution/Simple/GHC/Build/Link.hs index ef9f33d79c9..27b418d9119 100644 --- a/Cabal/src/Distribution/Simple/GHC/Build/Link.hs +++ b/Cabal/src/Distribution/Simple/GHC/Build/Link.hs @@ -98,6 +98,7 @@ linkOrLoadComponent clbi = buildCLBI pbci isIndef = componentIsIndefinite clbi mbWorkDir = mbWorkDirLBI lbi + tempFileOptions = commonSetupTempFileOptions $ buildingWhatCommonFlags what -- See Note [Symbolic paths] in Distribution.Utils.Path i = interpretSymbolicPathLBI lbi @@ -188,10 +189,25 @@ linkOrLoadComponent -- exports. when (case component of CLib lib -> null (allLibModules lib clbi); _ -> False) $ warn verbosity "No exposed modules" - runReplOrWriteFlags ghcProg lbi replFlags replOpts_final (pkgName (PD.package pkg_descr)) target + runReplOrWriteFlags + ghcProg + lbi + replFlags + replOpts_final + (pkgName (PD.package pkg_descr)) + target _otherwise -> let - runGhcProg = runGHC verbosity ghcProg comp platform mbWorkDir + runGhcProg = + runGHCWithResponseFile + "ghc.rsp" + Nothing + tempFileOptions + verbosity + ghcProg + comp + platform + mbWorkDir platform = hostPlatform lbi comp = compiler lbi get_rpaths ways = @@ -730,8 +746,19 @@ runReplOrWriteFlags ghcProg lbi rflags ghcOpts pkg_name target = common = configCommonFlags $ configFlags lbi mbWorkDir = mbWorkDirLBI lbi verbosity = fromFlag $ setupVerbosity common + tempFileOptions = commonSetupTempFileOptions common in case replOptionsFlagOutput (replReplOptions rflags) of - NoFlag -> runGHC verbosity ghcProg comp platform mbWorkDir ghcOpts + NoFlag -> + runGHCWithResponseFile + "ghc.rsp" + Nothing + tempFileOptions + verbosity + ghcProg + comp + platform + mbWorkDir + ghcOpts Flag out_dir -> do let uid = componentUnitId clbi this_unit = prettyShow uid diff --git a/Cabal/src/Distribution/Simple/GHC/Build/Modules.hs b/Cabal/src/Distribution/Simple/GHC/Build/Modules.hs index 2e8ba35ccb6..fd86820f9fb 100644 --- a/Cabal/src/Distribution/Simple/GHC/Build/Modules.hs +++ b/Cabal/src/Distribution/Simple/GHC/Build/Modules.hs @@ -137,20 +137,29 @@ buildHaskellModules numJobs ghcProg mbMainFile inputModules buildTargetDir neede | BuildRepl{} <- what = True | otherwise = False - -- TODO: do we need to put hs-boot files into place for mutually recursive - -- modules? FIX: what about exeName.hi-boot? + -- TODO: do we need to put hs-boot files into place for mutually recursive + -- modules? FIX: what about exeName.hi-boot? - -- Determine if program coverage should be enabled and if so, what - -- '-hpcdir' should be. - let isCoverageEnabled = if isLib then libCoverage lbi else exeCoverage lbi - hpcdir way - | forRepl = mempty -- HPC is not supported in ghci - | isCoverageEnabled = Flag $ Hpc.mixDir (coerceSymbolicPath $ coerceSymbolicPath buildTargetDir extraCompilationArtifacts) way - | otherwise = mempty + -- Determine if program coverage should be enabled and if so, what + -- '-hpcdir' should be. + isCoverageEnabled = if isLib then libCoverage lbi else exeCoverage lbi + hpcdir way + | forRepl = mempty -- HPC is not supported in ghci + | isCoverageEnabled = Flag $ Hpc.mixDir (coerceSymbolicPath $ coerceSymbolicPath buildTargetDir extraCompilationArtifacts) way + | otherwise = mempty - let mbWorkDir = mbWorkDirLBI lbi - runGhcProg = runGHC verbosity ghcProg comp platform mbWorkDir + tempFileOptions = commonSetupTempFileOptions $ buildingWhatCommonFlags what + runGhcProg = + runGHCWithResponseFile + "ghc.rsp" + Nothing + tempFileOptions + verbosity + ghcProg + comp + platform + mbWorkDir platform = hostPlatform lbi (hsMains, scriptMains) = diff --git a/Cabal/src/Distribution/Simple/Program/GHC.hs b/Cabal/src/Distribution/Simple/Program/GHC.hs index 86320cc9472..1f1239eef56 100644 --- a/Cabal/src/Distribution/Simple/Program/GHC.hs +++ b/Cabal/src/Distribution/Simple/Program/GHC.hs @@ -16,6 +16,7 @@ module Distribution.Simple.Program.GHC , ghcInvocation , renderGhcOptions , runGHC + , runGHCWithResponseFile , packageDbArgsDb , normaliseGhcArgs ) where @@ -32,8 +33,10 @@ import Distribution.Simple.Compiler import Distribution.Simple.Flag import Distribution.Simple.GHC.ImplInfo import Distribution.Simple.Program.Find (getExtraPathEnv) +import Distribution.Simple.Program.ResponseFile import Distribution.Simple.Program.Run import Distribution.Simple.Program.Types +import Distribution.Simple.Utils (TempFileOptions, infoNoWrap) import Distribution.System import Distribution.Types.ComponentId import Distribution.Types.ParStrat @@ -42,17 +45,19 @@ import Distribution.Utils.Path import Distribution.Verbosity import Distribution.Version +import GHC.IO.Encoding (TextEncoding) import Language.Haskell.Extension import Data.List (stripPrefix) import qualified Data.Map as Map import Data.Monoid (All (..), Any (..), Endo (..)) import qualified Data.Set as Set +import qualified System.Process as Process normaliseGhcArgs :: Maybe Version -> PackageDescription -> [String] -> [String] normaliseGhcArgs (Just ghcVersion) PackageDescription{..} ghcArgs | ghcVersion `withinRange` supportedGHCVersions = - argumentFilters . filter simpleFilters . filterRtsOpts $ ghcArgs + argumentFilters . filter simpleFilters . filterRtsArgs $ ghcArgs where supportedGHCVersions :: VersionRange supportedGHCVersions = orLaterVersion (mkVersion [8, 0]) @@ -162,18 +167,9 @@ normaliseGhcArgs (Just ghcVersion) PackageDescription{..} ghcArgs flagArgumentFilter ["-ghci-script", "-H", "-interactive-print"] - filterRtsOpts :: [String] -> [String] - filterRtsOpts = go False - where - go :: Bool -> [String] -> [String] - go _ [] = [] - go _ ("+RTS" : opts) = go True opts - go _ ("-RTS" : opts) = go False opts - go isRTSopts (opt : opts) = addOpt $ go isRTSopts opts - where - addOpt - | isRTSopts = id - | otherwise = (opt :) + -- \| Remove RTS arguments from a list. + filterRtsArgs :: [String] -> [String] + filterRtsArgs = snd . splitRTSArgs simpleFilters :: String -> Bool simpleFilters = @@ -647,6 +643,81 @@ runGHC verbosity ghcProg comp platform mbWorkDir opts = do runProgramInvocation verbosity =<< ghcInvocation verbosity ghcProg comp platform mbWorkDir opts +runGHCWithResponseFile + :: FilePath + -> Maybe TextEncoding + -> TempFileOptions + -> Verbosity + -> ConfiguredProgram + -> Compiler + -> Platform + -> Maybe (SymbolicPath CWD (Dir Pkg)) + -> GhcOptions + -> IO () +runGHCWithResponseFile fileNameTemplate encoding tempFileOptions verbosity ghcProg comp platform maybeWorkDir opts = do + invocation <- ghcInvocation verbosity ghcProg comp platform maybeWorkDir opts + + let compilerSupportsResponseFiles = + case compilerCompatVersion GHC comp of + -- GHC 9.4 is the first version which supports response files. + Just version -> version >= mkVersion [9, 4] + Nothing -> False + + args = progInvokeArgs invocation + + -- Don't use response files if the first argument is `--interactive`, for + -- two related reasons. + -- + -- `hie-bios` relies on a hack to intercept the command-line that `Cabal` + -- supplies to `ghc`. Specifically, `hie-bios` creates a script around + -- `ghc` that detects if the first option is `--interactive` and if so then + -- instead of running `ghc` it prints the command-line that `ghc` was given + -- instead of running the command: + -- + -- https://github.com/haskell/hie-bios/blob/ce863dba7b57ded20160b4f11a487e4ff8372c08/wrappers/cabal#L7 + -- + -- … so we can't store that flag in the response file, otherwise that will + -- break. However, even if we were to add a special-case to keep that flag + -- out of the response file things would still break because `hie-bios` + -- stores the arguments to `ghc` that the wrapper script outputs and reuses + -- them later. That breaks if you use a response file because it will + -- store an argument like `@…/ghc36000-0.rsp` which is a temporary path + -- that no longer exists after the wrapper script completes. + -- + -- The work-around here is that we don't use a response file at all if the + -- first argument (and only the first argument) to `ghc` is + -- `--interactive`. This ensures that `hie-bios` and all downstream + -- utilities (e.g. `haskell-language-server`) continue working. + -- + -- + useResponseFile = + case args of + "--interactive" : _ -> False + _ -> compilerSupportsResponseFiles + + if not useResponseFile + then runProgramInvocation verbosity invocation + else do + let (rtsArgs, otherArgs) = splitRTSArgs args + + withResponseFile + verbosity + tempFileOptions + fileNameTemplate + encoding + otherArgs + $ \responseFile -> do + let newInvocation = + invocation{progInvokeArgs = ('@' : responseFile) : rtsArgs} + + infoNoWrap verbosity $ + "GHC response file arguments: " + <> case otherArgs of + [] -> "" + arg : args' -> Process.showCommandForUser arg args' + + runProgramInvocation verbosity newInvocation + ghcInvocation :: Verbosity -> ConfiguredProgram @@ -960,6 +1031,26 @@ packageDbArgs implInfo | flagPackageConf implInfo = packageDbArgsConf | otherwise = packageDbArgsDb +-- | Split a list of command-line arguments into RTS arguments and non-RTS +-- arguments. +splitRTSArgs :: [String] -> ([String], [String]) +splitRTSArgs args = + let addRTSArg arg ~(rtsArgs, nonRTSArgs) = (arg : rtsArgs, nonRTSArgs) + addNonRTSArg arg ~(rtsArgs, nonRTSArgs) = (rtsArgs, arg : nonRTSArgs) + + go _ [] = ([], []) + go isRTSArg (arg : rest) = + case arg of + "+RTS" -> addRTSArg arg $ go True rest + "-RTS" -> addRTSArg arg $ go False rest + "--RTS" -> ([arg], rest) + "--" -> ([], arg : rest) + _ -> + if isRTSArg + then addRTSArg arg $ go isRTSArg rest + else addNonRTSArg arg $ go isRTSArg rest + in go False args + -- ----------------------------------------------------------------------------- -- Boilerplate Monoid instance for GhcOptions diff --git a/changelog.d/pr-9367 b/changelog.d/pr-9367 new file mode 100644 index 00000000000..75c4da78f43 --- /dev/null +++ b/changelog.d/pr-9367 @@ -0,0 +1,21 @@ +--- +synopsis: "Use response files when calling `ghc`" +packages: [Cabal] +prs: 9367 +--- + +Cabal now passes GHC arguments through a response file. This prevents an error +where very large packages would fail to build due to the module names exceeding +the `ARG_MAX` command-line length limit: + +``` +ghc: createProcess: posix_spawnp: resource exhausted (Argument list too long) +``` + +Notes: +- Response files will not be used if the first argument to `ghc` is + `--interactive`. +- RTS options (like `+RTS -H32m -S -RTS`) will not be included in response + files. +- For reproducing commands after Cabal exits, `--keep-temp-files` can be used + to prevent the response files from being deleted.