-
Notifications
You must be signed in to change notification settings - Fork 30
/
CPP.hs
348 lines (309 loc) · 12.4 KB
/
CPP.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
-- Copyright (c) Facebook, Inc. and its affiliates.
--
-- This source code is licensed under the MIT license found in the
-- LICENSE file in the root directory of this source tree.
--
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Retrie.CPP
( CPP(..)
, addImportsCPP
, parseCPPFile
, parseCPP
, printCPP
-- ** Internal interface exported for tests
, cppFork
) where
import Data.Char (isSpace)
import Data.Function (on)
import Data.Functor.Identity
import Data.List (nubBy, sortOn)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import Debug.Trace
import Retrie.ExactPrint
import Retrie.GHC
import Retrie.Replace
-- Note [CPP]
-- We can't just run the pre-processor on files and then rewrite them, because
-- the rewrites will apply to a module that never exists as code! Exactprint
-- has no support for roundtripping CPP, because the GHC parser doesn't
-- actually parse it (it looks for the pragma and then delegates to the
-- pre-processor).
--
-- To solve this, we instead generate all possible versions of the module
-- (exponential in the number of #if directives :-P). We then apply rewrites
-- to all versions, and collect all the 'Replacement's that they generate.
-- We can then use these to splice results back into the original file.
--
-- Suprisingly, this works. It depends on a few observations:
--
-- * We don't need to actually evaluate any CPP directives. This is because
-- we want all versions of the file.
--
-- * Since we don't need to evaluate, we can simply replace all CPP directives
-- with blank lines and the locations of all AST elements in each version of
-- the module will be exactly the same as in the original module. This is the
-- key to splicing properly.
--
-- * Replacements can be spliced in directly with no smarts about binders, etc,
-- because retrie did the instantiation during matching.
--
-- The CPP Type ----------------------------------------------------------------
data CPP a
= NoCPP a
| CPP Text [AnnotatedImports] [a]
instance Functor CPP where
fmap f (NoCPP x) = NoCPP (f x)
fmap f (CPP orig is xs) = CPP orig is (map f xs)
instance Foldable CPP where
foldMap f (NoCPP x) = f x
foldMap f (CPP _ _ xs) = foldMap f xs
instance Traversable CPP where
traverse f (NoCPP x) = NoCPP <$> f x
traverse f (CPP orig is xs) = CPP orig is <$> traverse f xs
addImportsCPP
:: [AnnotatedImports]
-> CPP AnnotatedModule
-> CPP AnnotatedModule
addImportsCPP is (NoCPP m) =
NoCPP $ runIdentity $ transformA m $ insertImports is
addImportsCPP is (CPP orig is' ms) = CPP orig (is++is') ms
-- Parsing a CPP Module --------------------------------------------------------
parseCPPFile
:: (FilePath -> String -> IO AnnotatedModule)
-> FilePath
-> IO (CPP AnnotatedModule)
parseCPPFile p fp =
-- read file strictly
Text.readFile fp >>= parseCPP (p fp)
parseCPP
:: Monad m
=> (String -> m AnnotatedModule)
-> Text -> m (CPP AnnotatedModule)
parseCPP p orig
| any isCPP (Text.lines orig) =
CPP orig [] <$> mapM (p . Text.unpack) (cppFork orig)
| otherwise = NoCPP <$> p (Text.unpack orig)
-- Printing a CPP Module -------------------------------------------------------
printCPP :: [Replacement] -> CPP AnnotatedModule -> String
printCPP _ (NoCPP m) = printA m
printCPP repls (CPP orig is ms) = Text.unpack $ Text.unlines $
case is of
[] -> splice "" 1 1 sorted origLines
_ ->
splice
(Text.unlines newHeader)
(length revHeader + 1)
1
sorted
(reverse revDecls)
where
sorted = sortOn fst
[ (r, replReplacement)
| Replacement{..} <- repls
, Just r <- [getRealSpan replLocation]
]
origLines = Text.lines orig
mbName = unLoc <$> hsmodName (unLoc $ astA $ head ms)
importLines = runIdentity $ fmap astA $ transformA (filterAndFlatten mbName is) $
mapM $ fmap (Text.pack . dropWhile isSpace . printA) . pruneA
p t = isImport t || isModule t || isPragma t
(revDecls, revHeader) = break p (reverse origLines)
newHeader = reverse revHeader ++ importLines
splice :: Text -> Int -> Int -> [(RealSrcSpan, String)] -> [Text] -> [Text]
splice _ _ _ _ [] = []
splice prefix _ _ [] (t:ts) = prefix <> t : ts
splice prefix l c rs@((r, repl):rs') ts@(t:ts')
| srcSpanStartLine r > l =
-- Next rewrite is not on this line. Output line.
prefix <> t : splice "" (l+1) 1 rs ts'
| srcSpanStartLine r < l || srcSpanStartCol r < c =
-- Next rewrite starts before current position. This happens when
-- the same rewrite is made in multiple versions of the CPP'd module.
-- Drop the duplicate rewrite and keep going.
splice prefix l c rs' ts
| (old, ln:lns) <- splitAt (srcSpanEndLine r - l) ts =
-- The next rewrite starts on this line.
let
start = srcSpanStartCol r
end = srcSpanEndCol r
prefix' = prefix <> Text.take (start - c) t <> Text.pack repl
ln' = Text.drop (end - c) ln
-- For an example of how this can happen, see the CPPConflict test.
errMsg = unlines
[ "Refusing to rewrite across CPP directives."
, ""
, "Location: " ++ locStr
, ""
, "Original:"
, ""
, Text.unpack orig
, ""
, "Replacement:"
, ""
, repl
]
orig =
Text.unlines $ (prefix <> t : drop 1 old) ++ [Text.take (end - c) ln]
locStr = unpackFS (srcSpanFile r) ++ ":" ++ show l ++ ":" ++ show start
in
if any isCPP old
then trace errMsg $ splice prefix l c rs' ts
else splice prefix' (srcSpanEndLine r) end rs' (ln':lns)
| otherwise = error "printCPP: impossible replacement past end of file"
-- Forking the module ----------------------------------------------------------
cppFork :: Text -> [Text]
cppFork = cppTreeToList . mkCPPTree
-- | Tree representing the module. Each #endif becomes a Node.
data CPPTree
= Node [Text] CPPTree CPPTree
| Leaf [Text]
-- | Stack type used to keep track of how many #ifs we are nested into.
-- Controls whether we emit lines into each version of the module.
data CPPBranch
= CPPTrue -- print until an 'else'
| CPPFalse -- print blanks until an 'else' or 'endif'
| CPPOmit -- print blanks until an 'endif'
-- | Build CPPTree from lines of the module.
mkCPPTree :: Text -> CPPTree
mkCPPTree = go False [] [] . reverse . Text.lines
-- We reverse the lines once up front, then process the module from bottom
-- to top, branching at #endifs. If we were to process from top to bottom,
-- we'd have to reverse each version later, rather than reversing the original
-- once. This also makes it easy to spot import statements and stop branching
-- since we don't care about differences in imports.
where
go :: Bool -> [CPPBranch] -> [Text] -> [Text] -> CPPTree
go _ _ suffix [] = Leaf suffix
go True [] suffix ls =
Leaf (blankifyAndReverse suffix ls) -- See Note [Imports]
go seenImport st suffix (l:ls) =
case extractCPPCond l of
Just If -> -- pops from stack
case st of
(_:st') -> emptyLine st'
[] -> error "mkCPPTree: if with empty stack"
Just ElIf -> -- stack same size
case st of
(CPPOmit:_) -> emptyLine st
(CPPFalse:st') -> emptyLine (CPPOmit:st')
(CPPTrue:st') -> -- See Note [ElIf]
let
omittedSuffix = replicate (length suffix) ""
in
Node
[]
(emptyLine (CPPOmit:st'))
(go seenImport (CPPTrue:st') ("":omittedSuffix) ls)
[] -> error "mkCPPTree: else with empty stack"
Just Else -> -- stack same size
case st of
(CPPOmit:_) -> emptyLine st
(CPPTrue:st') -> emptyLine (CPPFalse:st')
(CPPFalse:st') -> emptyLine (CPPTrue:st')
[] -> error "mkCPPTree: else with empty stack"
Just EndIf -> -- push to stack
case st of
(CPPOmit:_) -> emptyLine (CPPOmit:st)
(CPPFalse:_) -> emptyLine (CPPOmit:st)
_ ->
Node
suffix
(go seenImport (CPPTrue:st) [""] ls)
(go seenImport (CPPFalse:st) [""] ls)
Nothing -> -- stack same size
case st of
(CPPOmit:_) -> go seenImport' st ("":suffix) ls
(CPPFalse:_) -> go seenImport' st ("":suffix) ls
_ -> go seenImport' st (blankCPP l:suffix) ls
where
emptyLine st' = go seenImport st' ("":suffix) ls
seenImport' = seenImport || isImport l
blankifyAndReverse :: [Text] -> [Text] -> [Text]
blankifyAndReverse suffix [] = suffix
blankifyAndReverse suffix (l:ls) = blankifyAndReverse (blankCPP l:suffix) ls
-- Note [Imports]
-- If we have seen an import statement, and have an empty stack, that means all
-- conditionals above this point only control imports/exports, etc. Retrie
-- doesn't match in those places anyway, and the imports don't matter because
-- we only parse, no renaming. As a micro-optimization, we can stop branching.
-- This saves forking the module in the common case that CPP is used to choose
-- imports. We have to wait for stack to be empty because we might have seen an
-- import in one branch, but there is a decl in the other branch.
-- Note [ElIf]
-- The way we handle #elif is pretty subtle. Some observations:
-- If we're on the CPPOmit branch, keep omitting up to the next #if, like usual.
-- If we're on the CPPFalse branch, we didn't show the #elif, but either we
-- showed the #else, or this whole #if might not output anything. So either way,
-- we need to omit up to the next #if.
-- If we're on the CPPTrue branch, we definitely showed the #elif, so we need to
-- fork with a Node. One side of the branch omits up to the next #if. The other
-- side is as if we have omitted everything from the last #endif, and we
-- continue showing up from here. This will show whatever is above the #elif.
-- It is crucial we do this branching on the CPPTrue branch, so any #elif
-- above this point is also handled correctly.
-- | Expand CPPTree into 2^h-1 versions of the module.
cppTreeToList :: CPPTree -> [Text]
cppTreeToList t = go [] t []
where
go rest (Leaf suffix) = (Text.unlines (suffix ++ rest) :)
go rest (Node suffix l r) =
let rest' = suffix ++ rest -- right-nested
in go rest' l . go rest' r
-- Spotting CPP directives -----------------------------------------------------
data CPPCond = If | ElIf | Else | EndIf
extractCPPCond :: Text -> Maybe CPPCond
extractCPPCond t
| Just ('#',t') <- Text.uncons t =
case Text.words t' of
("if":_) -> Just If
("else":_) -> Just Else
("elif":_) -> Just ElIf
("endif":_) -> Just EndIf
_ -> Nothing
| otherwise = Nothing
blankCPP :: Text -> Text
blankCPP t
| isCPP t = ""
| otherwise = t
isCPP :: Text -> Bool
isCPP = Text.isPrefixOf "#"
isImport :: Text -> Bool
isImport = Text.isPrefixOf "import"
isModule :: Text -> Bool
isModule = Text.isPrefixOf "module"
isPragma :: Text -> Bool
isPragma = Text.isPrefixOf "{-#"
-------------------------------------------------------------------------------
-- This would make more sense in Retrie.Expr, but that creates an import cycle.
-- Ironic, I know.
insertImports
:: Monad m
=> [AnnotatedImports] -- ^ imports and their annotations
-> Located HsModule -- ^ target module
-> TransformT m (Located HsModule)
insertImports is (L l m) = do
imps <- graftA $ filterAndFlatten (unLoc <$> hsmodName m) is
let
deduped = nubBy (eqImportDecl `on` unLoc) $ hsmodImports m ++ imps
return $ L l m { hsmodImports = deduped }
filterAndFlatten :: Maybe ModuleName -> [AnnotatedImports] -> AnnotatedImports
filterAndFlatten mbName is =
runIdentity $ transformA (mconcat is) $ return . externalImps mbName
where
externalImps :: Maybe ModuleName -> [LImportDecl GhcPs] -> [LImportDecl GhcPs]
externalImps (Just mn) = filter ((/= mn) . unLoc . ideclName . unLoc)
externalImps _ = id
eqImportDecl :: ImportDecl GhcPs -> ImportDecl GhcPs -> Bool
eqImportDecl x y =
((==) `on` unLoc . ideclName) x y
&& ((==) `on` ideclQualified) x y
&& ((==) `on` ideclAs) x y
&& ((==) `on` ideclHiding) x y
&& ((==) `on` ideclPkgQual) x y
&& ((==) `on` ideclSource) x y
&& ((==) `on` ideclSafe) x y
-- intentionally leave out ideclImplicit and ideclSourceSrc
-- former doesn't matter for this check, latter is prone to whitespace issues