-
Notifications
You must be signed in to change notification settings - Fork 6
/
zip-downstream-fork.py
executable file
·1451 lines (1249 loc) · 57.9 KB
/
zip-downstream-fork.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/python
#
# This tool takes a repository containing monorepo history, rewritten
# subproject fork histories (done by migrate-downstream-fork.py) along
# with the revmap produced by migrate-downstream-fork.py, an
# "umbrella" history consisting of submodule updates from subprojects
# and rewrites the umbrella history so that the submodule updates are
# "inlined" directly from the rewritten subproject histories. The
# result is a history that interleaves rewritten subproject commits
# (zips them) according to the submodules updates, making it appear as
# if the commits were originally against the monorepo in the order
# implied by the umbrella history.
#
# Any non-LLVM submodules will be retained in their directories as
# they appear in the umbrella history.
#
# Usage:
#
# First, prepare a repository by following the instructions in
# migrate-downstream-fork.py. Pass --revmap-out=<file> to create a
# mapping from old downstream commits to new downstream commits.
#
# Then add umbrella history:
# git remote add umbrella https://...
#
# Be sure to add the history from any non-llvm submodules:
#
# for submodule in ${my_non_llvm_submodule_list[@]}; do
# git remote add ${submodule} $(my_submodule_url ${submodule})
# done
#
# Pull it all down:
# git fetch --all
#
# Then, run this script:
# zip-downstream-fork.py refs/remotes/umbrella --revmap-in=<file> \
# --subdir=<dir> [--submodule-map=<file>] \
# [--revmap-out=<file>]
#
# --subdir specifies where to rewrite trees (directories and files)
# that are not part of a submodule. Things such as top-level READMEs,
# build scripts, etc. will appear under <dir>. This is to avoid
# possible conflicts with top-level entries in the upstream monorepo.
#
# The option --submodule-map=<file> is useful if your submodule layout
# is different from the monorepo layout. By default the tool assumes
# project submodules exist at the top level of the umbrella history
# (e.g. in the same relative path as in the monorepo). Use
# --submodule-map if your layout differs. The map file should contain
# a mapping from submodule path to monorepo path, one mapping per
# line. If a submodule path doesn't exist in the map, it is assumed
# to map to the same path in the monorepo.
#
# For example, if your layout looks like this:
#
# <root>
# local-sources
# upstream-sources
# clang
# compiler-rt
# llvm
#
# then your submodule map file (submodule-map.txt) would look like
# this:
#
# upstream-sources/clang clang
# upstream-sources/compiler-rt compiler-rt
# upstream-sources/llvm llvm
#
# and you would invoke the tools as:
#
# zip-downstream-fork.py refs/remotes/umbrella --revmap-in=$file \
# --subdir=<dir> \
# --submodule-map=submodule-map.txt
#
# Note that the mapping simply maps matching umbrella path names to
# monorepo paths. There is no requirement that the umbrella path end
# with the same name as the monorepo path. If your clang is imported
# under fizzbin, simply tell the mapper that:
#
# fizzbin clang
#
# The mapper can also move third-party submodules to new places:
#
# my-top-level-tool third-party/my-tool
#
# With --revmap-out=<file> the tool will dump a map from original
# umbrella commit hash to rewritten umbrella commit hash.
#
# On the rewriting of trees and parents:
#
# The tool takes care to preserve the proper history for upstream
# monorepo bits that do not participate in the submodule process. For
# example, say the umbrella history looks like this:
#
# * (HEAD -> umbrella/master) Update submodule clang to FOO
# |
# * Update submodule llvm to BAR
# |
# | * (HEAD -> llvm/local) Do commit BAR in llvm
# | |
# | | * (HEAD -> clang/local) Do commit FOO in clang
# | | |
# * | | Downstream umbrella work
# | | |
# llvm clang
#
# The umbrella history updates submodules from commits in local copies
# of llvm and clang. Note that the llvm and clang histories have not
# yet been rewritten against the monorepo.
#
# Let's also say when the local llvm and clang branches are rewritten
# against the monorepo (by migrate-downstream-fork.py), it looks
# something like this:
#
# * (HEAD -> monorepo/master) Do commit XYZZY in lld
# |
# | * (HEAD -> monorepo-llvm/local) Do commit BAR in llvm
# | |
# | /
# |/
# * Do commit BAZ in compiler-rt
# |
# * Do commit QUUZ in clang
# |
# | * (HEAD -> monorepo-clang/local) Do commit FOO in clang
# | |
# | /
# |/
# * Do commit QUUX in compiler-rt
# |
# * (tag: llvmorg-10.0.0) Do commit GARPLY in clang
# |
#
# The commits from compiler-rt come from upstream (no local work
# exists for compiler-rt) but commits BAR and FOO exist in local
# histories for llvm and clang which were rewritten against the
# upstream monorepo (i.e. they are in branches off monorepo/master or
# some other point).
#
# The tool rewrites parents to indicate which tree was used for an
# inlined submodule commit:
#
# * (HEAD -> monorepo/master) Do commit XYZZY in lld
# |
# | * (HEAD -> zip/master) Do commit FOO in clang
# | |\
# | * \ Do commit BAR in llvm
# | / |
# |/ |
# * | Do commit BAZ in compiler-rt
# | /
# * / Do commit QUUZ in clang
# | /
# | /
# |/
# * Do commit QUUX in compiler-rt
# |
# * (tag: llvmorg-10.0.0) Do commit GARPLY in clang
# |
#
# The edge from compiler-rt/QUUX to zip/master appears redundant (it
# was supposedly merged along with compiler-rt/BAZ). However,
# according to the submodule history, clang/FOO should be paired with
# llvm/BAR. clang/FOO is based on clang/GARPLY and any files not
# touched by clang/FOO will reflect their state at clang/GARPLY, not
# their state at clang/QUUZ. Therefore, the tool keeps the edge from
# compiler-rt/QUUX as a visual reminder of the state of the tree. The
# script favors preserving submodule updates and their trees as they
# appeared in the umbrella history rather than trying to merge local
# changes into the latest version of a tree.
#
# The tool takes care to correctly write trees for subprojects not
# participating in the umbrella history. Given the above graph, a
# naive tree rewriting would result in compiler-rt being written
# incorrectly, resulting in compiler-rt/QUUX at zip/master rather than
# the proper compiler-rt/BAZ. This is because monorepo-clang/FOO
# incorporates the tree from compiler-rt/QUUX.
#
# The script attempts to get this right by tracking the most recent
# upstream commit that has been merged from the monorepo along each
# zipped branch. If a submodule update brings in an older tree from
# the monorepo that doesn't participate in submodule history, that
# tree is discarded in favor of the more recent tree. This means that
# the script assumes there is a total topological ordering among
# upstream commits brought in via submodule updates. For example, the
# script will abort if trying to create a history like this:
#
# * (HEAD -> zip/master)
# /|
# / |
# * | (HEAD -> llvm/local)
# / |
# / |
# * | (HEAD -> monorepo/master)
# | |
# | * (HEAD -> clang/local)
# | /
# | /
# | * (HEAD -> monorepo/branch1)
# | /
# |/
# * XYZ work
# |
#
# llvm/local and clang/local are based off divergent branches of the
# monorepo and there is no total topological order among them. It is
# not clear which monorepo tree should be used for other subprojects
# (compiler-rt, etc.). In this case the script aborts with an error
# indicating the commit would create such a merge point.
#
# If there are downstream commits that are not inlined into the
# zipped history (because submodule updates did not directly bring
# them in), their parents will be rewritten to point to inlined
# commits. For example, given this umbrella history with llvm/GRAULT
# not updated in the umbrella:
#
# * (HEAD -> umbrella/master) Update submodule clang to FOO
# |
# * Update submodule llvm to BAR
# |
# | * (HEAD -> llvm/local) Do commit BAR in llvm
# | |
# | * Do commit GRAULT in llvm
# | |
# | * Do commit WALDO in llvm
# | |
# | | * (HEAD -> clang/local) Do commit FOO in clang
# | | |
# * | | Downstream umbrella work
# | | |
# llvm clang
#
# With the monorepo history from above, the zipped history will look
# like this:
#
# * (HEAD -> monorepo/master) Do commit XYZZY in lld
# |
# | * (HEAD -> zip/master) Do commit FOO in clang
# | |\
# | * \ Do commit BAR in llvm
# | | |
# | * | Do commit GRAULT in llvm
# | | |
# | * | Do commit WALDO in llvm
# | / |
# |/ |
# * | Do commit BAZ in compiler-rt
# | /
# * / Do commit QUUZ in clang
# | /
# | /
# |/
# * Do commit QUUX in compiler-rt
# |
# * (tag: llvmorg-10.0.0) Do commit GARPLY in clang
# |
#
# Parent rewriting happens in two stages. Stage1 updates the parents
# of commits that were inlined into the zipped history. Stage2
# updates the parents of commits that were not inlined. The reason
# this must happen in two stages is that in stage1 we are traversing
# the old umbrella history. The only other commits we see are those
# from the gitlinks pointing to submodule commits we want to inline
# and we use the trees of those commits to get the right blobs into
# the rewritten umbrella commits, effectively inlining the gitlinks.
# The new umbrella commits are the only commits where we can update
# parents in stage1. In stage2, the new umbrella (zipped) commits now
# have parents pointing to submodule commits that were not inlined
# (thanks to the parent rewriting in stage1) and thus we will see
# those commits in stage2 and can rewrite their parents.
#
# On the rewriting of tags
#
# With the --update-tags option, the script will rewrite any tags
# pointing to inlined submodule commits to point at the new inlined
# commit. No attempt is made to distinguish upstream tags from local
# tags. Therefore, rewriting could be surprising, as in this example:
#
# * (HEAD -> umbrella/master) Get upstream clang/GARPLY
# |
# * Update submodule llvm to BAR
# |
# | * (HEAD -> llvm/local) Do commit BAR in llvm
# | |
# | | * (HEAD -> clang/local) Do commit FOO in clang
# | | |
# * | | Downstream umbrella work
# | | |
# llvm clang
#
# * (HEAD -> monorepo/master) Do commit XYZZY in lld
# |
# | * (HEAD -> zip/master) (tag: llvmorg-10.0.0) Get upstream clang/GARPLY
# | |\
# | * \ Do commit BAR in llvm
# | / |
# |/ |
# * | Do commit BAZ in compiler-rt
# | |
# * | Do commit QUUZ in clang
# | /
# | /
# | /
# * / Do commit QUUX in compiler-rt
# |/
# * Do commit GARPLY in clang (previously tagged llvmorg-10.0.0)
# |
#
# The umbrella pulled in a commit directly from upstream which
# happened to have a tag associated with it and so when it was inlined
# into the zipped history with --update-tags, the tag was rewritten to
# point to the inlined commit. This is almost certainly not what is
# wanted, which is why rewriting tags is an optional feature.
# However, this is probably an uncommon occurrence and it is generally
# safe and correct to use --update-tags. If upstream tags happen to
# be rewritten it is always possible to move the tag back to its
# correct location.
#
# TODO/Limitations:
#
# - The script requires a history with submodule updates. It should
# be fairly straightforward to enhance the script to take a revlist
# directly, ordering the commits according to the revlist. Such a
# revlist could be generated from an umbrella history or via
# site-specific mechanisms. This would be passed to
# fast_filter_branch.py directly, rather than generating a list via
# expand_ref_pattern(self.reflist) in Zipper.run as is currently
# done. Changes would need to be made to fast_filter_branch.py to
# accept a revlist to process directly, bypassing its invocation of
# git rev-list within do_filter.
#
# - Submodule removal is not handled at all. A third-party subproject
# will continue to exist though no updates to it will be made. This
# could be added by judicious use of fast_filter_branch.py's
# TreeEntry.remove_entry. For projects managed by upstream (clang,
# llvm, etc.), if a commit doesn't include a submodule (because it
# was removed), the subproject tree is taken from the upstream
# monorepo tree just as it is for upstream projects not
# participating in the umbrella history.
#
import argparse
import fast_filter_branch
import os
import re
import subprocess
import sys
def expand_ref_pattern(patterns):
    """Return the names of the refs matched by the given patterns.

    Runs ``git for-each-ref`` with the patterns appended to the command
    line, splits its output on newlines and drops the final element
    (the empty string that follows the trailing newline).
    """
    command = ["git", "for-each-ref", "--format=%(refname)"]
    command = command + patterns
    listing = subprocess.check_output(command)
    refnames = listing.split("\n")
    return refnames[:-1]
class Zipper:
"""Destructively zip a submodule umbrella repository."""
def __init__(self, new_upstream_prefix, revmap_in_file, revmap_out_file,
reflist, debug, abort_bad_submodule, no_rewrite_commit_msg,
subdir, submodule_map_file, update_tags, old_upstream_prefix):
if not new_upstream_prefix.endswith('/'):
new_upstream_prefix = new_upstream_prefix + '/'
if not old_upstream_prefix.endswith('/'):
old_upstream_prefix = old_upstream_prefix + '/'
# Options
self.new_upstream_prefix = new_upstream_prefix
self.old_upstream_prefix = old_upstream_prefix
self.revmap_in_file = revmap_in_file
self.revmap_out_file = revmap_out_file
self.reflist = reflist
self.dbg = debug
self.new_upstream_hashes = set()
self.abort_bad_submodule = abort_bad_submodule
self.no_rewrite_commit_msg = no_rewrite_commit_msg
self.subdir = subdir
self.update_tags = update_tags
# Filter state
# Latest merged upstream parents for each submodule, indexed by
# old umbrella parent.
self.merged_upstream_parents = {}
# Latest merged downstream parents for each submodule, indexed by
# old umbrella parent.
self.merged_downstream_parents = {}
# Map from old downstream commit hash to new downstream commit
# hash.
self.revap = {}
# Most-recently-merged commit of each submodule, indexed by old
# umbrella parents.
self.prev_submodules = {}
# Map from old umbrella commit to the upstream commit used for the
# base tree of the corresponding new commit.
self.base_tree_map = {}
# Map from old umbrella commit hash or original submodule commit
# to map from submodule path to the original submodule commit
# tree. Used to determine which submodules were inlined into each
# umbrella commit.
self.submodule_tree_map = {}
# Set of hashes of monorepo commits inlined into umbrella commits.
self.inlined_submodule_commits = set()
# Map from original downstream or umbrella commit to its final
# commit in the zippped history. Used for rewriting tags.
self.tag_revmap = {}
# Map from monorepo-rewritten submodule commit hash to new
# umbrella commit hash/mark where it was inlined. Used for
# updating parents of non-inlined submodule commits.
self.inlined_submodule_revmap = {}
# Map from new umbrella commit/hash to new submodule commit hashes
# inlined into it.
self.stage1_submodule_reverse_revmap = {}
# Map from old umbrella commit hash to new umbrella commit
# hash/mark after stage1.
self.stage1_umbrella_revmap = {}
# Map from old umbrella commit hash to new umbrella commit
# hash/mark after stage2.
self.stage2_umbrella_revmap = {}
# Map from new umbrella commit hash/mark to old umbrella commit
# hash.
self.stage1_umbrella_old_revmap = {}
if submodule_map_file:
with open(submodule_map_file) as f:
self.submodule_map = dict(line.split() for line in f)
else:
subprojects = ['clang',
'clang-tools-extra',
'compiler-rt',
'debuginfo-tests',
'libclc',
'libcxx',
'libcxxabi',
'libunwind',
'lld',
'lldb',
'llgo',
'llvm',
'openmp',
'parallel-libs',
'polly',
'pstl']
self.submodule_map = dict((s, s) for s in subprojects)
def debug(self, msg):
if self.dbg:
print msg
sys.stdout.flush
def gather_upstream_commits(self):
    """Record the commit hashes reachable from the upstream refs.

    Expands new_upstream_prefix and old_upstream_prefix into ref
    names, runs ``git rev-list`` over each group and stores the
    resulting hash sets in new_upstream_hashes and
    old_upstream_hashes.  Raises Exception when a prefix matches no
    refs.
    """
    def hashes_reachable_from(refs):
        listing = subprocess.check_output(['git', 'rev-list'] + refs)
        return set(listing.split('\n')[:-1])

    # Hashes of the new monorepo.
    refs_under_new = expand_ref_pattern([self.new_upstream_prefix])
    if not refs_under_new:
        raise Exception("No refs matched new upstream prefix %s" % self.new_upstream_prefix)
    self.new_upstream_hashes = hashes_reachable_from(refs_under_new)

    # Hashes of the old upstream refs.
    refs_under_old = expand_ref_pattern([self.old_upstream_prefix])
    if not refs_under_old:
        raise Exception("No refs matched old upstream prefix %s" % self.old_upstream_prefix)
    self.old_upstream_hashes = hashes_reachable_from(refs_under_old)
def find_submodules_in_entry(self, githash, tree, path):
"""Figure out which submodules/submodules commit an existing tree references.
Returns [([submodule pathsegs], commit_hash)], or [] if there are
no submodule updates to submodules we care about. commit_hash is
a reference to the commit pointed to by the submodule gitlink.
Recurses on subentries and submodules.
"""
subentries = tree.get_subentries(self.fm)
submodules = []
for name, e in subentries.iteritems():
if e.mode == '160000':
# A commit; this is a submodule gitlink.
try:
commit = self.fm.get_commit(e.githash)
except:
# It can happen that a submodule update refers to a commit
# that no longer exists. This is usually the result of user
# error with a submodule update to a commit not reachable by
# any branch in the subproject. We almost always want to
# skip these, but ask the user to make sure. If they don't
# want to skip it, then we really don't know what to do and
# the user will have to fix things up and try again.
print 'WARNING: No commit %s for submodule %s in commit %s' % (e.githash, name, githash)
if self.abort_bad_submodule:
raise Exception('No commit %s for submodule %s in commit %s' % (e.githash, name, githash))
continue
else:
# Recurse on the submodule to see if there are other
# submodules referenced by it.
submodule_path = list(path)
submodule_path.append(name)
submodule_entry = (submodule_path, e.githash)
submodules.append(submodule_entry)
submodules.extend(self.find_submodules_in_entry(e.githash,
commit.get_tree_entry(),
submodule_path))
elif e.mode == '40000':
subpath = list(path)
subpath.append(name)
submodules.extend(self.find_submodules_in_entry(githash, e, subpath))
return submodules
def find_submodules(self, commit, githash):
"""Figure out which submodules/submodule commits an existing commit references.
Returns [([submodule pathsegs], hash)], or [] if there are no submodule
updates to submodules we care about. Recurses the tree structure.
"""
return self.find_submodules_in_entry(githash, commit.get_tree_entry(), [])
def clear_tree(self, tree):
"""Remove all entries from tree"""
subentries = tree.get_subentries(self.fm).items()
for name, entry in subentries:
tree = tree.remove_entry(self.fm, name)
return tree
def is_mark(self, mark):
    """Return True if mark starts with ':', False otherwise."""
    return True if mark.startswith(':') else False
def is_ancestor(self, potential_ancestor, potential_descendent):
if self.is_mark(potential_ancestor):
raise Exception('Cannot check ancestry of mark %s' % potential_ancestor)
if self.is_mark(potential_descendent):
raise Exception('Cannot check ancestry of mark %s' % potential_descendent)
return subprocess.call(["git", "merge-base", "--is-ancestor",
potential_ancestor, potential_descendent]) == 0
def is_same_or_ancestor(self, potential_ancestor, potential_descendent):
if self.is_mark(potential_ancestor):
raise Exception('Cannot check ancestry of mark %s' % potential_ancestor)
if self.is_mark(potential_descendent):
raise Exception('Cannot check ancestry of mark %s' % potential_descendent)
if potential_ancestor == potential_descendent:
return True
return self.is_ancestor(potential_ancestor, potential_descendent)
def list_is_ancestor(self, potential_ancestors, potential_descendent):
for potential_ancestor in potential_ancestors:
if not self.is_ancestor(potential_ancestor, potential_descendent):
return False
return True
def is_same_or_ancestor_of_any(self, potential_ancestor, potential_descendents):
for potential_descendent in potential_descendents:
if self.is_same_or_ancestor(potential_ancestor, potential_descendent):
return potential_descendent
return None
def get_latest_upstream_commit(self, githash, submodules, candidates):
"""Determine which of candidates has the upstream tree we want."""
if not candidates:
return None
result, result_path = candidates[0]
if len(candidates) == 1:
return result
for candidate, path in candidates[1:]:
self.debug("%s %s is_ancestor %s %s\n" % (result_path, result, path, candidate))
if self.is_ancestor(result, candidate):
result, result_path = [candidate, path] # Candidate is newer
elif not self.is_ancestor(candidate, result):
# Neither is an ancestor of the other. This must be a case
# where the umbrella repository has updates from two different
# upstream branches. We don't handle this yet as it would
# require merging the trees.
warnstr = "Commit %s %s: no order between (%s %s)\n\n" % (githash, path,
result, candidate)
for pathsegs, oldhash in submodules:
errpath = '/'.join(pathsegs)
errstr += "%s %s\n" % (errpath, oldhash)
print('WARNING: %s' % warnstr)
return None
return result
def remove_submodules(self, tree, submodule_paths, parent_path):
for pathsegs in submodule_paths:
entry = tree.get_path(self.fm, pathsegs)
if entry and entry.mode == '160000':
self.debug('Removing submodule %s from %s' % ('/'.join(pathsegs),
parent_path))
tree = tree.remove_path(self.fm, pathsegs)
return tree
def record_stage1_mappings(self, newhash, oldhash, updated_submodules,
inlined_submodules):
"""Record the mapping of the original umbrella commit and map
submodule update hashes to newhash so tags know where to
point.
"""
# Map the original commit to the new zippped commit.
mapped_newhash = self.fm.get_mark(newhash)
self.debug('Mapping umbrella %s to %s' % (oldhash, mapped_newhash))
self.stage1_umbrella_revmap[oldhash] = mapped_newhash
self.stage1_umbrella_old_revmap[mapped_newhash] = oldhash
# Map the submodule commit to the new zipped commit so we can
# update tags. These mappings will be used in stage2 to
# eventually map the original submodule commit to its final commit
# in zipped history.
self.debug('Updated submodules %s' % updated_submodules)
self.debug('Inlined submodules %s' % inlined_submodules)
for pathsegs, oldshash, newshash in inlined_submodules:
path = '/'.join(pathsegs)
self.debug('Mapping inlined submodule %s %s to %s' %
(path, newshash, mapped_newhash))
self.inlined_submodule_revmap[newshash] = mapped_newhash
reverse_submodule_mapping = self.stage1_submodule_reverse_revmap.get(mapped_newhash)
if reverse_submodule_mapping:
reverse_submodule_mapping.add(newshash)
else:
reverse_submodule_mapping = set()
reverse_submodule_mapping.add(newshash)
self.stage1_submodule_reverse_revmap[mapped_newhash] = reverse_submodule_mapping
return None
def record_stage2_mappings(self, newziphash, oldziphash):
"""Record the mapping of the original umbrella commit and map
submodule update hashes to newhash so tags know where to
point.
"""
mapped_oldziphash = self.fm.get_mark(oldziphash)
mapped_newziphash = self.fm.get_mark(newziphash)
# If this is an original umbrella commit, map it to the stage2
# zippped commit.
orig_umbrella_hash = self.stage1_umbrella_old_revmap.get(mapped_oldziphash)
if orig_umbrella_hash:
self.debug('Mapping original umbrella %s to %s' %
(orig_umbrella_hash, mapped_newziphash))
self.stage2_umbrella_revmap[orig_umbrella_hash] = newziphash
self.tag_revmap[orig_umbrella_hash] = newziphash
migrate_commit_hash = self.revmap.get(orig_umbrella_hash)
if migrate_commit_hash:
# This is a commit in a subproject used as the umbrella. It
# was rewritten to migrate_commit_hash by
# migrate-downstream-fork.py. That commit migt have tags
# associated with it, so map it to the new zipped commit.
self.debug('%s is migrated project commit, mapping to %s' %
(orig_umbrella_hash, newziphash))
self.tag_revmap[migrate_commit_hash] = newziphash
# Map submodule commits inlined into this umbrella from their
# original downstream commits to the new zippped commit.
reverse_submodule_mapping = self.stage1_submodule_reverse_revmap.get(mapped_oldziphash)
if reverse_submodule_mapping:
for newshash in reverse_submodule_mapping:
self.debug('Mapping inlined submodule downstream commit %s to %s' %
(newshash, mapped_newziphash))
self.inlined_submodule_revmap[newshash] = newziphash
self.tag_revmap[newshash] = newziphash
else:
# Otherwise, this is a downstream commit that may or may not be
# linked into the final zipped history. If it was inlined, it
# will not be linked in (children will have their parent
# rewritten to the zippped commit where it was inlined) and its
# mapping for the purpose of tag rewriting was done above when
# its corresponding umbrella commit was processed. If it was
# not inlined, record the mapping from its original downstream
# commit to its new downstream commit (they may differ due to
# parent rewriting).
if not oldziphash in self.inlined_submodule_commits:
self.debug('Mapping non-inlined downstream %s to %s' %
(mapped_oldziphash, mapped_newziphash))
self.stage2_umbrella_revmap[mapped_oldziphash] = newziphash
# Map this non-inlined downstream commit to newziphash to we can
# update tags.
self.tag_revmap[mapped_oldziphash] = newziphash
else:
self.debug('%s is inlined submodule, not mapping' % oldziphash)
return None
def get_base_tree_commit_hash(self, fm, githash, commit, oldparents, submodules):
"""Determine the base tree for the rewritten umbrella commit.

Returns the hash of the commit chosen as the base.  If self.revmap
has a true entry for githash, that entry is returned right away.
Otherwise candidate [hash, path] pairs are gathered from oldparents
(through self.base_tree_map and self.revmap) and from submodules
(hashes that are members of self.new_upstream_hashes), and the
choice among them is made by get_latest_upstream_commit.

Raises Exception when get_latest_upstream_commit returns a false
value.

The fm and commit parameters are not referenced in the body; the
code goes through self.fm.

NOTE(review): this copy of the file has lost its leading
whitespace, so which statements sit under which if/for cannot be
read from the text here -- confirm against an indented copy before
changing the logic.
"""
# The content of the commit should be the combination of the
# content from the submodules and elements from the monorepo tree
# not updated by submodules. The tricky part is figuring out
# which monorepo tree that should be.
# Check to see if this commit is actually a monorepo-rewritten
# commit. If it is, use it as the base tree. This happens if an
# upstream project has submodules added to it.
mapped_githash = self.revmap.get(githash)
if mapped_githash:
self.debug('Using mapped umbrella commit %s as base tree' % mapped_githash)
return mapped_githash
# Check all of the upstream ancestors and see which is the
# most recent (get_latest_upstream_commit keeps the candidate the
# others are ancestors of).
commits_to_check = []
# Add the merge base from the umbrella's parents to the candidate
# list. Also check for upstream parents which are also
# candidates.  Candidates that come from the umbrella parents carry
# '.' in the path slot.
for op in oldparents:
self.debug('Checking umbrella parent %s for merge base' % op)
parent_merge_base = self.base_tree_map.get(op)
if parent_merge_base:
self.debug('Adding parent merge base %s to merge base set' % parent_merge_base)
commits_to_check.append([parent_merge_base, '.'])
mapped_op = self.revmap.get(op)
if mapped_op:
# The umbrella commit itself has a monorepo-rewritten parent.
# This can happen if submodules were added to an upstream
# project.
self.debug('Adding monorepo parent %s to merge base set' % mapped_op)
commits_to_check.append([mapped_op, '.'])
for pathsegs, oldhash in submodules:
path='/'.join(pathsegs)
self.debug('Found submodule (%s, %s)' % (path, oldhash))
# Get the hash of the monorepo-rewritten commit corresponding to
# the submodule update.  When revmap has no entry the original
# hash is used unchanged.
newhash = self.revmap.get(oldhash, oldhash)
self.debug('New hash: %s' % newhash)
if newhash in self.new_upstream_hashes:
self.debug("Upstream submodule update to %s\n" % newhash)
commits_to_check.append([newhash, path])
newcommit = self.fm.get_commit(newhash)
self.debug('%s\n' % newcommit.msg)
for parent in newcommit.parents:
if parent in self.new_upstream_hashes:
# This submodule has an upstream parent. It is a candidate
# for the base tree.
self.debug("Upstream parent %s\n" % parent)
commits_to_check.append([parent, path])
result = self.get_latest_upstream_commit(githash, submodules,
commits_to_check)
# A false result covers both "no candidates" and "candidates with no
# order between them"; both are reported with the same exception.
if not result:
raise Exception('Umbrella incorprated submodules from multiple monorepo branches')
self.debug('Using commit %s as base tree' % result)
return result
def submodule_was_added_or_updated(self, oldparents, submodule_path,
submodule_oldhash):
"""Return whether submodule_oldhash represents an addition of a new
submodule or an update of an existing submodule."""
# If submodule_oldhash matches any submodule along oldparents,
# this is not a submodule add or update.
for op in oldparents:
prev_submodules_map = self.prev_submodules.get(op)
if prev_submodules_map:
prev_submodule_hash = prev_submodules_map.get(submodule_path)
if prev_submodule_hash and prev_submodule_hash == submodule_oldhash:
return False
return True
def get_updated_or_added_submodules(self, githash, commit, oldparents,
                                    submodules):
    """Collect the submodules that this commit adds or updates.

    submodules is an iterable of (pathsegs, oldhash) pairs.  Each pair
    accepted by submodule_was_added_or_updated yields a
    (pathsegs, oldhash, newhash) tuple, where newhash is the revmap
    entry for oldhash, or oldhash itself when there is no entry.

    Side effects: self.prev_submodules is reset to an empty set for
    every hash in oldparents and is set to submodules for githash.
    The commit argument is not used here.
    """
    # NOTE(review): every old parent's record is replaced with an empty
    # set right before the check below, so the check as written in this
    # file appears unable to match anything through these parents --
    # confirm this is intended.
    for parent in oldparents:
        self.prev_submodules[parent] = set()

    changed = [
        # Translate to the monorepo-rewritten commit where one is known.
        (pathsegs, oldhash, self.revmap.get(oldhash, oldhash))
        for pathsegs, oldhash in submodules
        if self.submodule_was_added_or_updated(oldparents,
                                               '/'.join(pathsegs),
                                               oldhash)
    ]

    # Remember the submodule state seen at this commit.
    self.prev_submodules[githash] = submodules
    self.debug('Updated or added submodules: %s' % changed)
    return changed
def submodule_tree_in_old_parents(self, path, submodule_tree, parents):
    """Tell whether any commit in parents has a tree equal to submodule_tree.

    Each hash in parents is resolved through self.fm.get_commit and its
    get_tree_entry() result is compared with submodule_tree.  Returns
    True on the first match, False when none matches.

    Raises Exception when a parent commit yields no (falsy) tree entry.
    path is only used in the diagnostic and error messages.
    """
    for parent_hash in parents:
        tree = self.fm.get_commit(parent_hash).get_tree_entry()
        if not tree:
            raise Exception(
                'Could not find submodule %s in old parent %s' % (path, parent_hash))
        if tree == submodule_tree:
            self.debug('submodule tree %s' % str(submodule_tree))
            self.debug('parent tree %s' % str(tree))
            self.debug('%s tree in old parent %s' % (path, parent_hash))
            return True
    return False
def submodule_tree_in_umbrella_parents(self, pathsegs, submodule_tree,
                                       parents):
    """Tell whether submodule_tree is recorded for this path in any parent.

    pathsegs is joined with '/' to form the lookup key.  For each hash
    in parents, the per-path record in self.submodule_tree_map is
    consulted; True is returned on the first record whose tree for the
    path equals submodule_tree, False when there is none.
    """
    path = '/'.join(pathsegs)
    for parent in parents:
        trees_by_path = self.submodule_tree_map.get(parent)
        if not trees_by_path:
            # No trees recorded for this parent.
            continue
        recorded_tree = trees_by_path.get(path)
        if not recorded_tree or not (recorded_tree == submodule_tree):
            continue
        self.debug('submodule tree %s' % str(submodule_tree))
        self.debug('parent tree %s' % str(recorded_tree))
        self.debug('%s tree in umbrella parent %s' % (path, parent))
        return True
    return False
def get_inlined_submodules(self, githash, commit, oldparents,
                           updated_submodules):
    """Pick the updated submodules whose tree must be inlined here.

    updated_submodules is an iterable of (pathsegs, oldhash, newhash)
    tuples.  An entry is kept (and newhash is added to
    self.inlined_submodule_commits) only when the tree of its newhash
    commit is found neither among the previously merged upstream and
    downstream parents recorded for oldparents, nor among the trees
    recorded for oldparents themselves.  In other words the tree has
    to differ from all parents, not just from the previous zipped
    commit.

    Returns the list of kept tuples.  Raises Exception when the
    submodule commit yields no (falsy) tree entry.  githash and commit
    are not used here.
    """
    inlined = []
    for pathsegs, oldhash, newhash in updated_submodules:
        path = '/'.join(pathsegs)
        submodule_tree = self.fm.get_commit(newhash).get_tree_entry()
        if not submodule_tree:
            raise Exception('Could not find submodule %s in submodule commit %s' %
                            (path, newhash))

        # Gather every previously merged parent of this submodule path,
        # upstream first and then downstream, for each old parent.
        merged_parents = []
        for op in oldparents:
            for parent_maps in (self.merged_upstream_parents,
                                self.merged_downstream_parents):
                by_path = parent_maps.get(op)
                if not by_path:
                    continue
                known = by_path.get(path)
                if known:
                    merged_parents.extend(known)

        # A tree already present in a merged parent or in an umbrella
        # parent means the submodule was NOT inlined by this commit.
        if self.submodule_tree_in_old_parents(path, submodule_tree,
                                              merged_parents):
            continue
        if self.submodule_tree_in_umbrella_parents(pathsegs, submodule_tree,
                                                   oldparents):
            continue

        self.debug('Inlined %s %s' % (path, newhash))
        self.inlined_submodule_commits.add(newhash)
        inlined.append((pathsegs, oldhash, newhash))

    self.debug('Inlined submodules: %s' % inlined)
    return inlined
def update_merged_parents(self, githash, submodules):
    """Record, per submodule path, the upstream and downstream commits
    merged at githash.

    submodules is an iterable of (pathsegs, oldhash) pairs.  For each
    pair the commit for the revmap-translated hash (oldhash itself when
    revmap has no entry) is fetched, and its parents followed by the
    commit's own hash are split by membership in
    self.new_upstream_hashes.  The resulting lists are stored under
    self.merged_upstream_parents[githash][path] and
    self.merged_downstream_parents[githash][path]; any earlier entry for
    githash in either map is replaced.
    """
    upstream_by_path = {}
    downstream_by_path = {}
    self.merged_upstream_parents[githash] = upstream_by_path
    self.merged_downstream_parents[githash] = downstream_by_path

    for pathsegs, oldhash in submodules:
        # Hash of the monorepo-rewritten commit for this submodule update.
        newhash = self.revmap.get(oldhash, oldhash)
        rewritten = self.fm.get_commit(newhash)

        upstream = []
        downstream = []
        # Classify the parents first, then the submodule commit itself.
        for candidate in list(rewritten.parents) + [newhash]:
            if candidate in self.new_upstream_hashes:
                upstream.append(candidate)
            else:
                downstream.append(candidate)

        path = '/'.join(pathsegs)
        upstream_by_path[path] = upstream
        downstream_by_path[path] = downstream
def determine_parents(self, fm, githash, commit, oldparents, submodules,
updated_submodules):
# Rewrite existing new parents. If the umbrella is actually an
# upstream project that's had submodules added to it, then this
# commit and its parents are actually split commits, not monorepo
# commits. Otherwise commit is a rewritten umbrella commit and
# its parents were already rewritten.
parents = []
rewritten_or_upstream_parents = []
for np in commit.parents:
# Sometimes fast_filter_branch sets a parent to a mark even if
# the parent is an upstream monorepo commit. We want the real
# commit hash if it's available.
np_hash = self.fm.get_mark(np)
mapped_np = self.revmap.get(np_hash)
if mapped_np:
parents.append(mapped_np)
rewritten_or_upstream_parents.append(mapped_np)
else:
parents.append(np)
if np_hash in self.new_upstream_hashes:
rewritten_or_upstream_parents.append(np_hash)
# Check submodules that were added or updated. If their commits
# have parents not already included, add them.
submodule_upstream_parent_candidates = []
for pathsegs, oldhash, newhash in updated_submodules:
path='/'.join(pathsegs)
newcommit = self.fm.get_commit(newhash)
# Gather previously-merged upstream and downstream parents.
merged_upstream_parents = []
merged_downstream_parents = []
for op in oldparents:
upstream_map = self.merged_upstream_parents.get(op)
if upstream_map:
upstream_parents = upstream_map.get(path)
if upstream_parents:
merged_upstream_parents.extend(upstream_parents)
downstream_map = self.merged_downstream_parents.get(op)
if downstream_map:
downstream_parents = downstream_map.get(path)
if downstream_parents:
merged_downstream_parents.extend(downstream_parents)
for p in newcommit.parents:
if p in self.new_upstream_hashes:
# This is a rewritten upstream commit.
maybe_descendent = self.is_same_or_ancestor_of_any(p, merged_upstream_parents)
if maybe_descendent:
self.debug('Filtering submodule %s upstream parent %s which is ancestor of %s' %
(path, p, maybe_descendent))
continue
self.debug('Add submodule %s upstream parent %s' % (path, p))
parents.append(p)
continue