diff --git a/stactools_cli/stactools/cli/commands/merge.py b/stactools_cli/stactools/cli/commands/merge.py index 821b06f2..02239fce 100644 --- a/stactools_cli/stactools/cli/commands/merge.py +++ b/stactools_cli/stactools/cli/commands/merge.py @@ -8,7 +8,9 @@ def merge(source_catalog, target_catalog, collection_id=None, move_assets=False, - ignore_conflicts=False): + ignore_conflicts=False, + as_child=False, + child_folder=None): source = pystac.read_file(source_catalog) target = pystac.read_file(target_catalog) @@ -20,7 +22,8 @@ def merge(source_catalog, 'A collection with ID {} does not exist in {}'.format( collection_id, target_catalog)) - merge_all_items(source, target, move_assets, ignore_conflicts) + merge_all_items(source, target, move_assets, ignore_conflicts, as_child, + child_folder) target.save() @@ -43,12 +46,24 @@ def create_merge_command(cli): help=('If there are conflicts with an item in both catalogs having ' 'the same asset key, do not error, leave the original asset ' 'from the target catalog in place.')) + @click.option( + '-c', + '--as-child', + is_flag=True, + help='Merge as child catalog of destination catalog or collection') + @click.option('-f', + '--child-folder', + help=('The subfolder name to copy to if the option to merge ' + 'as a child is used. 
If not provided, the catalog id ' + 'will be used')) def merge_command(source_catalog, target_catalog, collection, move_assets, - ignore_conflicts): + ignore_conflicts, as_child, child_folder): merge(source_catalog, target_catalog, collection_id=collection, move_assets=move_assets, - ignore_conflicts=ignore_conflicts) + ignore_conflicts=ignore_conflicts, + as_child=as_child, + child_folder=child_folder) return merge_command diff --git a/stactools_core/stactools/core/merge.py b/stactools_core/stactools/core/merge.py index 03132f2b..51f29847 100644 --- a/stactools_core/stactools/core/merge.py +++ b/stactools_core/stactools/core/merge.py @@ -5,8 +5,8 @@ from pystac.utils import (is_absolute_href, make_relative_href) from shapely.geometry import shape, mapping -from stactools.core.copy import (move_asset_file_to_item, move_assets as - do_move_assets) +from stactools.core.copy import (move_asset_file_to_item, copy_catalog, + move_assets as do_move_assets) def merge_items(source_item, @@ -62,7 +62,9 @@ def merge_items(source_item, def merge_all_items(source_catalog, target_catalog, move_assets=False, - ignore_conflicts=False): + ignore_conflicts=False, + as_child=False, + child_folder=None): """Merge all items from source_catalog into target_catalog. Calls merge_items on any items that have the same ID between the two catalogs. @@ -71,47 +73,61 @@ def merge_all_items(source_catalog, new items. Args: - source_catalog (Catalog or Colletion): The catalog or collection that items + source_catalog (Catalog or Collection): The catalog or collection that items will be drawn from to merge into the target catalog. This catalog is not mutated in this operation. - target_item (Catalog or Colletion): The target catalog that will be merged into. + target_catalog (Catalog or Collection): The target catalog that will be merged into. This catalog will not be mutated in this operation. move_assets (bool): If true, move the asset files alongside the target item. 
ignore_conflicts (bool): If True, assets with the same keys will not be merged, and asset files that would be moved to overwrite an existing file will not be moved. If False, either of these situations will throw an error. + as_child (bool): If True, a child catalog will be added with the content of the + source catalog. Otherwise, items will be added directly to the destination + catalog. + child_folder (str): name of the subfolder to use in case the as_child option is + set to True. If None, the id of the catalog will be used as folder name. Returns: - Catalog or Colletion: The target_catalog + Catalog or Collection: The target_catalog """ source_items = source_catalog.get_all_items() ids_to_items = {item.id: item for item in source_items} - for item in target_catalog.get_all_items(): - source_item = ids_to_items.get(item.id) - if source_item is not None: - merge_items(source_item, - item, - move_assets=move_assets, - ignore_conflicts=ignore_conflicts) - del ids_to_items[item.id] - - # Process source items that did not match existing target items - layout_strategy = BestPracticesLayoutStrategy() parent_dir = os.path.dirname(target_catalog.get_self_href()) - for item in ids_to_items.values(): - item_copy = item.clone() - item_copy.set_self_href( - layout_strategy.get_item_href(item_copy, parent_dir)) - target_catalog.add_item(item_copy) - - if isinstance(target_catalog, pystac.Collection): - item_copy.set_collection(target_catalog) - else: - item_copy.set_collection(None) + if as_child: + child_dir = os.path.join(parent_dir, child_folder or source_catalog.id) + copy_catalog(source_catalog, child_dir, source_catalog.catalog_type, + move_assets) + child_catalog_path = os.path.join( + child_dir, os.path.basename(source_catalog.get_self_href())) + source_catalog = pystac.read_file(child_catalog_path) + target_catalog.add_child(source_catalog, source_catalog.title) + else: + for item in target_catalog.get_all_items(): + source_item = ids_to_items.get(item.id) + if 
source_item is not None: + merge_items(source_item, + item, + move_assets=move_assets, + ignore_conflicts=ignore_conflicts) + del ids_to_items[item.id] + + # Process source items that did not match existing target items + layout_strategy = BestPracticesLayoutStrategy() + for item in ids_to_items.values(): + item_copy = item.clone() + item_copy.set_self_href( + layout_strategy.get_item_href(item_copy, parent_dir)) + target_catalog.add_item(item_copy) + + if isinstance(target_catalog, pystac.Collection): + item_copy.set_collection(target_catalog) + else: + item_copy.set_collection(None) - if move_assets: - do_move_assets(item_copy, copy=False) + if move_assets: + do_move_assets(item_copy, copy=False) if target_catalog.STAC_OBJECT_TYPE == pystac.STACObjectType.COLLECTION: target_catalog.update_extent_from_items() diff --git a/tests/cli/commands/test_merge.py b/tests/cli/commands/test_merge.py index beb46817..b5485752 100644 --- a/tests/cli/commands/test_merge.py +++ b/tests/cli/commands/test_merge.py @@ -56,6 +56,21 @@ def test_merge_moves_assets(self): os.path.dirname(asset.get_absolute_href()), os.path.dirname(item.get_self_href())) + def test_merge_as_child(self): + with TemporaryDirectory() as tmp_dir: + col_paths = copy_two_planet_disaster_subsets(tmp_dir) + + cmd = ['merge', '-a', "-c", col_paths[0], col_paths[1]] + + self.run_command(cmd) + + target_col = pystac.read_file(col_paths[1]) + + links = list(target_col.get_child_links()) + self.assertEqual(2, len(links)) + for child in links: + self.assertTrue(os.path.exists(child.get_absolute_href())) + def test_merge_updates_collection_extent(self): with TemporaryDirectory() as tmp_dir: col_paths = copy_two_planet_disaster_subsets(tmp_dir)