Skip to content

Commit

Permalink
Merge pull request #75 from volaya/merge_as_child
Browse files Browse the repository at this point in the history
add option to 'merge' command to merge as child catalog
  • Loading branch information
lossyrob authored Mar 25, 2021
2 parents e29546e + 7339b5c commit 01d157c
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 33 deletions.
23 changes: 19 additions & 4 deletions stactools_cli/stactools/cli/commands/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ def merge(source_catalog,
target_catalog,
collection_id=None,
move_assets=False,
ignore_conflicts=False):
ignore_conflicts=False,
as_child=False,
child_folder=None):
source = pystac.read_file(source_catalog)
target = pystac.read_file(target_catalog)

Expand All @@ -20,7 +22,8 @@ def merge(source_catalog,
'A collection with ID {} does not exist in {}'.format(
collection_id, target_catalog))

merge_all_items(source, target, move_assets, ignore_conflicts)
merge_all_items(source, target, move_assets, ignore_conflicts, as_child,
child_folder)

target.save()

Expand All @@ -43,12 +46,24 @@ def create_merge_command(cli):
help=('If there are conflicts with an item in both catalogs having '
'the same asset key, do not error, leave the original asset '
'from the target catalog in place.'))
@click.option(
'-c',
'--as-child',
is_flag=True,
help='Merge as child catalog of destination catalog or collection')
@click.option('-f',
'--child-folder',
help=('The subfolder name to copy to if the option to merge '
'as a child is used. If not provided, the catalog id '
'will be used'))
def merge_command(source_catalog, target_catalog, collection, move_assets,
ignore_conflicts):
ignore_conflicts, as_child, child_folder):
merge(source_catalog,
target_catalog,
collection_id=collection,
move_assets=move_assets,
ignore_conflicts=ignore_conflicts)
ignore_conflicts=ignore_conflicts,
as_child=as_child,
child_folder=child_folder)

return merge_command
74 changes: 45 additions & 29 deletions stactools_core/stactools/core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from pystac.utils import (is_absolute_href, make_relative_href)
from shapely.geometry import shape, mapping

from stactools.core.copy import (move_asset_file_to_item, move_assets as
do_move_assets)
from stactools.core.copy import (move_asset_file_to_item, copy_catalog,
move_assets as do_move_assets)


def merge_items(source_item,
Expand Down Expand Up @@ -62,7 +62,9 @@ def merge_items(source_item,
def merge_all_items(source_catalog,
target_catalog,
move_assets=False,
ignore_conflicts=False):
ignore_conflicts=False,
as_child=False,
child_folder=None):
"""Merge all items from source_catalog into target_catalog.
Calls merge_items on any items that have the same ID between the two catalogs.
Expand All @@ -71,47 +73,61 @@ def merge_all_items(source_catalog,
new items.
Args:
source_catalog (Catalog or Colletion): The catalog or collection that items
source_catalog (Catalog or Collection): The catalog or collection that items
will be drawn from to merge into the target catalog.
This catalog is not mutated in this operation.
target_item (Catalog or Colletion): The target catalog that will be merged into.
target_catalog (Catalog or Collection): The target catalog that will be merged into.
This catalog is mutated in this operation.
move_assets (bool): If true, move the asset files alongside the target item.
ignore_conflicts (bool): If True, assets with the same keys will not be merged,
and asset files that would be moved to overwrite an existing file
will not be moved. If False, either of these situations will throw an error.
as_child (bool): If True, a child catalog will be added with the content of the
source catalog. Otherwise, items will be added directly to the destination
catalog.
child_folder (str): name of the subfolder to use in case the as_child option is
set to True. If None, the id of the catalog will be used as folder name.
Returns:
Catalog or Colletion: The target_catalog
Catalog or Collection: The target_catalog
"""
source_items = source_catalog.get_all_items()
ids_to_items = {item.id: item for item in source_items}

for item in target_catalog.get_all_items():
source_item = ids_to_items.get(item.id)
if source_item is not None:
merge_items(source_item,
item,
move_assets=move_assets,
ignore_conflicts=ignore_conflicts)
del ids_to_items[item.id]

# Process source items that did not match existing target items
layout_strategy = BestPracticesLayoutStrategy()
parent_dir = os.path.dirname(target_catalog.get_self_href())
for item in ids_to_items.values():
item_copy = item.clone()
item_copy.set_self_href(
layout_strategy.get_item_href(item_copy, parent_dir))
target_catalog.add_item(item_copy)

if isinstance(target_catalog, pystac.Collection):
item_copy.set_collection(target_catalog)
else:
item_copy.set_collection(None)
if as_child:
child_dir = os.path.join(parent_dir, child_folder or source_catalog.id)
copy_catalog(source_catalog, child_dir, source_catalog.catalog_type,
move_assets)
child_catalog_path = os.path.join(
child_dir, os.path.basename(source_catalog.get_self_href()))
source_catalog = pystac.read_file(child_catalog_path)
target_catalog.add_child(source_catalog, source_catalog.title)
else:
for item in target_catalog.get_all_items():
source_item = ids_to_items.get(item.id)
if source_item is not None:
merge_items(source_item,
item,
move_assets=move_assets,
ignore_conflicts=ignore_conflicts)
del ids_to_items[item.id]

# Process source items that did not match existing target items
layout_strategy = BestPracticesLayoutStrategy()
for item in ids_to_items.values():
item_copy = item.clone()
item_copy.set_self_href(
layout_strategy.get_item_href(item_copy, parent_dir))
target_catalog.add_item(item_copy)

if isinstance(target_catalog, pystac.Collection):
item_copy.set_collection(target_catalog)
else:
item_copy.set_collection(None)

if move_assets:
do_move_assets(item_copy, copy=False)
if move_assets:
do_move_assets(item_copy, copy=False)

if target_catalog.STAC_OBJECT_TYPE == pystac.STACObjectType.COLLECTION:
target_catalog.update_extent_from_items()
Expand Down
15 changes: 15 additions & 0 deletions tests/cli/commands/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,21 @@ def test_merge_moves_assets(self):
os.path.dirname(asset.get_absolute_href()),
os.path.dirname(item.get_self_href()))

def test_merge_as_child(self):
    """Run ``merge`` with ``-c`` and verify the target's child links.

    After merging the first collection into the second, the target is
    expected to expose exactly two child links, each of which must resolve
    to a path that exists on disk.
    """
    with TemporaryDirectory() as tmp_dir:
        paths = copy_two_planet_disaster_subsets(tmp_dir)
        source_path = paths[0]
        target_path = paths[1]

        # Flags precede the positional source and target catalog paths.
        self.run_command(['merge', '-a', "-c", source_path, target_path])

        # Re-read the target from disk so the saved result is inspected.
        merged_target = pystac.read_file(target_path)
        child_links = list(merged_target.get_child_links())

        self.assertEqual(2, len(child_links))
        for link in child_links:
            self.assertTrue(os.path.exists(link.get_absolute_href()))

def test_merge_updates_collection_extent(self):
with TemporaryDirectory() as tmp_dir:
col_paths = copy_two_planet_disaster_subsets(tmp_dir)
Expand Down

0 comments on commit 01d157c

Please sign in to comment.