Skip to content

Commit

Permalink
Updates based on testing live
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrigsby-sc committed Apr 22, 2021
1 parent ac7639e commit 3cd0e4d
Showing 1 changed file with 24 additions and 25 deletions.
49 changes: 24 additions & 25 deletions batch-setup/make_meta_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from tilequeue.store import make_s3_tile_key_generator
from ModestMaps.Core import Coordinate
from multiprocessing import Pool
import time
import sys

MissingTiles = namedtuple('MissingTiles', 'low_zoom_file high_zoom_file')
Expand Down Expand Up @@ -149,7 +150,7 @@ def missing_tiles_split(self, split_zoom, zoom_max, big_jobs):

self.read_metas_to_file(missing_meta_file, compress=True)

print("Splitting into high and low zoom lists")
print("[%s] Splitting into high and low zoom lists" % (time.ctime()))

# contains zooms 0 until group zoom. the jobs between the group
# zoom and RAWR zoom are merged into the parent at group zoom.
Expand Down Expand Up @@ -190,11 +191,13 @@ def missing_tiles_split(self, split_zoom, zoom_max, big_jobs):
for coord in missing_high:
fh.write(serialize_coord(coord) + "\n")

print("[%s] Done splitting into high and low zoom lists" % (time.ctime()))
yield MissingTiles(missing_low_file, missing_high_file)

finally:
shutil.rmtree(tmpdir)


@contextmanager
def present_tiles(self):
"""
Expand Down Expand Up @@ -265,7 +268,7 @@ def __init__(self, default_mem_gb=8, spec_dict={}):
"10/12/3": {"mem_gb": 0.3, "order": 10}}
"""
self.default_mem_gb = default_mem_gb
self.spec_dict = {}
self.spec_dict = spec_dict

@staticmethod
def from_ordering_file(filename, default_mem_gb=8):
Expand All @@ -285,12 +288,12 @@ def from_ordering_file(filename, default_mem_gb=8):
for row in reader:
try:
# TODO: add checks for keys
coord = Coordinate(row['zoom'], row['x'], row['y'])
spec_dict[serialize_coord(coord)] = \
{TileSpecifier.MEM_GB_KEY: row[TileSpecifier.MEM_GB_KEY], TileSpecifier.ORDER_KEY: order_idx}
coord_str = serialize_coord(Coordinate(int(row['y']), int(row['x']), int(row['zoom'])))
spec_dict[coord_str] = \
{TileSpecifier.MEM_GB_KEY: float(row[TileSpecifier.MEM_GB_KEY]), TileSpecifier.ORDER_KEY: order_idx}
order_idx += 1
except:
print("Error for line parsed as %s in TileSpecifier.from_ordering_file" % row)
print("Error for line parsed as %s in TileSpecifier.from_ordering_file - details %s" % (row, sys.exc_info()[0]))
continue

return TileSpecifier(default_mem_gb, spec_dict)
Expand All @@ -301,16 +304,16 @@ def reorder(self, coord_list):
coords that are in the coord_list that aren't mentioned in the ordering will go first.
:return:
"""
return sorted(coord_list, lambda coord: self.get_ordering_val(coord))
return sorted(coord_list, key=lambda coord: self.get_ordering_val(coord))

def get_ordering_val(self, coord):
"""
coord is type string
returns ordering location for coord. The lower it is the earlier in the order.
If there is no ordering specified for this coordinate, returns 0
"""
key = serialize_coord(coord)
if key in self.spec_dict:
return self.spec_dict[key][self.ORDER_KEY]
if coord in self.spec_dict:
return self.spec_dict[coord][self.ORDER_KEY]

return 0

Expand Down Expand Up @@ -378,24 +381,20 @@ def enqueue_tiles(config_file, tile_list_file, check_metatile_exists, retry_numb
cfg = make_config_from_argparse(fh)

with open(tile_list_file, 'r') as tile_list:
coord_lines = tile_list.readlines()
coord_lines = [line.strip() for line in tile_list.readlines()]

reordered_lines = tile_specifier.reorder(coord_lines)

if retry_number == 0:
overprovision_multiplier = 1.2 # overprovision by 20% at the start
else:
overprovision_multiplier = (2.0 ** retry_number) # double memory each time we retry

max_mem_mb = 32 * 1024
overprovision_multiplier = 1.2 # overprovision by 20%
print("[%s] Starting to enqueue %d tile batches" % (time.ctime(), len(reordered_lines)))
for coord_line in reordered_lines:
# override memory requirements for this job with what the tile_specifier tells us
cfg["batch"]["memory"] = max(int(tile_specifier.get_mem_reqs_mb(coord_line, overprovision_multiplier)),
max_mem_mb)
memory_mb = int(tile_specifier.get_mem_reqs_mb(coord_line, overprovision_multiplier))
cfg.yml["batch"]["memory"] = memory_mb

args = BatchEnqueueArgs(config_file, coord_line, None)
args = BatchEnqueueArgs(config_file, coord_line, None, None)
tilequeue_batch_enqueue(cfg, args)

print("[%s] Done enqueuing tile batches" % time.ctime())

# adaptor class for MissingTiles to see just the high zoom parts, this is used
# along with the LowZoomLense to loop over missing tiles generically but
Expand Down Expand Up @@ -429,6 +428,7 @@ def __init__(self, tile_finder, big_jobs, split_zoom, zoom_max, tile_specifier=T
self.split_zoom = split_zoom
self.zoom_max = zoom_max
self.allowed_missing_tiles = allowed_missing_tiles
self.tile_specifier = tile_specifier

def _missing(self):
return self.tile_finder.missing_tiles_split(
Expand All @@ -455,7 +455,7 @@ def render(self, num_retries, lense):
print("Enqueueing %d %s tiles (e.g. %s)" %
(count, lense.description, ', '.join(sample)))
enqueue_tiles(lense.config, missing_tile_file,
check_metatile_exists, retry_number, tile_specifier)
check_metatile_exists, retry_number, self.tile_specifier)

else:
with self._missing() as missing:
Expand All @@ -468,11 +468,10 @@ def render(self, num_retries, lense):


def create_tile_specifier(tile_specifier_file):
default_mem_gb=8
tile_specifier = TileSpecifier()
if tile_specifier_file is not None:
try:
tile_specifier = TileSpecifier.from_ordering_file(tile_specifier_filename, default_mem_gb=8)
tile_specifier = TileSpecifier.from_ordering_file(tile_specifier_file, default_mem_gb=8)
except:
print("Error creating TileSpecifier, will use the default. Error details: {}".format(sys.exc_info()[0]))

Expand Down Expand Up @@ -556,7 +555,7 @@ def create_tile_specifier(tile_specifier_file):
buckets.rawr, missing_bucket_date_prefix, args.key_format_type,
split_zoom, zoom_max, args.size_threshold)

tile_renderer = TileRenderer(tile_finder, big_jobs, split_zoom, zoom_max, args.allowed_missing_tiles, tile_specifier)
tile_renderer = TileRenderer(tile_finder, big_jobs, split_zoom, zoom_max, tile_specifier, args.allowed_missing_tiles)

tile_renderer.render(args.retries, LowZoomLense(args.low_zoom_config))

Expand Down

0 comments on commit 3cd0e4d

Please sign in to comment.