Skip to content

Commit

Permalink
Merge pull request #284 from hotosm/revert-282-feature/post-processing
Browse files Browse the repository at this point in the history
Revert "Feature/post processing"
  • Loading branch information
kshitijrajsharma authored Dec 9, 2024
2 parents bcd1d66 + cf06114 commit f445df8
Show file tree
Hide file tree
Showing 17 changed files with 39 additions and 1,045 deletions.
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,3 @@ Pipfile.lock
#backend
backend/data
backend/.env

.DS_Store
85 changes: 6 additions & 79 deletions API/api_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

# Reader imports
from src.app import CustomExport, PolygonStats, RawData, S3FileTransfer
from src.post_processing.processor import PostProcessor
from src.config import ALLOW_BIND_ZIP_FILTER
from src.config import CELERY_BROKER_URL as celery_broker_uri
from src.config import CELERY_RESULT_BACKEND as celery_backend
Expand All @@ -40,7 +39,6 @@
RawDataCurrentParams,
RawDataOutputType,
)
from src.post_processing.processor import PostProcessor

if ENABLE_SOZIP:
# Third party imports
Expand Down Expand Up @@ -77,12 +75,7 @@ def create_readme_content(default_readme, polygon_stats):


def zip_binding(
working_dir,
exportname_parts,
geom_dump,
polygon_stats,
geojson_stats,
default_readme,
working_dir, exportname_parts, geom_dump, polygon_stats, default_readme
):
logging.debug("Zip Binding Started!")
upload_file_path = os.path.join(
Expand All @@ -95,9 +88,6 @@ def zip_binding(
),
}

if geojson_stats:
additional_files["stats.json"] = geojson_stats

for name, content in additional_files.items():
temp_path = os.path.join(working_dir, name)
with open(temp_path, "w") as f:
Expand Down Expand Up @@ -219,60 +209,11 @@ def process_raw_data(self, params, user=None):
file_parts,
)

# Post-processing: Generate GeoJSON/HTML stats and transliterations
polygon_stats = None
geojson_stats_html = None
geojson_stats_json = None
download_html_url = None
if "include_stats" or "include_translit" in params.dict():
post_processor = PostProcessor(
{
"include_stats": params.include_stats,
"include_translit": params.include_translit,
}
)

if params.include_stats:
post_processor.filters = params.filters

post_processor.init()

geom_area, geom_dump, working_dir = RawData(
params, str(self.request.id)
).extract_current_data(file_parts, post_processor.post_process_line)

if params.include_stats:
geojson_stats_json = json.dumps(post_processor.geoJSONStats.dict())

# Create a HTML summary of stats
if params.include_stats_html:
tpl = "stats"
if "waterway" in post_processor.geoJSONStats.config.keys:
tpl = "stats_waterway"
elif "highway" in post_processor.geoJSONStats.config.keys:
tpl = "stats_highway"
elif "building" in post_processor.geoJSONStats.config.keys:
tpl = "stats_building"
project_root = pathlib.Path(__file__).resolve().parent
tpl_path = os.path.join(
project_root,
"../src/post_processing/{tpl}_tpl.html".format(tpl=tpl),
)
geojson_stats_html = post_processor.geoJSONStats.html(
tpl_path
).build()
upload_html_path = os.path.join(
working_dir, os.pardir, f"{exportname_parts[-1]}.html"
)
with open(upload_html_path, "w") as f:
f.write(geojson_stats_html)

else:
geom_area, geom_dump, working_dir = RawData(
params, str(self.request.id)
).extract_current_data(file_parts)

geom_area, geom_dump, working_dir = RawData(
params, str(self.request.id)
).extract_current_data(file_parts)
inside_file_size = 0
polygon_stats = None
if "include_stats" in params.dict():
if params.include_stats:
feature = {
Expand All @@ -281,14 +222,12 @@ def process_raw_data(self, params, user=None):
"properties": {},
}
polygon_stats = PolygonStats(feature).get_summary_stats()

if bind_zip:
upload_file_path, inside_file_size = zip_binding(
working_dir=working_dir,
exportname_parts=exportname_parts,
geom_dump=geom_dump,
polygon_stats=polygon_stats,
geojson_stats=geojson_stats_json,
default_readme=DEFAULT_README_TEXT,
)

Expand All @@ -301,7 +240,6 @@ def process_raw_data(self, params, user=None):
upload_file_path = file_path
inside_file_size += os.path.getsize(file_path)
break # only take one file inside dir , if contains many it should be inside zip

# check if download url will be generated from s3 or not from config
if use_s3_to_upload:
file_transfer_obj = S3FileTransfer()
Expand All @@ -315,6 +253,7 @@ def process_raw_data(self, params, user=None):
pattern = r"(hotosm_project_)(\d+)"
match = re.match(pattern, exportname)
if match:
prefix = match.group(1)
project_number = match.group(2)
if project_number:
upload_name = f"TM/{project_number}/{exportname}"
Expand All @@ -333,15 +272,6 @@ def process_raw_data(self, params, user=None):
upload_name,
file_suffix="zip" if bind_zip else params.output_type.lower(),
)

# If there's an HTML file, upload it too
if geojson_stats_html:
download_html_url = file_transfer_obj.upload(
upload_html_path,
upload_name,
file_suffix="html",
)

else:
# give the static file download url back to user served from fastapi static export path
download_url = str(upload_file_path)
Expand All @@ -367,9 +297,6 @@ def process_raw_data(self, params, user=None):
}
if polygon_stats:
final_response["stats"] = polygon_stats
if download_html_url:
final_response["download_html_url"] = download_html_url

return final_response

except Exception as ex:
Expand Down
1 change: 1 addition & 0 deletions backend/field_update
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class Database:
try:
self.cursor.execute(query)
self.conn.commit()
# print(query)
try:
result = self.cursor.fetchall()

Expand Down
2 changes: 1 addition & 1 deletion backend/raw_backend
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ if __name__ == "__main__":

if not args.replication:
osm2pgsql.append("--drop")

print(osm2pgsql)
run_subprocess_cmd(osm2pgsql)

basic_index_cmd = [
Expand Down
7 changes: 0 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,3 @@ psutil==5.9.8

## logging
tqdm==4.66.2

# stats for geojson data
geojson-stats==0.2.4

# transliterations
transliterate==1.10.2

33 changes: 4 additions & 29 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
from psycopg2.extras import DictCursor
from slugify import slugify
from tqdm import tqdm
from .post_processing.processor import PostProcessor

# Reader imports
from src.config import (
Expand Down Expand Up @@ -641,7 +640,7 @@ def ogr_export(query, outputtype, working_dir, dump_temp_path, params):
os.remove(query_path)

@staticmethod
def query2geojson(con, extraction_query, dump_temp_file_path, plugin_fn=None):
def query2geojson(con, extraction_query, dump_temp_file_path):
"""Function written from scratch without being dependent on any library, Provides better performance for geojson binding"""
# creating geojson file
pre_geojson = """{"type": "FeatureCollection","features": ["""
Expand All @@ -661,12 +660,10 @@ def query2geojson(con, extraction_query, dump_temp_file_path, plugin_fn=None):
for row in cursor:
if first:
first = False
f.write(row[0])
else:
f.write(",")
if plugin_fn:
f.write(plugin_fn(row[0]))
else:
f.write((row[0]))
f.write(row[0])
cursor.close() # closing connection to avoid memory issues
# close the writing geojson with last part
f.write(post_geojson)
Expand Down Expand Up @@ -714,7 +711,7 @@ def get_grid_id(geom, cur):
country_export,
)

def extract_current_data(self, exportname, plugin_fn=None):
def extract_current_data(self, exportname):
"""Responsible for Extracting rawdata current snapshot, Initially it creates a geojson file , Generates query , run it with 1000 chunk size and writes it directly to the geojson file and closes the file after dump
Args:
exportname: takes filename as argument to create geojson file passed from routers
Expand Down Expand Up @@ -780,7 +777,6 @@ def extract_current_data(self, exportname, plugin_fn=None):
country_export=country_export,
),
dump_temp_file_path,
plugin_fn,
) # uses own conversion class
if output_type == RawDataOutputType.SHAPEFILE.value:
(
Expand Down Expand Up @@ -1492,29 +1488,8 @@ def process_export_format(export_format):
layer_creation_options=layer_creation_options_str,
query_dump_path=export_format_path,
)

run_ogr2ogr_cmd(ogr2ogr_cmd)

# Post-processing GeoJSON files
# Adds: stats, HTML stats summary and transliterations
if export_format.driver_name == "GeoJSON" and (
self.params.include_stats or self.params.include_translit
):
post_processor = PostProcessor(
{
"include_stats": self.params.include_stats,
"include_translit": self.params.include_translit,
"include_stats_html": self.params.include_stats_html,
}
)
post_processor.init()
post_processor.custom(
categories=self.params.categories,
export_format_path=export_format_path,
export_filename=export_filename,
file_export_path=file_export_path,
)

zip_file_path = os.path.join(file_export_path, f"{export_filename}.zip")
zip_path = self.file_to_zip(export_format_path, zip_file_path)

Expand Down
1 change: 0 additions & 1 deletion src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,6 @@ def not_raises(func, *args, **kwargs):
logging.error(
"Error creating HDX configuration: %s, Disabling the hdx exports feature", e
)

ENABLE_HDX_EXPORTS = False

if ENABLE_HDX_EXPORTS:
Expand Down
Empty file removed src/post_processing/__init__.py
Empty file.
61 changes: 0 additions & 61 deletions src/post_processing/geojson_stats.py

This file was deleted.

Loading

0 comments on commit f445df8

Please sign in to comment.