Skip to content

Commit

Permalink
Also update RoadJunctionKM from the suburban junctions file.
Browse files Browse the repository at this point in the history
Added suburban_junctions.py and a "process suburban-junctions" command that loads the SuburbanJunction and RoadJunctionKM db tables from the file added in the previous commit. Removed the updating of these tables from the accidents import. Removed the trailing blank from the Hebrew name of junction 7039 in suburban_junctions.xlsx.
  • Loading branch information
ziv17 committed Jan 15, 2024
1 parent a6d9789 commit 04a39c9
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 125 deletions.
112 changes: 0 additions & 112 deletions anyway/parsers/cbs/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
RoadSign,
RoadLight,
RoadControl,
RoadJunctionKM,
Weather,
RoadSurface,
RoadObjecte,
Expand Down Expand Up @@ -78,7 +77,6 @@
ProviderCode,
VehicleDamage,
Streets,
SuburbanJunction,
AccidentMarkerView,
InvolvedView,
InvolvedMarkerView,
Expand Down Expand Up @@ -570,8 +568,6 @@ def import_accidents(provider_code, accidents, streets, roads, non_urban_interse
accidents_result = []
for _, accident in accidents.iterrows():
marker = create_marker(provider_code, accident, streets, roads, non_urban_intersection)
add_suburban_junction_from_marker(marker)
add_road_junction_km_from_marker(marker)
accidents_result.append(marker)
db.session.bulk_insert_mappings(AccidentMarker, accidents_result)
db.session.commit()
Expand Down Expand Up @@ -795,10 +791,6 @@ def import_streets_into_db():

# Module-level lookup tables filled while loading data.
yishuv_street_dict: Dict[Tuple[int, int], str] = {}
yishuv_name_dict: Dict[Tuple[int, str], int] = {}
# non_urban_intersection id -> {hebrew name, set of roads}
suburban_junctions_dict: Dict[int, dict] = {}
SUBURBAN_JUNCTION = "suburban_junction"
# (road, junction) -> km
# Values are stored as km / 10 (true division), so they are floats, not ints.
road_junction_km_dict: Dict[Tuple[int, int], float] = {}


def load_existing_streets():
Expand Down Expand Up @@ -846,106 +838,6 @@ def add_street_remove_name_duplicates(street: Dict[str, Any]):
yishuv_name_dict[k] = street["street"]


def import_suburban_junctions_into_db():
    """Replace the SuburbanJunction table with the junctions held in memory.

    Every entry of ``suburban_junctions_dict`` becomes one row mapping, with
    the Hebrew name passed through ``fix_name_len``. The existing rows are
    deleted, the mappings are bulk-inserted and the session is committed.
    """
    items = []
    for junction_id, junction in suburban_junctions_dict.items():
        items.append(
            {
                "non_urban_intersection": junction_id,
                NON_URBAN_INTERSECTION_HEBREW: fix_name_len(
                    junction[NON_URBAN_INTERSECTION_HEBREW]
                ),
                ROADS: junction[ROADS],
            }
        )
    logging.debug(f"Writing to db: {len(items)} suburban junctions")
    session = db.session
    session.query(SuburbanJunction).delete()
    session.bulk_insert_mappings(SuburbanJunction, items)
    session.commit()
    logging.debug("Done.")


def fix_name_len(name: str) -> str:
    """Return ``name`` cut to at most ``SuburbanJunction.MAX_NAME_LEN`` characters.

    Values that are not ``str`` are returned unchanged. An error is logged
    whenever a name is actually longer than the limit.
    """
    if isinstance(name, str):
        too_long = len(name) > SuburbanJunction.MAX_NAME_LEN
        if too_long:
            logging.error(f"Suburban_junction name too long ({len(name)}>"
                          f"{SuburbanJunction.MAX_NAME_LEN}):{name}.")
        # Slicing is a no-op for names that already fit.
        return name[: SuburbanJunction.MAX_NAME_LEN]
    return name

def load_existing_suburban_junctions():
    """Seed ``suburban_junctions_dict`` with every SuburbanJunction row in the db."""
    for junction in db.session.query(SuburbanJunction).all():
        add_suburban_junction(junction)
    logging.debug(f"Loaded suburban junctions: {len(suburban_junctions_dict)}.")


def add_suburban_junction(added: SuburbanJunction):
    """Merge one junction into ``suburban_junctions_dict``.

    A junction seen for the first time is stored with its Hebrew name and a
    set of its roads. For a junction that is already known, its roads are
    added to the stored set; if it carries a different, non-None Hebrew name,
    an error is logged and the stored name is replaced.
    """
    if added.non_urban_intersection not in suburban_junctions_dict:
        suburban_junctions_dict[added.non_urban_intersection] = {
            NON_URBAN_INTERSECTION_HEBREW: added.non_urban_intersection_hebrew,
            ROADS: set(added.roads),
        }
        return

    existing_junction = suburban_junctions_dict[added.non_urban_intersection]
    added_heb = added.non_urban_intersection_hebrew
    name_changed = (
        existing_junction[NON_URBAN_INTERSECTION_HEBREW] != added_heb
        and added_heb is not None
    )
    if name_changed:
        logging.error(
            f"Duplicate non-urban intersection name: {added.non_urban_intersection}: existing:"
            f"{existing_junction[NON_URBAN_INTERSECTION_HEBREW]}, added: {added_heb}"
        )
        # The most recently seen name wins.
        existing_junction[NON_URBAN_INTERSECTION_HEBREW] = added_heb
    existing_junction[ROADS].update(set(added.roads))


def add_suburban_junction_from_marker(marker: dict):
    """Register the junction referenced by an accident marker, if it has one.

    Markers whose non-urban intersection is None are ignored. Otherwise a
    ``SuburbanJunction`` is filled from the marker (id, Hebrew name and the
    non-None values of ``road1``/``road2``) and handed to
    ``add_suburban_junction``.
    """
    intersection = marker[NON_URBAN_INTERSECTION]
    if intersection is None:
        return
    junction = SuburbanJunction()
    junction.non_urban_intersection = intersection
    junction.non_urban_intersection_hebrew = marker[NON_URBAN_INTERSECTION_HEBREW]
    junction.roads = {
        marker[key] for key in ("road1", "road2") if marker[key] is not None
    }
    add_suburban_junction(junction)


def load_existing_road_junction_km_data():
    """Seed ``road_junction_km_dict`` with every RoadJunctionKM row in the db."""
    tmp = {}
    for row in db.session.query(RoadJunctionKM).all():
        tmp[(row.road, row.non_urban_intersection)] = row.km
    road_junction_km_dict.update(tmp)
    logging.debug(f"Loaded road-junction-km rows: {len(tmp)}.")


def import_road_junction_km_into_db():
    """Replace the RoadJunctionKM table with the entries held in memory.

    Each ``(road, junction) -> km`` entry of ``road_junction_km_dict`` becomes
    one row mapping. The existing rows are deleted, the mappings are
    bulk-inserted and the session is committed.
    """
    items = []
    for key, km in road_junction_km_dict.items():
        items.append({"road": key[0], "non_urban_intersection": key[1], "km": km})
    logging.debug(f"Writing to db: {len(items)} road junction km rows")
    session = db.session
    session.query(RoadJunctionKM).delete()
    session.bulk_insert_mappings(RoadJunctionKM, items)
    session.commit()
    logging.debug("Done.")


def add_road_junction_km_from_marker(marker: dict):
    """Record the km of a marker's junction on its first road.

    Nothing is recorded when the marker has no non-urban intersection. A
    warning is logged and nothing is recorded when ``road1`` is falsy, or when
    ``km_accurate`` or ``km`` is falsy. Otherwise ``km / 10`` is stored under
    ``(road1, intersection)`` in ``road_junction_km_dict``; a warning is logged
    if this replaces a different existing value.
    """
    intersection = marker[NON_URBAN_INTERSECTION]
    if intersection is None:
        return

    road1 = marker["road1"]
    if not road1:
        logging.warning(f"Not adding: marker entry {marker['id']} has non_urban"
                        f" intersection:{intersection},road1: {road1}.")
        return

    km_accurate = marker.get("km_accurate")
    km = marker.get("km")
    if not (km_accurate and km):
        logging.warning(f"Not adding: marker entry {marker['id']} has non "
                        f"accurate km: km_accurate:{km_accurate},km:{km}.")
        return

    k = (road1, intersection)
    v = km / 10
    exists = road_junction_km_dict.get(k)
    if exists is not None and exists != v:
        logging.warning(f"Changed road junction km: from {exists} to {v}.")
    road_junction_km_dict[k] = v


def delete_invalid_entries(batch_size):
"""
deletes all markers in the database with null latitude or longitude
Expand Down Expand Up @@ -1220,8 +1112,6 @@ def get_file_type_and_year(file_path):
def main(batch_size, source, load_start_year=None):
try:
load_existing_streets()
load_existing_suburban_junctions()
load_existing_road_junction_km_data()
total = 0
started = datetime.now()
if source == "s3":
Expand Down Expand Up @@ -1278,8 +1168,6 @@ def main(batch_size, source, load_start_year=None):
add_to_streets(streets)

import_streets_into_db()
import_suburban_junctions_into_db()
import_road_junction_km_into_db()

fill_db_geo_data()

Expand Down
134 changes: 134 additions & 0 deletions anyway/parsers/suburban_junctions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
import sys
from typing import Dict, Tuple, Iterator
import logging
from openpyxl import load_workbook
from anyway.app_and_db import db
from anyway.models import SuburbanJunction, RoadJunctionKM


# String constants used as dict keys / field names by this module.
SUBURBAN_JUNCTION = "suburban_junction"
ACCIDENTS = "accidents"
CITIES = "cities"
STREETS = "streets"
ROADS = "roads"
URBAN_INTERSECTION = "urban_intersection"
NON_URBAN_INTERSECTION = "non_urban_intersection"
NON_URBAN_INTERSECTION_HEBREW = "non_urban_intersection_hebrew"
DICTIONARY = "dictionary"
INVOLVED = "involved"
VEHICLES = "vehicles"
# Keys of the per-row dicts yielded while reading the junctions workbook.
ID = "id"
NAME = "name"
KM = "km"
ROAD1 = "road1"
# junction id -> {NON_URBAN_INTERSECTION_HEBREW: name, ROADS: set of roads}
suburban_junctions_dict: Dict[int, dict] = {}
# (road, junction) -> km
# Values are stored as KM / 10 (true division), so they are floats, not ints.
road_junction_km_dict: Dict[Tuple[int, int], float] = {}


def parse(filename):
    """Load the junctions workbook and rewrite both junction tables from it.

    Args:
        filename: path of the workbook, handed to ``read_from_file``.
    """
    # The accumulators are module-level and outlive a single call. Both tables
    # are fully replaced from them, so leftovers from an earlier call in the
    # same process would be written back as if they came from this file.
    suburban_junctions_dict.clear()
    road_junction_km_dict.clear()
    read_from_file(filename)
    import_suburban_junctions_into_db()
    import_road_junction_km_into_db()


def read_from_file(filename: str):
    """Feed every junction row of the workbook into both in-memory tables."""
    for junction_row in _iter_rows(filename):
        add_suburban_junction(junction_row)
        add_road_junction_km(junction_row)


def _iter_rows(filename) -> Iterator[dict]:
    """Yield one dict per non-empty junction row of the workbook.

    The first row of the junctions sheet must match the expected header list
    exactly. Each following row whose first cell is truthy is yielded as a
    dict with the keys ``ID``, ``NAME``, ``ROAD1`` and ``KM``, taken from the
    ``ZOMET``, ``SHEM_ZOMET``, ``REHOV1_KVISH1`` and ``KM`` columns.

    Args:
        filename: path of the workbook to read.

    Raises:
        AssertionError: if the first row is missing or does not match the
            expected headers.
    """
    headers = [
        "ZOMET",
        "SUG_DEREH",
        "REHOV1_KVISH1",
        "REHOV2_KVISH2",
        "KM",
        "IKS",
        "IGREK",
        "IDF",
        "SHEM_ZOMET",
        "SUG_ZOMET",
        "KVISH_RASHI",
        "KM_RASHI",
        "SHNAT_ZOMET_SGIRA",
        "MAHOZ",
        "NAFA",
        "EZOR_TIVI",
        "METROPOLIN",
        "MAAMAD_MINIZIPALI",
        "EZOR_STAT",
    ]
    # Look the columns up by header name instead of hard-coding their positions.
    id_col = headers.index("ZOMET")
    name_col = headers.index("SHEM_ZOMET")
    road1_col = headers.index("REHOV1_KVISH1")
    km_col = headers.index("KM")

    workbook = load_workbook(filename, read_only=True)
    try:
        sheet = workbook["מילון צמתים לא עירוניים"]
        rows = sheet.rows
        # Default to an empty row so that an empty sheet is reported as a
        # header mismatch instead of leaking StopIteration out of the generator.
        first_row = next(rows, ())
        if [cell.value for cell in first_row] != headers:
            # Raised explicitly (not via `assert`) so the check is not stripped
            # under `python -O`; the exception type and message are unchanged.
            raise AssertionError("File does not have expected headers")
        for row in rows:
            # In order to ignore empty lines
            if not row[id_col].value:
                continue
            yield {
                ID: row[id_col].value,
                NAME: row[name_col].value,
                ROAD1: row[road1_col].value,
                KM: row[km_col].value,
            }
    finally:
        # Read-only workbooks keep the file open until closed explicitly.
        workbook.close()


def add_road_junction_km(junction: dict):
    """Record the km of a junction on its road in ``road_junction_km_dict``.

    Stores ``junction[KM] / 10`` under the key ``(junction[ROAD1],
    junction[ID])``, overwriting any earlier value for that key. Rows that have
    no road or no km value are logged and skipped, so one incomplete row does
    not abort the whole load with a ``TypeError``.

    Args:
        junction: row dict with the ``ID``, ``ROAD1`` and ``KM`` keys, as
            yielded by ``_iter_rows``.
    """
    road1, km = junction[ROAD1], junction[KM]
    if road1 is None or km is None:
        logging.warning(
            f"Not adding: junction {junction[ID]} has road1: {road1}, km: {km}."
        )
        return
    # NOTE(review): the division suggests the file stores km in tenths - confirm.
    road_junction_km_dict[(road1, junction[ID])] = km / 10


def import_suburban_junctions_into_db():
    """Replace all SuburbanJunction rows with the junctions collected in memory.

    One mapping is built per entry of ``suburban_junctions_dict`` (Hebrew names
    are passed through ``fix_name_len``). The existing rows are then deleted,
    the mappings are bulk-inserted and the session is committed.

    Raises:
        Exception: any error raised by the session is re-raised after the
            pending transaction has been rolled back.
    """
    items = [
        {
            "non_urban_intersection": k,
            NON_URBAN_INTERSECTION_HEBREW: fix_name_len(v[NON_URBAN_INTERSECTION_HEBREW]),
            ROADS: v[ROADS],
        }
        for k, v in suburban_junctions_dict.items()
    ]
    logging.debug(f"Writing to db: {len(items)} suburban junctions")
    try:
        db.session.query(SuburbanJunction).delete()
        db.session.bulk_insert_mappings(SuburbanJunction, items)
        db.session.commit()
    except Exception:
        # Discard the uncommitted delete/insert so the session stays usable
        # for whoever handles the error.
        db.session.rollback()
        raise
    logging.debug("Done writing SuburbanJunction.")


def import_road_junction_km_into_db():
    """Replace all RoadJunctionKM rows with the entries collected in memory.

    One mapping is built per ``(road, junction) -> km`` entry of
    ``road_junction_km_dict``. The existing rows are then deleted, the
    mappings are bulk-inserted and the session is committed.

    Raises:
        Exception: any error raised by the session is re-raised after the
            pending transaction has been rolled back.
    """
    items = [
        {"road": k[0], "non_urban_intersection": k[1], "km": v}
        for k, v in road_junction_km_dict.items()
    ]
    logging.debug(f"Writing to db: {len(items)} road junction km rows")
    try:
        db.session.query(RoadJunctionKM).delete()
        db.session.bulk_insert_mappings(RoadJunctionKM, items)
        db.session.commit()
    except Exception:
        # Discard the uncommitted delete/insert so the session stays usable
        # for whoever handles the error.
        db.session.rollback()
        raise
    logging.debug("Done writing RoadJunctionKM.")


def fix_name_len(name: str) -> str:
    """Return ``name`` cut to at most ``SuburbanJunction.MAX_NAME_LEN`` characters.

    Values that are not ``str`` are returned unchanged. An error is logged
    whenever a name is actually longer than the limit.
    """
    if isinstance(name, str):
        exceeds_limit = len(name) > SuburbanJunction.MAX_NAME_LEN
        if exceeds_limit:
            logging.error(
                f"Suburban_junction name too long ({len(name)}>"
                f"{SuburbanJunction.MAX_NAME_LEN}):{name}."
            )
        # Slicing is a no-op for names that already fit.
        return name[: SuburbanJunction.MAX_NAME_LEN]
    return name


def add_suburban_junction(junction: dict):
    """Merge one workbook row into ``suburban_junctions_dict``.

    A junction id seen for the first time is stored with the row's name and a
    one-element set holding its road. For an id that is already known, only
    the road is added to the stored set; the stored name is left untouched.
    """
    junction_id, name, road = junction[ID], junction[NAME], junction[ROAD1]
    known = suburban_junctions_dict.get(junction_id)
    if known is None:
        suburban_junctions_dict[junction_id] = {
            NON_URBAN_INTERSECTION_HEBREW: name,
            ROADS: {road},
        }
    else:
        known[ROADS].add(road)


# Script entry point: the first command-line argument is the workbook path.
if __name__ == "__main__":
    workbook_path = sys.argv[1]
    parse(workbook_path)
8 changes: 5 additions & 3 deletions anyway/request_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,12 @@ def fill_missing_non_urban_intersection_values(vals: dict) -> dict:
else:
raise ValueError(f"Cannot get non_urban_intersection from input: {vals}")
# TODO: temporarily removing "roads" field, as it is not used correctly in the filters.
if res.get("road1") is None or res.get("road2") is None and len(res.get("roads")) > 2:
if res.get("road1") is None or res.get("road2") is None:
roads = list(res["roads"])
res["road1"] = roads[0]
res["road2"] = roads[1]
if len(roads) > 0:
res["road1"] = roads[0]
if len(roads) > 1:
res["road2"] = roads[1]
if "roads" in res:
res.pop("roads")
return res
Expand Down
2 changes: 1 addition & 1 deletion anyway/widgets/widget_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from anyway.request_params import RequestParams
from anyway.widgets.segment_junctions import SegmentJunctions

# todo: check after rebase

def get_query(table_obj, filters, start_time, end_time):
query = db.session.query(table_obj)
if start_time:
Expand Down
9 changes: 8 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,14 @@ def road_segments(filename):
return parse(filename)


@process.command()
@click.argument(
    "filename",
    type=str,
    default="static/data/suburban_junctions/suburban_junctions.xlsx",
)
def suburban_junctions(filename):
    # Runs the suburban-junctions parser on the given workbook path.
    # Documented with comments rather than a docstring, because click would
    # show a docstring as this command's help text.
    # The parser is imported here so it is only loaded when this command runs.
    from anyway.parsers.suburban_junctions import parse as parse_suburban_junctions

    return parse_suburban_junctions(filename)


@process.command()
@click.argument("filepath", type=str, default="static/data/schools/schools.csv")
@click.option("--batch_size", type=int, default=5000)
Expand Down Expand Up @@ -528,4 +536,3 @@ def trigger_dag(id):

if __name__ == "__main__":
cli(sys.argv[1:]) # pylint: disable=too-many-function-args

Binary file modified static/data/suburban_junctions/suburban_junctions.xlsx
Binary file not shown.
Loading

0 comments on commit 04a39c9

Please sign in to comment.