Skip to content

Commit

Permalink
Move WoW db config out to a YAML file. (#102)
Browse files Browse the repository at this point in the history
* Move WoW db config out to a YAML file.

* Change help text for --update option.
  • Loading branch information
toolness authored and sraby committed Feb 11, 2019
1 parent 32b1bcd commit db45d61
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 25 deletions.
46 changes: 21 additions & 25 deletions dbtool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import subprocess
import argparse
import time
import yaml
import nycdb.dataset
from nycdb.utility import list_wrap
from urllib.parse import urlparse
from typing import NamedTuple, Any, Tuple, Optional, Dict, List
from pathlib import Path
Expand All @@ -18,6 +21,7 @@

# Directory containing this script; other project paths are derived from it.
ROOT_DIR = Path(__file__).parent.resolve()

# Directory holding the SQL scripts that are run when building the database.
SQL_DIR = ROOT_DIR / 'sql'

# Parsed contents of who-owns-what.yml, which provides the 'dependencies'
# and 'sql' lists.
#
# safe_load() is used rather than load(): the config contains only plain
# lists of strings, so arbitrary Python object construction is never needed,
# and calling yaml.load() without an explicit Loader is deprecated since
# PyYAML 5.1 and raises a TypeError on PyYAML 6+.
WOW_YML = yaml.safe_load(
    (ROOT_DIR / 'who-owns-what.yml').read_text(encoding='utf-8')
)

# Just an alias for our database connection.
DbConnection = Any
Expand Down Expand Up @@ -146,9 +150,15 @@ def delete_downloaded_data(self, *tables: str) -> None:
print(f"Removing {csv_file.name} so it can be re-downloaded.")
csv_file.unlink()

def ensure_dataset(self, name: str, force_refresh: bool=False,
extra_tables: Optional[Tuple[str]]=None) -> None:
tables = [name, *(extra_tables or ())]
def ensure_dataset(self, name: str, force_refresh: bool=False) -> None:
dataset = nycdb.dataset.datasets()[name]
tables: List[str] = [
schema['table_name']
for schema in list_wrap(dataset['schema'])
]
tables_str = 'table' if len(tables) == 1 else 'tables'
print(f"Ensuring NYCDB dataset '{name}' is loaded with {len(tables)} {tables_str}...")

if force_refresh:
self.drop_tables(*tables)
self.delete_downloaded_data(*tables)
Expand All @@ -173,29 +183,15 @@ def build(self, force_refresh: bool) -> None:
else:
print("Loading the database with real data (this could take a while).")

self.ensure_dataset('pluto_17v1')
self.ensure_dataset('pluto_18v1')
self.ensure_dataset('rentstab_summary')
self.ensure_dataset('marshal_evictions_17', force_refresh=force_refresh)
self.ensure_dataset('hpd_registrations', force_refresh=force_refresh,
extra_tables=('hpd_contacts',))

print("Running custom SQL for HPD registrations...")
self.run_sql_file(SQL_DIR / 'registrations_with_contacts.sql')
datasets: List[str] = WOW_YML['dependencies']
sqlfiles: List[str] = WOW_YML['sql']

self.ensure_dataset('hpd_violations', force_refresh=force_refresh)

WOW_SCRIPTS = [
("Creating WoW buildings table...", "create_bldgs_table.sql"),
("Adding helper functions...", "helper_functions.sql"),
("Creating WoW search function...", "search_function.sql"),
("Creating WoW agg function...", "agg_function.sql"),
("Creating hpd landlord contact table...", "landlord_contact.sql"),
]
for dataset in datasets:
self.ensure_dataset(dataset, force_refresh=force_refresh)

for desc, filename in WOW_SCRIPTS:
print(desc)
self.run_sql_file(SQL_DIR / filename)
for sqlfile in sqlfiles:
print(f"Running {sqlfile}...")
self.run_sql_file(SQL_DIR / sqlfile)


def dbshell(db: DbContext):
Expand Down Expand Up @@ -345,7 +341,7 @@ def selftest():
)
parser_builddb.add_argument(
'--update', action='store_true',
help=('Delete downloaded data & tables for the most frequently-updated '
help=('Delete downloaded data & tables for the '
'data sets so they can be re-downloaded and re-installed.')
)
parser_builddb.set_defaults(cmd='builddb')
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
nycdb==0.1.18
psycopg2-binary==2.7.7
mypy==0.660
PyYAML>=4.2b1
17 changes: 17 additions & 0 deletions who-owns-what.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# Configuration for building the Who Owns What (WoW) database.
# dbtool.py parses this file when it is imported and reads the
# 'dependencies' and 'sql' keys below.
dependencies:
# These are NYCDB datasets needed by the SQL scripts.
# dbtool.py ensures each one is loaded, in the order listed,
# before it runs any of the SQL scripts below.
- pluto_18v1
- rentstab_summary
- marshal_evictions_17
- hpd_registrations
- hpd_violations
sql:
# These SQL scripts must be executed in order, as
# some of them depend on others.
# Each filename is resolved relative to the 'sql' directory
# that sits next to dbtool.py.
- registrations_with_contacts.sql
- create_bldgs_table.sql
- helper_functions.sql
- search_function.sql
- agg_function.sql
- landlord_contact.sql

0 comments on commit db45d61

Please sign in to comment.