Skip to content

Commit

Permalink
Importer - updates for btr changes
Browse files Browse the repository at this point in the history
Signed-off-by: Kial Jinnah <kialj876@gmail.com>
  • Loading branch information
kialj876 committed Sep 26, 2024
1 parent a08842f commit f24bfe9
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 22 deletions.
6 changes: 5 additions & 1 deletion search-solr-importer/data_import_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def load_search_core(): # pylint: disable=too-many-statements,too-many-locals,t
current_app.logger.debug('---------- Collecting/Importing BTR Data ----------')
btr_fetch_count = 0
batch_limit = current_app.config.get('BTR_BATCH_LIMIT')
btr_data_descs = []
loop_count = 0

while loop_count < 100: # NOTE: should never get to this condition
Expand All @@ -110,7 +111,10 @@ def load_search_core(): # pylint: disable=too-many-statements,too-many-locals,t
break

current_app.logger.debug('********** Mapping BTR data **********')
prepped_btr_data = prep_data_btr(btr_data)
if not btr_data_descs:
# just need to do once
btr_data_descs = [desc[0].lower() for desc in btr_data_cur.description]
prepped_btr_data = prep_data_btr(btr_data, btr_data_descs)
current_app.logger.debug(f'{len(prepped_btr_data)} BTR records ready for import.')

current_app.logger.debug('********** Importing BTR entities **********')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def collect_btr_data(limit: int = None, offset: int = None):
limit_clause += f' OFFSET {offset}'
if limit_clause:
# NOTE: needed in order to make sure we get every record when doing batch loads
limit_clause = f'ORDER BY id {limit_clause}'
limit_clause = f'ORDER BY p.id {limit_clause}'

current_app.logger.debug('Connecting to BTR GCP Postgres instance...')
conn = psycopg2.connect(host=current_app.config.get('BTR_DB_HOST'),
Expand All @@ -155,5 +155,11 @@ def collect_btr_data(limit: int = None, offset: int = None):
password=current_app.config.get('BTR_DB_PASSWORD'))
cur = conn.cursor()
current_app.logger.debug('Collecting BTR data...')
cur.execute(f'SELECT payload FROM submission {limit_clause}')
cur.execute(f"""
SELECT s.business_identifier, p.person_json
FROM submission s
JOIN ownership o on s.id = o.submission_id
JOIN person p on p.id = o.person_id
{limit_clause}
""")
return cur
38 changes: 19 additions & 19 deletions search-solr-importer/src/search_solr_importer/utils/data_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,33 +187,33 @@ def prep_data(data: list, data_descs: list[str], source: str) -> list[dict]: #
return solr_docs


def prep_data_btr(data: list[dict]) -> list[dict]:
def prep_data_btr(data: list[dict], data_descs: list[str]) -> list[dict]:
"""Return the list of partial business docs containing the SI party information."""
prepped_data: list[dict] = []

for item in data:
submission = item[0]
identifier = submission['businessIdentifier']
item_dict = dict(zip(data_descs, item))
identifier = item_dict['business_identifier']

business = {'id': identifier, 'parties': {'add': []}}

# collect current SIs.
for person in submission.get('personStatements', []):
party_name = ''
for name in person.get('names'):
if name.get('type') == 'individual': # expecting this to be 'individual' or 'alternative'
party_name = name.get('fullName')
break
if not party_name:
current_app.logger.debug('Person names: %s', person.get('names'))
current_app.logger.error('Error parsing SI name for %s', identifier)

business['parties']['add'].append({
PartyField.UNIQUE_KEY.value: identifier + '_' + person['uuid'],
PartyField.PARTY_NAME.value: party_name,
PartyField.PARTY_ROLE.value: ['significant individual'],
PartyField.PARENT_TYPE.value: 'person'
})
person = item_dict['person_json']
party_name = ''
for name in person.get('names'):
if name.get('type') == 'individual': # expecting this to be 'individual' or 'alternative'
party_name = name.get('fullName')
break
if not party_name:
current_app.logger.debug('Person names: %s', person.get('names'))
current_app.logger.error('Error parsing SI name for %s', identifier)

business['parties']['add'].append({
PartyField.UNIQUE_KEY.value: identifier + '_' + person['statementID'],
PartyField.PARTY_NAME.value: party_name,
PartyField.PARTY_ROLE.value: ['significant individual'],
PartyField.PARENT_TYPE.value: 'person'
})

prepped_data.append(business)

Expand Down

0 comments on commit f24bfe9

Please sign in to comment.