diff --git a/api/src/data_migration/transformation/__init__.py b/api/src/data_migration/transformation/__init__.py
index e69de29bb..d6ff9946f 100644
--- a/api/src/data_migration/transformation/__init__.py
+++ b/api/src/data_migration/transformation/__init__.py
@@ -0,0 +1,6 @@
+from typing import TypeAlias
+
+from src.db.models.staging.forecast import Tforecast, TforecastHist
+from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist
+
+SourceSummary: TypeAlias = Tforecast | Tsynopsis | TforecastHist | TsynopsisHist
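The SourceSummary alias gives the transformation code a single annotation covering all four staging summary models. A minimal sketch of how such an alias reads in practice (the helper below is hypothetical, not part of this change):

    # Hypothetical usage sketch of the SourceSummary alias defined above.
    from src.data_migration.transformation import SourceSummary

    def describe_summary(summary: SourceSummary) -> str:
        # is_forecast is defined as a property on all four staging models in this change
        kind = "forecast" if summary.is_forecast else "synopsis"
        return f"{summary.__tablename__} ({kind}) for opportunity {summary.opportunity_id}"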
diff --git a/api/src/data_migration/transformation/transform_oracle_data_task.py b/api/src/data_migration/transformation/transform_oracle_data_task.py
index 05b120f11..a7ec6cf35 100644
--- a/api/src/data_migration/transformation/transform_oracle_data_task.py
+++ b/api/src/data_migration/transformation/transform_oracle_data_task.py
@@ -3,21 +3,28 @@
 from enum import StrEnum
 from typing import Sequence, Tuple, Type, TypeVar, cast
 
-from sqlalchemy import select
+from sqlalchemy import and_, select
 
 from src.adapters import db
-from src.constants.lookup_constants import OpportunityCategory
-from src.db.models.base import ApiSchemaTable, TimestampMixin
-from src.db.models.opportunity_models import Opportunity, OpportunityAssistanceListing
+from src.data_migration.transformation import transform_util
+from src.db.models.base import ApiSchemaTable
+from src.db.models.opportunity_models import (
+    Opportunity,
+    OpportunityAssistanceListing,
+    OpportunitySummary,
+)
+from src.db.models.staging.forecast import Tforecast, TforecastHist
 from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
-from src.db.models.staging.staging_base import StagingBase, StagingParamMixin
+from src.db.models.staging.staging_base import StagingParamMixin
+from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist
 from src.task.task import Task
 from src.util import datetime_util
 
+from . import SourceSummary
+
 S = TypeVar("S", bound=StagingParamMixin)
 D = TypeVar("D", bound=ApiSchemaTable)
 
-
 logger = logging.getLogger(__name__)
@@ -63,7 +70,7 @@ def fetch(
             list[Tuple[S, D | None]],
             self.db_session.execute(
                 select(source_model, destination_model)
-                .join(destination_model, *join_clause, isouter=True)
+                .join(destination_model, and_(*join_clause), isouter=True)
                 .where(source_model.transformed_at.is_(None))
                 .execution_options(yield_per=5000)
             ),
@@ -79,7 +86,7 @@ def fetch_with_opportunity(
             list[Tuple[S, D | None, Opportunity | None]],
             self.db_session.execute(
                 select(source_model, destination_model, Opportunity)
-                .join(destination_model, *join_clause, isouter=True)
+                .join(destination_model, and_(*join_clause), isouter=True)
                 .join(
                     Opportunity,
                     source_model.opportunity_id == Opportunity.opportunity_id,  # type: ignore[attr-defined]
@@ -131,7 +138,9 @@ def process_opportunity(
             is_insert = target_opportunity is None
 
             logger.info("Transforming and upserting opportunity", extra=extra)
-            transformed_opportunity = transform_opportunity(source_opportunity, target_opportunity)
+            transformed_opportunity = transform_util.transform_opportunity(
+                source_opportunity, target_opportunity
+            )
             self.db_session.merge(transformed_opportunity)
 
             if is_insert:
@@ -212,7 +221,7 @@ def process_assistance_listing(
             is_insert = target_assistance_listing is None
 
             logger.info("Transforming and upserting assistance listing", extra=extra)
-            transformed_assistance_listing = transform_assistance_listing(
+            transformed_assistance_listing = transform_util.transform_assistance_listing(
                 source_assistance_listing, target_assistance_listing
             )
             self.db_session.merge(transformed_assistance_listing)
@@ -226,136 +235,113 @@
         source_assistance_listing.transformed_at = self.transform_time
 
     def process_opportunity_summaries(self) -> None:
-        # TODO - https://github.com/HHS/simpler-grants-gov/issues/1747
-        pass
-
-    def process_one_to_many_lookup_tables(self) -> None:
-        # TODO - https://github.com/HHS/simpler-grants-gov/issues/1749
-        pass
-
-
-###############################
-# Transformations
-###############################
-
-
-def transform_opportunity(
-    source_opportunity: Topportunity, existing_opportunity: Opportunity | None
-) -> Opportunity:
-    log_extra = {"opportunity_id": source_opportunity.opportunity_id}
-
-    if existing_opportunity is None:
-        logger.info("Creating new opportunity record", extra=log_extra)
-
-    # We always create a new opportunity record here and merge it in the calling function
-    # this way if there is any error doing the transformation, we don't modify the existing one.
-    target_opportunity = Opportunity(opportunity_id=source_opportunity.opportunity_id)
-
-    target_opportunity.opportunity_number = source_opportunity.oppnumber
-    target_opportunity.opportunity_title = source_opportunity.opptitle
-    target_opportunity.agency = source_opportunity.owningagency
-    target_opportunity.category = transform_opportunity_category(source_opportunity.oppcategory)
-    target_opportunity.category_explanation = source_opportunity.category_explanation
-    target_opportunity.revision_number = source_opportunity.revision_number
-    target_opportunity.modified_comments = source_opportunity.modified_comments
-    target_opportunity.publisher_user_id = source_opportunity.publisheruid
-    target_opportunity.publisher_profile_id = source_opportunity.publisher_profile_id
-
-    # The legacy system doesn't actually have this value as a boolean. There are several
-    # different letter codes. However, their API implementation also does this for their draft flag.
-    target_opportunity.is_draft = source_opportunity.is_draft != "N"
-    transform_update_create_timestamp(source_opportunity, target_opportunity, log_extra=log_extra)
-
-    return target_opportunity
-
-
-OPPORTUNITY_CATEGORY_MAP = {
-    "D": OpportunityCategory.DISCRETIONARY,
-    "M": OpportunityCategory.MANDATORY,
-    "C": OpportunityCategory.CONTINUATION,
-    "E": OpportunityCategory.EARMARK,
-    "O": OpportunityCategory.OTHER,
-}
-
-
-def transform_opportunity_category(value: str | None) -> OpportunityCategory | None:
-    if value is None or value == "":
-        return None
+        logger.info("Processing opportunity summaries")
+
+        logger.info("Processing synopsis records")
+        synopsis_records = self.fetch_with_opportunity(
+            Tsynopsis,
+            OpportunitySummary,
+            [
+                Tsynopsis.opportunity_id == OpportunitySummary.opportunity_id,
+                OpportunitySummary.is_forecast.is_(False),
+                OpportunitySummary.revision_number.is_(None),
+            ],
+        )
+        self.process_opportunity_summary_group(synopsis_records)
 
-    if value not in OPPORTUNITY_CATEGORY_MAP:
-        raise ValueError("Unrecognized opportunity category: %s" % value)
+        logger.info("Processing synopsis hist records")
+        synopsis_hist_records = self.fetch_with_opportunity(
+            TsynopsisHist,
+            OpportunitySummary,
+            [
+                TsynopsisHist.opportunity_id == OpportunitySummary.opportunity_id,
+                TsynopsisHist.revision_number == OpportunitySummary.revision_number,
+                OpportunitySummary.is_forecast.is_(False),
+            ],
+        )
+        self.process_opportunity_summary_group(synopsis_hist_records)
 
-    return OPPORTUNITY_CATEGORY_MAP[value]
+        logger.info("Processing forecast records")
+        forecast_records = self.fetch_with_opportunity(
+            Tforecast,
+            OpportunitySummary,
+            [
+                Tforecast.opportunity_id == OpportunitySummary.opportunity_id,
+                OpportunitySummary.is_forecast.is_(True),
+                OpportunitySummary.revision_number.is_(None),
+            ],
+        )
+        self.process_opportunity_summary_group(forecast_records)
 
+        logger.info("Processing forecast hist records")
+        forecast_hist_records = self.fetch_with_opportunity(
+            TforecastHist,
+            OpportunitySummary,
+            [
+                TforecastHist.opportunity_id == OpportunitySummary.opportunity_id,
+                TforecastHist.revision_number == OpportunitySummary.revision_number,
+                OpportunitySummary.is_forecast.is_(True),
+            ],
+        )
+        self.process_opportunity_summary_group(forecast_hist_records)
 
-def transform_assistance_listing(
-    source_assistance_listing: TopportunityCfda,
-    existing_assistance_listing: OpportunityAssistanceListing | None,
-) -> OpportunityAssistanceListing:
-    log_extra = {"opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id}
+    def process_opportunity_summary_group(
+        self, records: Sequence[Tuple[SourceSummary, OpportunitySummary | None, Opportunity | None]]
+    ) -> None:
+        for source_summary, target_summary, opportunity in records:
+            try:
+                self.process_opportunity_summary(source_summary, target_summary, opportunity)
+            except ValueError:
+                self.increment(self.Metrics.TOTAL_ERROR_COUNT)
+                logger.exception(
+                    "Failed to process opportunity summary",
+                    extra=transform_util.get_log_extra_summary(source_summary),
+                )
 
-    if existing_assistance_listing is None:
-        logger.info("Creating new assistance listing record", extra=log_extra)
+    def process_opportunity_summary(
+        self,
+        source_summary: SourceSummary,
+        target_summary: OpportunitySummary | None,
+        opportunity: Opportunity | None,
+    ) -> None:
+        self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED)
+        extra = transform_util.get_log_extra_summary(source_summary)
logger.info("Processing opportunity summary", extra=extra) - # We always create a new assistance listing record here and merge it in the calling function - # this way if there is any error doing the transformation, we don't modify the existing one. - target_assistance_listing = OpportunityAssistanceListing( - opportunity_assistance_listing_id=source_assistance_listing.opp_cfda_id, - opportunity_id=source_assistance_listing.opportunity_id, - ) + if opportunity is None: + # This shouldn't be possible as the incoming data has foreign keys, but as a safety net + # we'll make sure the opportunity actually exists + raise ValueError( + "Opportunity summary cannot be processed as the opportunity for it does not exist" + ) - target_assistance_listing.assistance_listing_number = source_assistance_listing.cfdanumber - target_assistance_listing.program_title = source_assistance_listing.programtitle + if source_summary.is_deleted: + logger.info("Deleting opportunity summary", extra=extra) - transform_update_create_timestamp( - source_assistance_listing, target_assistance_listing, log_extra=log_extra - ) + if target_summary is None: + raise ValueError("Cannot delete opportunity summary as it does not exist") - return target_assistance_listing + self.increment(self.Metrics.TOTAL_RECORDS_DELETED) + self.db_session.delete(target_summary) + else: + # To avoid incrementing metrics for records we fail to transform, record + # here whether it's an insert/update and we'll increment after transforming + is_insert = target_summary is None -def convert_est_timestamp_to_utc(timestamp: datetime | None) -> datetime | None: - if timestamp is None: - return None + logger.info("Transforming and upserting opportunity summary", extra=extra) + transformed_opportunity_summary = transform_util.transform_opportunity_summary( + source_summary, target_summary + ) + self.db_session.merge(transformed_opportunity_summary) - # The timestamps we get from the legacy system have no timezone info - # but we know the database uses US Eastern timezone by default - # - # First add the America/New_York timezone without any other modification - aware_timestamp = datetime_util.make_timezone_aware(timestamp, "US/Eastern") - # Then adjust the timezone to UTC this will handle any DST or other conversion complexities - return datetime_util.adjust_timezone(aware_timestamp, "UTC") + if is_insert: + self.increment(self.Metrics.TOTAL_RECORDS_INSERTED) + else: + self.increment(self.Metrics.TOTAL_RECORDS_UPDATED) + logger.info("Processed opportunity summary", extra=extra) + source_summary.transformed_at = self.transform_time -def transform_update_create_timestamp( - source: StagingBase, target: TimestampMixin, log_extra: dict | None = None -) -> None: - # Convert the source timestamps to UTC - # Note: the type ignores are because created_date/last_upd_date are added - # on the individual class definitions, not the base class - due to how - # we need to maintain the column order of the legacy system. - # Every legacy table does have these columns. - created_timestamp = convert_est_timestamp_to_utc(source.created_date) # type: ignore[attr-defined] - updated_timestamp = convert_est_timestamp_to_utc(source.last_upd_date) # type: ignore[attr-defined] - - if created_timestamp is not None: - target.created_at = created_timestamp - else: - # This is incredibly rare, but possible - because our system requires - # we set something, we'll default to the current time and log a warning. 
-        if log_extra is None:
-            log_extra = {}
-
-        logger.warning(
-            f"{source.__class__} does not have a created_date timestamp set, setting value to now.",
-            extra=log_extra,
-        )
-        target.created_at = datetime_util.utcnow()
-
-    if updated_timestamp is not None:
-        target.updated_at = updated_timestamp
-    else:
-        # In the legacy system, they don't set whether something was updated
-        # until it receives an update. We always set the value, and on initial insert
-        # want it to be the same as the created_at.
-        target.updated_at = target.created_at
+    def process_one_to_many_lookup_tables(self) -> None:
+        # TODO - https://github.com/HHS/simpler-grants-gov/issues/1749
+        pass
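The `and_(*join_clause)` change in fetch()/fetch_with_opportunity() above matters because Select.join() takes a single ON clause; unpacking the condition list as extra positional arguments is not valid. A self-contained sketch with hypothetical tables (not the project's models):

    # Sketch with hypothetical models: folding several join conditions into the
    # single ON clause that Select.join() expects for the LEFT OUTER JOIN.
    from sqlalchemy import Boolean, Column, DateTime, Integer, and_, select
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Src(Base):
        __tablename__ = "src"
        opportunity_id = Column(Integer, primary_key=True)
        transformed_at = Column(DateTime)

    class Dest(Base):
        __tablename__ = "dest"
        opportunity_id = Column(Integer, primary_key=True)
        is_forecast = Column(Boolean)

    join_clause = [
        Src.opportunity_id == Dest.opportunity_id,
        Dest.is_forecast.is_(True),
    ]

    stmt = (
        select(Src, Dest)
        .join(Dest, and_(*join_clause), isouter=True)
        .where(Src.transformed_at.is_(None))
    )
    # renders roughly: ... LEFT OUTER JOIN dest ON src.opportunity_id = dest.opportunity_id
    #                  AND dest.is_forecast IS true WHERE src.transformed_at IS NULL
    print(stmt)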
diff --git a/api/src/data_migration/transformation/transform_util.py b/api/src/data_migration/transformation/transform_util.py
new file mode 100644
index 000000000..675fc677a
--- /dev/null
+++ b/api/src/data_migration/transformation/transform_util.py
@@ -0,0 +1,283 @@
+import logging
+from datetime import datetime
+
+from src.constants.lookup_constants import OpportunityCategory
+from src.db.models.base import TimestampMixin
+from src.db.models.opportunity_models import (
+    Opportunity,
+    OpportunityAssistanceListing,
+    OpportunitySummary,
+)
+from src.db.models.staging.forecast import TforecastHist
+from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
+from src.db.models.staging.staging_base import StagingBase
+from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist
+from src.util import datetime_util
+
+from . import SourceSummary
+
+logger = logging.getLogger(__name__)
+
+OPPORTUNITY_CATEGORY_MAP = {
+    "D": OpportunityCategory.DISCRETIONARY,
+    "M": OpportunityCategory.MANDATORY,
+    "C": OpportunityCategory.CONTINUATION,
+    "E": OpportunityCategory.EARMARK,
+    "O": OpportunityCategory.OTHER,
+}
+
+
+def transform_opportunity(
+    source_opportunity: Topportunity, existing_opportunity: Opportunity | None
+) -> Opportunity:
+    log_extra = {"opportunity_id": source_opportunity.opportunity_id}
+
+    if existing_opportunity is None:
+        logger.info("Creating new opportunity record", extra=log_extra)
+
+    # We always create a new opportunity record here and merge it in the calling function;
+    # this way, if there is any error during the transformation, we don't modify the existing one.
+    target_opportunity = Opportunity(opportunity_id=source_opportunity.opportunity_id)
+
+    target_opportunity.opportunity_number = source_opportunity.oppnumber
+    target_opportunity.opportunity_title = source_opportunity.opptitle
+    target_opportunity.agency = source_opportunity.owningagency
+    target_opportunity.category = transform_opportunity_category(source_opportunity.oppcategory)
+    target_opportunity.category_explanation = source_opportunity.category_explanation
+    target_opportunity.revision_number = source_opportunity.revision_number
+    target_opportunity.modified_comments = source_opportunity.modified_comments
+    target_opportunity.publisher_user_id = source_opportunity.publisheruid
+    target_opportunity.publisher_profile_id = source_opportunity.publisher_profile_id
+
+    # The legacy system doesn't actually store this value as a boolean - there are several
+    # different letter codes. The legacy API implementation treats anything other than "N"
+    # as a draft, so we mirror that behavior here.
+    target_opportunity.is_draft = source_opportunity.is_draft != "N"
+
+    transform_update_create_timestamp(source_opportunity, target_opportunity, log_extra=log_extra)
+
+    return target_opportunity
+
+
+def transform_opportunity_category(value: str | None) -> OpportunityCategory | None:
+    if value is None or value == "":
+        return None
+
+    if value not in OPPORTUNITY_CATEGORY_MAP:
+        raise ValueError("Unrecognized opportunity category: %s" % value)
+
+    return OPPORTUNITY_CATEGORY_MAP[value]
+
+
+def transform_assistance_listing(
+    source_assistance_listing: TopportunityCfda,
+    existing_assistance_listing: OpportunityAssistanceListing | None,
+) -> OpportunityAssistanceListing:
+    log_extra = {"opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id}
+
+    if existing_assistance_listing is None:
+        logger.info("Creating new assistance listing record", extra=log_extra)
+
+    # We always create a new assistance listing record here and merge it in the calling function;
+    # this way, if there is any error during the transformation, we don't modify the existing one.
+    target_assistance_listing = OpportunityAssistanceListing(
+        opportunity_assistance_listing_id=source_assistance_listing.opp_cfda_id,
+        opportunity_id=source_assistance_listing.opportunity_id,
+    )
+
+    target_assistance_listing.assistance_listing_number = source_assistance_listing.cfdanumber
+    target_assistance_listing.program_title = source_assistance_listing.programtitle
+
+    transform_update_create_timestamp(
+        source_assistance_listing, target_assistance_listing, log_extra=log_extra
+    )
+
+    return target_assistance_listing
+
+
+def transform_opportunity_summary(
+    source_summary: SourceSummary, incoming_summary: OpportunitySummary | None
+) -> OpportunitySummary:
+    log_extra = get_log_extra_summary(source_summary)
+
+    if incoming_summary is None:
+        logger.info("Creating new opportunity summary record", extra=log_extra)
+        target_summary = OpportunitySummary(
+            opportunity_id=source_summary.opportunity_id,
+            is_forecast=source_summary.is_forecast,
+            revision_number=None,
+        )
+
+        # Revision number is only found in the historical tables
+        if isinstance(source_summary, (TsynopsisHist, TforecastHist)):
+            target_summary.revision_number = source_summary.revision_number
+    else:
+        # We create a new summary object and merge it outside this function;
+        # that way, if any modifications occur on the object before an error,
+        # they aren't actually applied
+        target_summary = OpportunitySummary(
+            opportunity_summary_id=incoming_summary.opportunity_summary_id
+        )
+
+    # Fields in all 4 source tables
+    target_summary.version_number = source_summary.version_nbr
+    target_summary.is_cost_sharing = convert_yn_bool(source_summary.cost_sharing)
+    target_summary.post_date = source_summary.posting_date
+    target_summary.archive_date = source_summary.archive_date
+    target_summary.expected_number_of_awards = convert_numeric_str_to_int(
+        source_summary.number_of_awards
+    )
+    target_summary.estimated_total_program_funding = convert_numeric_str_to_int(
+        source_summary.est_funding
+    )
+    target_summary.award_floor = convert_numeric_str_to_int(source_summary.award_floor)
+    target_summary.award_ceiling = convert_numeric_str_to_int(source_summary.award_ceiling)
+    target_summary.additional_info_url = source_summary.fd_link_url
+    target_summary.additional_info_url_description = source_summary.fd_link_desc
+    target_summary.modification_comments = source_summary.modification_comments
+    target_summary.funding_category_description = source_summary.oth_cat_fa_desc
+    target_summary.applicant_eligibility_description = source_summary.applicant_elig_desc
+    target_summary.agency_name = source_summary.ac_name
+    target_summary.agency_email_address = source_summary.ac_email_addr
+    target_summary.agency_email_address_description = source_summary.ac_email_desc
+    target_summary.can_send_mail = convert_yn_bool(source_summary.sendmail)
+    target_summary.publisher_profile_id = source_summary.publisher_profile_id
+    target_summary.publisher_user_id = source_summary.publisheruid
+    target_summary.updated_by = source_summary.last_upd_id
+    target_summary.created_by = source_summary.creator_id
+
+    # Some fields are either named differently in synopsis/forecast
+    # or only come from one of those tables, so handle those here
+    if isinstance(source_summary, (Tsynopsis, TsynopsisHist)):
+        target_summary.summary_description = source_summary.syn_desc
+        target_summary.agency_code = source_summary.a_sa_code
+        target_summary.agency_phone_number = source_summary.ac_phone_number
+
+        # Synopsis only fields
+        target_summary.agency_contact_description = source_summary.agency_contact_desc
+        target_summary.close_date = source_summary.response_date
+        target_summary.close_date_description = source_summary.response_date_desc
+        target_summary.unarchive_date = source_summary.unarchive_date
+
+    else:  # Tforecast & TforecastHist
+        target_summary.summary_description = source_summary.forecast_desc
+        target_summary.agency_code = source_summary.agency_code
+        target_summary.agency_phone_number = source_summary.ac_phone
+
+        # Forecast only fields
+        target_summary.forecasted_post_date = source_summary.est_synopsis_posting_date
+        target_summary.forecasted_close_date = source_summary.est_appl_response_date
+        target_summary.forecasted_close_date_description = (
+            source_summary.est_appl_response_date_desc
+        )
+        target_summary.forecasted_award_date = source_summary.est_award_date
+        target_summary.forecasted_project_start_date = source_summary.est_project_start_date
+        target_summary.fiscal_year = source_summary.fiscal_year
+
+    # Historical only
+    if isinstance(source_summary, (TsynopsisHist, TforecastHist)):
+        target_summary.is_deleted = convert_action_type_to_is_deleted(source_summary.action_type)
+    else:
+        target_summary.is_deleted = False
+
+    transform_update_create_timestamp(source_summary, target_summary, log_extra=log_extra)
+
+    return target_summary
+
+
+def convert_est_timestamp_to_utc(timestamp: datetime | None) -> datetime | None:
+    if timestamp is None:
+        return None
+
+    # The timestamps we get from the legacy system have no timezone info,
+    # but we know the database uses US Eastern time by default
+    #
+    # First attach the US/Eastern timezone without any other modification
+    aware_timestamp = datetime_util.make_timezone_aware(timestamp, "US/Eastern")
+    # Then adjust the timezone to UTC; this will handle any DST or other conversion complexities
+    return datetime_util.adjust_timezone(aware_timestamp, "UTC")
+
+
+def transform_update_create_timestamp(
+    source: StagingBase, target: TimestampMixin, log_extra: dict | None = None
+) -> None:
+    # Convert the source timestamps to UTC
+    # Note: the type ignores are because created_date/last_upd_date are added
+    # on the individual class definitions, not the base class - due to how
+    # we need to maintain the column order of the legacy system.
+    # Every legacy table does have these columns.
+    created_timestamp = convert_est_timestamp_to_utc(source.created_date)  # type: ignore[attr-defined]
+    updated_timestamp = convert_est_timestamp_to_utc(source.last_upd_date)  # type: ignore[attr-defined]
+
+    if created_timestamp is not None:
+        target.created_at = created_timestamp
+    else:
+        # This is incredibly rare, but possible - because our system requires
+        # we set something, we'll default to the current time and log a warning.
+        if log_extra is None:
+            log_extra = {}
+
+        logger.warning(
+            f"{source.__class__} does not have a created_date timestamp set, setting value to now.",
+            extra=log_extra,
+        )
+        target.created_at = datetime_util.utcnow()
+
+    if updated_timestamp is not None:
+        target.updated_at = updated_timestamp
+    else:
+        # In the legacy system, they don't set whether something was updated
+        # until it receives an update. We always set the value, and on initial insert
+        # want it to be the same as the created_at.
+        target.updated_at = target.created_at
+
+
+def convert_yn_bool(value: str | None) -> bool | None:
+    # Booleans in the Oracle database are stored as varchar/char
+    # columns with the values as Y/N
+    if value is None or value == "":
+        return None
+
+    if value == "Y":
+        return True
+
+    if value == "N":
+        return False
+
+    # Just in case the column isn't actually a boolean
+    raise ValueError("Unexpected Y/N bool value: %s" % value)
+
+
+def convert_action_type_to_is_deleted(value: str | None) -> bool | None:
+    if value is None or value == "":
+        return None
+
+    if value == "D":  # D = Delete
+        return True
+
+    if value == "U":  # U = Update
+        return False
+
+    raise ValueError("Unexpected action type value: %s" % value)
+
+
+def convert_numeric_str_to_int(value: str | None) -> int | None:
+    if value is None or value == "":
+        return None
+
+    try:
+        return int(value)
+    except ValueError:
+        # From what we've found in the legacy data, some of these numeric strings
+        # are written out as "none", "not available", "n/a" or similar. All of these
All of these + # we're fine with collectively treating as null-equivalent + return None + + +def get_log_extra_summary(source_summary: SourceSummary) -> dict: + return { + "opportunity_id": source_summary.opportunity_id, + "is_forecast": source_summary.is_forecast, + # This value only exists on non-historical records + # use getattr instead of an isinstance if/else for simplicity + "revision_number": getattr(source_summary, "revision_number", None), + "table_name": source_summary.__tablename__, + } diff --git a/api/src/db/foreign/dialect.py b/api/src/db/foreign/dialect.py index 8603a04c3..4e96206f8 100644 --- a/api/src/db/foreign/dialect.py +++ b/api/src/db/foreign/dialect.py @@ -6,6 +6,7 @@ import re import sqlalchemy +import sqlalchemy.dialects.postgresql class ForeignTableDDLCompiler(sqlalchemy.sql.compiler.DDLCompiler): diff --git a/api/src/db/migrations/env.py b/api/src/db/migrations/env.py index 5ef78cc40..74968ef49 100644 --- a/api/src/db/migrations/env.py +++ b/api/src/db/migrations/env.py @@ -44,6 +44,7 @@ def include_object( if type_ == "schema" and getattr(object, "schema", None) is not None: return False + if type_ == "table" and name is not None and name.startswith("foreign_"): # We create foreign tables to an Oracle database, if we see those locally # just ignore them as they aren't something we want included in Alembic diff --git a/api/src/db/models/staging/forecast.py b/api/src/db/models/staging/forecast.py index 1fa99bdc4..f80163302 100644 --- a/api/src/db/models/staging/forecast.py +++ b/api/src/db/models/staging/forecast.py @@ -1,14 +1,40 @@ +from sqlalchemy.orm import Mapped, relationship + from src.db.legacy_mixin import forecast_mixin from src.db.models.staging.staging_base import StagingBase, StagingParamMixin +from .opportunity import Topportunity + class Tforecast(StagingBase, forecast_mixin.TforecastMixin, StagingParamMixin): __tablename__ = "tforecast" + opportunity: Mapped[Topportunity | None] = relationship( + Topportunity, + primaryjoin="Tforecast.opportunity_id == foreign(Topportunity.opportunity_id)", + uselist=False, + overlaps="opportunity", + ) + + @property + def is_forecast(self) -> bool: + return True + class TforecastHist(StagingBase, forecast_mixin.TforecastHistMixin, StagingParamMixin): __tablename__ = "tforecast_hist" + opportunity: Mapped[Topportunity | None] = relationship( + Topportunity, + primaryjoin="TforecastHist.opportunity_id == foreign(Topportunity.opportunity_id)", + uselist=False, + overlaps="opportunity", + ) + + @property + def is_forecast(self) -> bool: + return True + class TapplicanttypesForecast( StagingBase, forecast_mixin.TapplicanttypesForecastMixin, StagingParamMixin diff --git a/api/src/db/models/staging/synopsis.py b/api/src/db/models/staging/synopsis.py index 574c1b1c1..e1e828045 100644 --- a/api/src/db/models/staging/synopsis.py +++ b/api/src/db/models/staging/synopsis.py @@ -1,14 +1,40 @@ +from sqlalchemy.orm import Mapped, relationship + from src.db.legacy_mixin import synopsis_mixin from src.db.models.staging.staging_base import StagingBase, StagingParamMixin +from .opportunity import Topportunity + class Tsynopsis(StagingBase, synopsis_mixin.TsynopsisMixin, StagingParamMixin): __tablename__ = "tsynopsis" + opportunity: Mapped[Topportunity | None] = relationship( + Topportunity, + primaryjoin="Tsynopsis.opportunity_id == foreign(Topportunity.opportunity_id)", + uselist=False, + overlaps="opportunity", + ) + + @property + def is_forecast(self) -> bool: + return False + class TsynopsisHist(StagingBase, 
diff --git a/api/src/db/foreign/dialect.py b/api/src/db/foreign/dialect.py
index 8603a04c3..4e96206f8 100644
--- a/api/src/db/foreign/dialect.py
+++ b/api/src/db/foreign/dialect.py
@@ -6,6 +6,7 @@
 import re
 
 import sqlalchemy
+import sqlalchemy.dialects.postgresql
 
 
 class ForeignTableDDLCompiler(sqlalchemy.sql.compiler.DDLCompiler):
diff --git a/api/src/db/migrations/env.py b/api/src/db/migrations/env.py
index 5ef78cc40..74968ef49 100644
--- a/api/src/db/migrations/env.py
+++ b/api/src/db/migrations/env.py
@@ -44,6 +44,7 @@ def include_object(
     if type_ == "schema" and getattr(object, "schema", None) is not None:
         return False
+
     if type_ == "table" and name is not None and name.startswith("foreign_"):
         # We create foreign tables to an Oracle database, if we see those locally
         # just ignore them as they aren't something we want included in Alembic
diff --git a/api/src/db/models/staging/forecast.py b/api/src/db/models/staging/forecast.py
index 1fa99bdc4..f80163302 100644
--- a/api/src/db/models/staging/forecast.py
+++ b/api/src/db/models/staging/forecast.py
@@ -1,14 +1,40 @@
+from sqlalchemy.orm import Mapped, relationship
+
 from src.db.legacy_mixin import forecast_mixin
 from src.db.models.staging.staging_base import StagingBase, StagingParamMixin
 
+from .opportunity import Topportunity
+
 
 class Tforecast(StagingBase, forecast_mixin.TforecastMixin, StagingParamMixin):
     __tablename__ = "tforecast"
 
+    opportunity: Mapped[Topportunity | None] = relationship(
+        Topportunity,
+        primaryjoin="Tforecast.opportunity_id == foreign(Topportunity.opportunity_id)",
+        uselist=False,
+        overlaps="opportunity",
+    )
+
+    @property
+    def is_forecast(self) -> bool:
+        return True
+
 
 class TforecastHist(StagingBase, forecast_mixin.TforecastHistMixin, StagingParamMixin):
     __tablename__ = "tforecast_hist"
 
+    opportunity: Mapped[Topportunity | None] = relationship(
+        Topportunity,
+        primaryjoin="TforecastHist.opportunity_id == foreign(Topportunity.opportunity_id)",
+        uselist=False,
+        overlaps="opportunity",
+    )
+
+    @property
+    def is_forecast(self) -> bool:
+        return True
+
 
 class TapplicanttypesForecast(
     StagingBase, forecast_mixin.TapplicanttypesForecastMixin, StagingParamMixin
diff --git a/api/src/db/models/staging/synopsis.py b/api/src/db/models/staging/synopsis.py
index 574c1b1c1..e1e828045 100644
--- a/api/src/db/models/staging/synopsis.py
+++ b/api/src/db/models/staging/synopsis.py
@@ -1,14 +1,40 @@
+from sqlalchemy.orm import Mapped, relationship
+
 from src.db.legacy_mixin import synopsis_mixin
 from src.db.models.staging.staging_base import StagingBase, StagingParamMixin
 
+from .opportunity import Topportunity
+
 
 class Tsynopsis(StagingBase, synopsis_mixin.TsynopsisMixin, StagingParamMixin):
     __tablename__ = "tsynopsis"
 
+    opportunity: Mapped[Topportunity | None] = relationship(
+        Topportunity,
+        primaryjoin="Tsynopsis.opportunity_id == foreign(Topportunity.opportunity_id)",
+        uselist=False,
+        overlaps="opportunity",
+    )
+
+    @property
+    def is_forecast(self) -> bool:
+        return False
+
 
 class TsynopsisHist(StagingBase, synopsis_mixin.TsynopsisHistMixin, StagingParamMixin):
     __tablename__ = "tsynopsis_hist"
 
+    opportunity: Mapped[Topportunity | None] = relationship(
+        Topportunity,
+        primaryjoin="TsynopsisHist.opportunity_id == foreign(Topportunity.opportunity_id)",
+        uselist=False,
+        overlaps="opportunity",
+    )
+
+    @property
+    def is_forecast(self) -> bool:
+        return False
+
 
 class TapplicanttypesSynopsis(
     StagingBase, synopsis_mixin.TapplicanttypesSynopsisMixin, StagingParamMixin
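The staging tables carry no database-level foreign keys, so the relationships above spell out a primaryjoin and wrap the joined column in foreign() to tell SQLAlchemy which side plays the foreign-key role; overlaps= silences the warning about several relationships mapping the same rows. A stripped-down sketch of the pattern with hypothetical models:

    # Hypothetical models mirroring the primaryjoin/foreign() pattern above;
    # there is deliberately no ForeignKey() constraint between the tables.
    from sqlalchemy import Column, Integer
    from sqlalchemy.orm import Mapped, declarative_base, relationship

    Base = declarative_base()

    class Opp(Base):
        __tablename__ = "opp"
        opportunity_id = Column(Integer, primary_key=True)

    class Summary(Base):
        __tablename__ = "summary"
        summary_id = Column(Integer, primary_key=True)
        opportunity_id = Column(Integer)  # no ForeignKey() on purpose

        # foreign() marks which column to treat as the FK since no constraint
        # exists; the string join is evaluated lazily at mapper configuration.
        opportunity: Mapped[Opp | None] = relationship(
            Opp,
            primaryjoin="Summary.opportunity_id == foreign(Opp.opportunity_id)",
            uselist=False,
        )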
diff --git a/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py b/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py
index 8f5f56f5b..2449c23f6 100644
--- a/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py
+++ b/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py
@@ -1,23 +1,27 @@
-from datetime import datetime
 from typing import Tuple
 
 import pytest
-from freezegun import freeze_time
 
-from src.constants.lookup_constants import OpportunityCategory
-from src.data_migration.transformation.transform_oracle_data_task import (
-    TransformOracleDataTask,
-    transform_opportunity_category,
-    transform_update_create_timestamp,
+from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask
+from src.db.models.opportunity_models import (
+    Opportunity,
+    OpportunityAssistanceListing,
+    OpportunitySummary,
 )
-from src.db.models.opportunity_models import Opportunity, OpportunityAssistanceListing
+from src.db.models.staging.forecast import TforecastHist
 from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
+from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist
 from tests.conftest import BaseTestClass
 from tests.src.db.models.factories import (
     OpportunityAssistanceListingFactory,
     OpportunityFactory,
+    OpportunitySummaryFactory,
+    StagingTforecastFactory,
+    StagingTforecastHistFactory,
     StagingTopportunityCfdaFactory,
     StagingTopportunityFactory,
+    StagingTsynopsisFactory,
+    StagingTsynopsisHistFactory,
 )
@@ -85,6 +89,48 @@ def setup_cfda(
     return source_cfda
 
 
+def setup_synopsis_forecast(
+    is_forecast: bool,
+    revision_number: int | None,
+    create_existing: bool,
+    opportunity: Opportunity,
+    is_delete: bool = False,
+    is_already_processed: bool = False,
+    source_values: dict | None = None,
+):
+    if source_values is None:
+        source_values = {}
+
+    if is_forecast:
+        if revision_number is None:
+            factory_cls = StagingTforecastFactory
+        else:
+            factory_cls = StagingTforecastHistFactory
+    else:
+        if revision_number is None:
+            factory_cls = StagingTsynopsisFactory
+        else:
+            factory_cls = StagingTsynopsisHistFactory
+
+    if revision_number is not None:
+        source_values["revision_number"] = revision_number
+
+    source_summary = factory_cls.create(
+        **source_values,
+        opportunity=None,  # override the SubFactory so it doesn't create its own opportunity
+        opportunity_id=opportunity.opportunity_id,
+        is_deleted=is_delete,
+        already_transformed=is_already_processed,
+    )
+
+    if create_existing:
+        OpportunitySummaryFactory.create(
+            opportunity=opportunity, is_forecast=is_forecast, revision_number=revision_number
+        )
+
+    return source_summary
+
+
 def validate_matching_fields(
     source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
 ):
@@ -186,6 +232,88 @@ def validate_assistance_listing(
     )
 
 
+def validate_opportunity_summary(
+    db_session, source_summary, expect_in_db: bool = True, expect_values_to_match: bool = True
+):
+    revision_number = None
+    is_forecast = source_summary.is_forecast
+    if isinstance(source_summary, (TsynopsisHist, TforecastHist)):
+        revision_number = source_summary.revision_number
+
+    opportunity_summary = (
+        db_session.query(OpportunitySummary)
+        .filter(
+            OpportunitySummary.opportunity_id == source_summary.opportunity_id,
+            OpportunitySummary.revision_number == revision_number,
+            OpportunitySummary.is_forecast == is_forecast,
+        )
+        .one_or_none()
+    )
+
+    if not expect_in_db:
+        assert opportunity_summary is None
+        return
+
+    matching_fields = [
+        ("version_nbr", "version_number"),
+        ("posting_date", "post_date"),
+        ("archive_date", "archive_date"),
+        ("fd_link_url", "additional_info_url"),
+        ("fd_link_desc", "additional_info_url_description"),
+        ("modification_comments", "modification_comments"),
+        ("oth_cat_fa_desc", "funding_category_description"),
+        ("applicant_elig_desc", "applicant_eligibility_description"),
+        ("ac_name", "agency_name"),
+        ("ac_email_addr", "agency_email_address"),
+        ("ac_email_desc", "agency_email_address_description"),
+        ("publisher_profile_id", "publisher_profile_id"),
+        ("publisheruid", "publisher_user_id"),
+        ("last_upd_id", "updated_by"),
+        ("creator_id", "created_by"),
+    ]
+
+    if isinstance(source_summary, (Tsynopsis, TsynopsisHist)):
+        matching_fields.extend(
+            [
+                ("syn_desc", "summary_description"),
+                ("a_sa_code", "agency_code"),
+                ("ac_phone_number", "agency_phone_number"),
+                ("agency_contact_desc", "agency_contact_description"),
+                ("response_date", "close_date"),
+                ("response_date_desc", "close_date_description"),
+                ("unarchive_date", "unarchive_date"),
+            ]
+        )
+    else:  # Tforecast & TforecastHist
+        matching_fields.extend(
+            [
+                ("forecast_desc", "summary_description"),
+                ("agency_code", "agency_code"),
+                ("ac_phone", "agency_phone_number"),
+                ("est_synopsis_posting_date", "forecasted_post_date"),
+                ("est_appl_response_date", "forecasted_close_date"),
+                ("est_appl_response_date_desc", "forecasted_close_date_description"),
+                ("est_award_date", "forecasted_award_date"),
+                ("est_project_start_date", "forecasted_project_start_date"),
+                ("fiscal_year", "fiscal_year"),
+            ]
+        )
+
+    # History only fields
+    is_deleted = False
+    if isinstance(source_summary, (TsynopsisHist, TforecastHist)):
+        matching_fields.extend([("revision_number", "revision_number")])
+
+        is_deleted = source_summary.action_type == "D"
+
+    assert opportunity_summary is not None
+    validate_matching_fields(
+        source_summary, opportunity_summary, matching_fields, expect_values_to_match
+    )
+
+    assert opportunity_summary.is_deleted == is_deleted
+
+
 class TestTransformOpportunity(BaseTestClass):
     @pytest.fixture()
     def transform_oracle_data_task(
         self, db_session, enable_factory_create, truncate_opportunities
     ) -> TransformOracleDataTask:
         return TransformOracleDataTask(db_session)
@@ -431,71 +559,224 @@ def test_process_assistance_listing_delete_but_current_missing(
         validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False)
 
 
-@pytest.mark.parametrize(
-    "value,expected_value",
-    [
-        # Just check a few
-        ("D", OpportunityCategory.DISCRETIONARY),
-        ("M", OpportunityCategory.MANDATORY),
-        ("O", OpportunityCategory.OTHER),
-        (None, None),
-        ("", None),
-    ],
-)
-def test_transform_opportunity_category(value, expected_value):
-    assert transform_opportunity_category(value) == expected_value
-
-
-@pytest.mark.parametrize("value", ["A", "B", "mandatory", "other", "hello"])
-def test_transform_opportunity_category_unexpected_value(value):
-    with pytest.raises(ValueError, match="Unrecognized opportunity category"):
-        transform_opportunity_category(value)
-
-
-@pytest.mark.parametrize(
-    "created_date,last_upd_date,expected_created_at,expected_updated_at",
-    [
-        ### Using string timestamps rather than defining the dates directly for readability
-        # A few happy scenarios
-        (
-            "2020-01-01T12:00:00",
-            "2020-06-01T12:00:00",
-            "2020-01-01T17:00:00+00:00",
-            "2020-06-01T16:00:00+00:00",
-        ),
-        (
-            "2021-01-31T21:30:15",
-            "2021-12-31T23:59:59",
-            "2021-02-01T02:30:15+00:00",
-            "2022-01-01T04:59:59+00:00",
-        ),
-        # Leap year handling
-        (
-            "2024-02-28T23:00:59",
-            "2024-02-29T19:10:10",
-            "2024-02-29T04:00:59+00:00",
-            "2024-03-01T00:10:10+00:00",
-        ),
-        # last_upd_date is None, created_date is used for both
-        ("2020-05-31T16:32:08", None, "2020-05-31T20:32:08+00:00", "2020-05-31T20:32:08+00:00"),
-        ("2020-07-15T20:00:00", None, "2020-07-16T00:00:00+00:00", "2020-07-16T00:00:00+00:00"),
-        # both input values are None, the current time is used (which we set for the purposes of this test below)
-        (None, None, "2023-05-10T12:00:00+00:00", "2023-05-10T12:00:00+00:00"),
-    ],
-)
-@freeze_time("2023-05-10 12:00:00", tz_offset=0)
-def test_transform_update_create_timestamp(
-    created_date, last_upd_date, expected_created_at, expected_updated_at
-):
-    created_datetime = datetime.fromisoformat(created_date) if created_date is not None else None
-    last_upd_datetime = datetime.fromisoformat(last_upd_date) if last_upd_date is not None else None
+class TestTransformOpportunitySummary(BaseTestClass):
+    @pytest.fixture()
+    def transform_oracle_data_task(
+        self, db_session, enable_factory_create, truncate_opportunities
+    ) -> TransformOracleDataTask:
+        return TransformOracleDataTask(db_session)
+
+    def test_process_opportunity_summaries(self, db_session, transform_oracle_data_task):
+        # Basic inserts
+        opportunity1 = OpportunityFactory.create(
+            no_current_summary=True, opportunity_assistance_listings=[]
+        )
+        forecast_insert1 = setup_synopsis_forecast(
+            is_forecast=True, revision_number=None, create_existing=False, opportunity=opportunity1
+        )
+        synopsis_insert1 = setup_synopsis_forecast(
+            is_forecast=False, revision_number=None, create_existing=False, opportunity=opportunity1
+        )
+        forecast_hist_insert1 = setup_synopsis_forecast(
+            is_forecast=True, revision_number=1, create_existing=False, opportunity=opportunity1
+        )
+        synopsis_hist_insert1 = setup_synopsis_forecast(
+            is_forecast=False, revision_number=1, create_existing=False, opportunity=opportunity1
+        )
 
-    source = StagingTopportunityFactory.build(
-        created_date=created_datetime, last_upd_date=last_upd_datetime
+        # Mix of updates and inserts, somewhat resembling what happens when summary objects
+        # get moved to the historical table (we'd update the synopsis/forecast records, and
+        # create new historical records)
+        opportunity2 = OpportunityFactory.create(
+            no_current_summary=True, opportunity_assistance_listings=[]
+        )
+        forecast_update1 = setup_synopsis_forecast(
+            is_forecast=True, revision_number=None, create_existing=True, opportunity=opportunity2
+        )
+        synopsis_update1 = setup_synopsis_forecast(
+            is_forecast=False, revision_number=None, create_existing=True, opportunity=opportunity2
+        )
+        forecast_hist_update1 = setup_synopsis_forecast(
+            is_forecast=True, revision_number=1, create_existing=True, opportunity=opportunity2
+        )
+        synopsis_hist_update1 = setup_synopsis_forecast(
+            is_forecast=False, revision_number=1, create_existing=True, opportunity=opportunity2
+        )
+        forecast_hist_insert2 = setup_synopsis_forecast(
+            is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity2
+        )
+        synopsis_hist_insert2 = setup_synopsis_forecast(
+            is_forecast=False, revision_number=2, create_existing=False, opportunity=opportunity2
+        )
+
+        # Mix of inserts, updates, and deletes
+        opportunity3 = OpportunityFactory.create(
+            no_current_summary=True, opportunity_assistance_listings=[]
+        )
+        forecast_delete1 = setup_synopsis_forecast(
+            is_forecast=True,
+            revision_number=None,
+            create_existing=True,
+            is_delete=True,
+            opportunity=opportunity3,
+        )
+        synopsis_delete1 = setup_synopsis_forecast(
+            is_forecast=False,
+            revision_number=None,
+            create_existing=True,
+            is_delete=True,
+            opportunity=opportunity3,
+        )
+        forecast_hist_insert3 = setup_synopsis_forecast(
+            is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity3
+        )
+        synopsis_hist_update2 = setup_synopsis_forecast(
+            is_forecast=False,
+            revision_number=1,
+            create_existing=True,
+            source_values={"action_type": "D"},
+            opportunity=opportunity3,
+        )
+
+        # A few error scenarios
+        opportunity4 = OpportunityFactory.create(
+            no_current_summary=True, opportunity_assistance_listings=[]
+        )
+        forecast_delete_but_current_missing = setup_synopsis_forecast(
+            is_forecast=True,
+            revision_number=None,
+            create_existing=False,
+            is_delete=True,
+            opportunity=opportunity4,
+        )
+        synopsis_update_invalid_yn_field = setup_synopsis_forecast(
+            is_forecast=False,
+            revision_number=None,
+            create_existing=True,
+            source_values={"sendmail": "E"},
+            opportunity=opportunity4,
+        )
+        synopsis_hist_insert_invalid_yn_field = setup_synopsis_forecast(
+            is_forecast=False,
+            revision_number=1,
+            create_existing=False,
+            source_values={"cost_sharing": "1"},
+            opportunity=opportunity4,
+        )
+        forecast_hist_update_invalid_action_type = setup_synopsis_forecast(
+            is_forecast=True,
+            revision_number=2,
+            create_existing=True,
+            source_values={"action_type": "X"},
+            opportunity=opportunity4,
+        )
+
+        transform_oracle_data_task.process_opportunity_summaries()
+
+        validate_opportunity_summary(db_session, forecast_insert1)
+        validate_opportunity_summary(db_session, synopsis_insert1)
+        validate_opportunity_summary(db_session, forecast_hist_insert1)
+        validate_opportunity_summary(db_session, synopsis_hist_insert1)
+        validate_opportunity_summary(db_session, forecast_hist_insert2)
+        validate_opportunity_summary(db_session, synopsis_hist_insert2)
+        validate_opportunity_summary(db_session, forecast_hist_insert3)
+
+        validate_opportunity_summary(db_session, forecast_update1)
+        validate_opportunity_summary(db_session, synopsis_update1)
+        validate_opportunity_summary(db_session, forecast_hist_update1)
+        validate_opportunity_summary(db_session, synopsis_hist_update1)
+        validate_opportunity_summary(db_session, synopsis_hist_update2)
+
+        validate_opportunity_summary(db_session, forecast_delete1, expect_in_db=False)
+        validate_opportunity_summary(db_session, synopsis_delete1, expect_in_db=False)
+
+        validate_opportunity_summary(
+            db_session, forecast_delete_but_current_missing, expect_in_db=False
+        )
+        validate_opportunity_summary(
+            db_session,
+            synopsis_update_invalid_yn_field,
+            expect_in_db=True,
+            expect_values_to_match=False,
+        )
+        validate_opportunity_summary(
+            db_session, synopsis_hist_insert_invalid_yn_field, expect_in_db=False
+        )
+        validate_opportunity_summary(
+            db_session,
+            forecast_hist_update_invalid_action_type,
+            expect_in_db=True,
+            expect_values_to_match=False,
+        )
+
+        metrics = transform_oracle_data_task.metrics
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 18
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 7
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 5
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 4
+
+        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 4
+        transform_oracle_data_task.process_opportunity_summaries()
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 22
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 7
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 5
+        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 8
+
+    @pytest.mark.parametrize(
+        "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)]
     )
-    destination = OpportunityFactory.build()
+    def test_process_opportunity_summary_delete_but_current_missing(
+        self, db_session, transform_oracle_data_task, is_forecast, revision_number
+    ):
+        opportunity = OpportunityFactory.create(
+            no_current_summary=True, opportunity_assistance_listings=[]
+        )
+        delete_but_current_missing = setup_synopsis_forecast(
+            is_forecast=is_forecast,
+            revision_number=revision_number,
+            create_existing=False,
+            is_delete=True,
+            opportunity=opportunity,
+        )
+
+        with pytest.raises(
+            ValueError, match="Cannot delete opportunity summary as it does not exist"
+        ):
+            transform_oracle_data_task.process_opportunity_summary(
+                delete_but_current_missing, None, opportunity
+            )
 
-    transform_update_create_timestamp(source, destination)
+    @pytest.mark.parametrize(
+        "is_forecast,revision_number,source_values,expected_error",
+        [
+            (True, None, {"sendmail": "z"}, "Unexpected Y/N bool value: z"),
+            (False, None, {"cost_sharing": "v"}, "Unexpected Y/N bool value: v"),
+            (True, 5, {"action_type": "T"}, "Unexpected action type value: T"),
+            (False, 10, {"action_type": "5"}, "Unexpected action type value: 5"),
+        ],
+    )
+    def test_process_opportunity_summary_invalid_value_errors(
+        self,
+        db_session,
+        transform_oracle_data_task,
+        is_forecast,
+        revision_number,
+        source_values,
+        expected_error,
+    ):
+        opportunity = OpportunityFactory.create(
+            no_current_summary=True, opportunity_assistance_listings=[]
+        )
+        source_summary = setup_synopsis_forecast(
+            is_forecast=is_forecast,
+            revision_number=revision_number,
+            create_existing=False,
+            opportunity=opportunity,
+            source_values=source_values,
+        )
 
-    assert destination.created_at == datetime.fromisoformat(expected_created_at)
-    assert destination.updated_at == datetime.fromisoformat(expected_updated_at)
+        with pytest.raises(ValueError, match=expected_error):
+            transform_oracle_data_task.process_opportunity_summary(
+                source_summary, None, opportunity
+            )
diff --git a/api/tests/src/data_migration/transformation/test_transform_util.py b/api/tests/src/data_migration/transformation/test_transform_util.py
new file mode 100644
index 000000000..98bfcdb68
--- /dev/null
+++ b/api/tests/src/data_migration/transformation/test_transform_util.py
@@ -0,0 +1,122 @@
+from datetime import datetime
+
+import pytest
+from freezegun import freeze_time
+
+from src.constants.lookup_constants import OpportunityCategory
+from src.data_migration.transformation import transform_util
+from tests.src.db.models.factories import OpportunityFactory, StagingTopportunityFactory
+
+
+@pytest.mark.parametrize(
+    "value,expected_value",
+    [
+        # Just check a few
+        ("D", OpportunityCategory.DISCRETIONARY),
+        ("M", OpportunityCategory.MANDATORY),
+        ("O", OpportunityCategory.OTHER),
+        (None, None),
+        ("", None),
+    ],
+)
+def test_transform_opportunity_category(value, expected_value):
+    assert transform_util.transform_opportunity_category(value) == expected_value
+
+
+@pytest.mark.parametrize("value", ["A", "B", "mandatory", "other", "hello"])
+def test_transform_opportunity_category_unexpected_value(value):
+    with pytest.raises(ValueError, match="Unrecognized opportunity category"):
+        transform_util.transform_opportunity_category(value)
+
+
+@pytest.mark.parametrize(
+    "created_date,last_upd_date,expected_created_at,expected_updated_at",
+    [
+        ### Using string timestamps rather than defining the dates directly for readability
+        # A few happy scenarios
+        (
+            "2020-01-01T12:00:00",
+            "2020-06-01T12:00:00",
+            "2020-01-01T17:00:00+00:00",
+            "2020-06-01T16:00:00+00:00",
+        ),
+        (
+            "2021-01-31T21:30:15",
+            "2021-12-31T23:59:59",
+            "2021-02-01T02:30:15+00:00",
+            "2022-01-01T04:59:59+00:00",
+        ),
+        # Leap year handling
+        (
+            "2024-02-28T23:00:59",
+            "2024-02-29T19:10:10",
+            "2024-02-29T04:00:59+00:00",
+            "2024-03-01T00:10:10+00:00",
+        ),
+        # last_upd_date is None, created_date is used for both
+        ("2020-05-31T16:32:08", None, "2020-05-31T20:32:08+00:00", "2020-05-31T20:32:08+00:00"),
+        ("2020-07-15T20:00:00", None, "2020-07-16T00:00:00+00:00", "2020-07-16T00:00:00+00:00"),
+        # both input values are None, the current time is used (which we set for the purposes of this test below)
+        (None, None, "2023-05-10T12:00:00+00:00", "2023-05-10T12:00:00+00:00"),
+    ],
+)
+@freeze_time("2023-05-10 12:00:00", tz_offset=0)
+def test_transform_update_create_timestamp(
+    created_date, last_upd_date, expected_created_at, expected_updated_at
+):
+    created_datetime = datetime.fromisoformat(created_date) if created_date is not None else None
+    last_upd_datetime = datetime.fromisoformat(last_upd_date) if last_upd_date is not None else None
+
+    source = StagingTopportunityFactory.build(
+        created_date=created_datetime, last_upd_date=last_upd_datetime
+    )
+    destination = OpportunityFactory.build()
+
+    transform_util.transform_update_create_timestamp(source, destination)
+
+    assert destination.created_at == datetime.fromisoformat(expected_created_at)
+    assert destination.updated_at == datetime.fromisoformat(expected_updated_at)
+
+
+@pytest.mark.parametrize(
+    "value,expected_value", [("Y", True), ("N", False), ("", None), (None, None)]
+)
+def test_convert_yn_boolean(value, expected_value):
+    assert transform_util.convert_yn_bool(value) == expected_value
+
+
+@pytest.mark.parametrize("value", ["X", "Z", "y", "n", "1", "0"])
+def test_convert_yn_boolean_unexpected_value(value):
+    with pytest.raises(ValueError, match="Unexpected Y/N bool value"):
+        transform_util.convert_yn_bool(value)
+
+
+@pytest.mark.parametrize(
+    "value,expected_value", [("D", True), ("U", False), ("", None), (None, None)]
+)
+def test_convert_action_type_to_is_deleted(value, expected_value):
+    assert transform_util.convert_action_type_to_is_deleted(value) == expected_value
+
+
+@pytest.mark.parametrize("value", ["A", "B", "d", "u"])
+def test_convert_action_type_to_is_deleted_unexpected_value(value):
+    with pytest.raises(ValueError, match="Unexpected action type value"):
+        transform_util.convert_action_type_to_is_deleted(value)
+
+
+@pytest.mark.parametrize(
+    "value,expected_value",
+    [
+        ("1", 1),
+        ("0", 0),
+        ("123123123", 123123123),
+        ("-5", -5),
+        ("", None),
+        (None, None),
+        ("words", None),
+        ("zero", None),
("n/a", None), + ], +) +def test_convert_numeric_str_to_int(value, expected_value): + assert transform_util.convert_numeric_str_to_int(value) == expected_value diff --git a/api/tests/src/db/models/factories.py b/api/tests/src/db/models/factories.py index f7d6e9804..86e2a9990 100644 --- a/api/tests/src/db/models/factories.py +++ b/api/tests/src/db/models/factories.py @@ -15,6 +15,7 @@ import factory.fuzzy import faker from faker.providers import BaseProvider +from sqlalchemy import func from sqlalchemy.orm import scoped_session import src.adapters.db as db @@ -33,6 +34,11 @@ def sometimes_none(factory_value, none_chance: float = 0.5): + return factory.Maybe( + decider=factory.LazyAttribute(lambda s: random.random() > none_chance), + yes_declaration=factory_value, + no_declaration=None, + ) if random.random() > none_chance: return factory_value @@ -135,6 +141,8 @@ class CustomProvider(BaseProvider): "{{word}}-###-##", ] + YN_BOOLEAN_VALUES = ["Y", "N"] + def agency(self) -> str: return self.random_element(self.AGENCIES) @@ -170,6 +178,9 @@ def summary_description(self) -> str: pattern = self.random_element(self.SUMMARY_DESCRIPTION_FORMATS) return self.generator.parse(pattern) + def yn_boolean(self) -> str: + return self.random_element(self.YN_BOOLEAN_VALUES) + fake = faker.Faker() fake.add_provider(CustomProvider) @@ -221,6 +232,20 @@ class OpportunityFactory(BaseFactory): class Meta: model = opportunity_models.Opportunity + @classmethod + def _setup_next_sequence(cls): + try: + value = ( + get_db_session() + .query(func.max(opportunity_models.Opportunity.opportunity_id)) + .scalar() + ) + if value is not None: + return value + 1 + return 1 + except Exception: + return 1 + opportunity_id = factory.Sequence(lambda n: n) opportunity_number = factory.Faker("opportunity_number") @@ -694,6 +719,173 @@ class Params: ) +class StagingTsynopsisFactory(BaseFactory): + class Meta: + model = staging.synopsis.Tsynopsis + + opportunity = factory.SubFactory(StagingTopportunityFactory) + opportunity_id = factory.LazyAttribute(lambda s: s.opportunity.opportunity_id) + + posting_date = factory.Faker("date_between", start_date="-3w", end_date="now") + response_date = factory.Faker("date_between", start_date="+2w", end_date="+3w") + archive_date = factory.Faker("date_between", start_date="+3w", end_date="+4w") + unarchive_date = sometimes_none( + factory.Faker("date_between", start_date="+6w", end_date="+7w"), none_chance=0.9 + ) + syn_desc = factory.Faker("summary_description") + oth_cat_fa_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=1)) + + cost_sharing = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.1) + # These int values are stored as strings + number_of_awards = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(1, 25))), none_chance=0.1 + ) + est_funding = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(25_000, 25_000_000, step=5_000))), + none_chance=0.1, + ) + award_ceiling = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(10_000, 25_000, step=5_000))), + none_chance=0.1, + ) + award_floor = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(0, 10_000, step=5_000))), none_chance=0.1 + ) + + fd_link_url = factory.Faker("relevant_url") + fd_link_desc = factory.Faker("additional_info_desc") + agency_contact_desc = factory.Faker("agency_contact_description") + ac_email_addr = factory.Faker("email") + ac_email_desc = factory.LazyAttribute(lambda s: f"Contact {s.ac_name} via email") + a_sa_code = 
factory.Faker("agency") + ac_phone_number = Generators.PhoneNumber + ac_name = factory.Faker("agency_name") + + created_date = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") + last_upd_date = sometimes_none( + factory.Faker("date_time_between", start_date="-5y", end_date="now") + ) + create_ts = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") + sendmail = sometimes_none(factory.Faker("yn_boolean")) + response_date_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=2)) + applicant_elig_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=5)) + version_nbr = factory.Faker("random_int", min=0, max=10) + modification_comments = sometimes_none(factory.Faker("paragraph", nb_sentences=1)) + publisheruid = sometimes_none(factory.Faker("first_name")) + publisher_profile_id = sometimes_none(factory.Faker("random_int", min=1, max=99_999)) + + # Default to being a new insert/update + is_deleted = False + transformed_at = None + + class Params: + already_transformed = factory.Trait( + transformed_at=factory.Faker("date_time_between", start_date="-7d", end_date="-1d") + ) + + +class StagingTsynopsisHistFactory(StagingTsynopsisFactory): + class Meta: + model = staging.synopsis.TsynopsisHist + + revision_number = factory.Faker("random_int", min=1, max=25) + action_type = "U" # Update, put D for deleted + + class Params: + already_transformed = factory.Trait( + transformed_at=factory.Faker("date_time_between", start_date="-7d", end_date="-1d") + ) + + +class StagingTforecastFactory(BaseFactory): + class Meta: + model = staging.forecast.Tforecast + + opportunity = factory.SubFactory(StagingTopportunityFactory) + opportunity_id = factory.LazyAttribute(lambda s: s.opportunity.opportunity_id) + + posting_date = factory.Faker("date_between", start_date="-3w", end_date="now") + archive_date = factory.Faker("date_between", start_date="+3w", end_date="+4w") + forecast_desc = factory.Faker("summary_description") + oth_cat_fa_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=1)) + + cost_sharing = sometimes_none(factory.Faker("yn_boolean"), none_chance=0.1) + # These int values are stored as strings + number_of_awards = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(1, 25))), none_chance=0.1 + ) + est_funding = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(25_000, 25_000_000, step=5_000))), + none_chance=0.1, + ) + award_ceiling = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(10_000, 25_000, step=5_000))), + none_chance=0.1, + ) + award_floor = sometimes_none( + factory.LazyFunction(lambda: str(fake.random_int(0, 10_000, step=5_000))), none_chance=0.1 + ) + + fd_link_url = factory.Faker("relevant_url") + fd_link_desc = factory.Faker("additional_info_desc") + ac_email_addr = factory.Faker("email") + ac_email_desc = factory.LazyAttribute(lambda s: f"Contact {s.ac_name} via email") + agency_code = factory.Faker("agency") + ac_phone = Generators.PhoneNumber + ac_name = factory.Faker("agency_name") + + created_date = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") + last_upd_date = sometimes_none( + factory.Faker("date_time_between", start_date="-5y", end_date="now") + ) + create_ts = factory.Faker("date_time_between", start_date="-10y", end_date="-5y") + sendmail = sometimes_none(factory.Faker("yn_boolean")) + applicant_elig_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=5)) + version_nbr = factory.Faker("random_int", min=0, max=10) + 
+    modification_comments = sometimes_none(factory.Faker("paragraph", nb_sentences=1))
+    publisheruid = sometimes_none(factory.Faker("first_name"))
+    publisher_profile_id = sometimes_none(factory.Faker("random_int", min=1, max=99_999))
+
+    est_synopsis_posting_date = sometimes_none(
+        factory.Faker("date_between", start_date="+2w", end_date="+3w")
+    )
+    est_appl_response_date = sometimes_none(
+        factory.Faker("date_between", start_date="+4w", end_date="+6w")
+    )
+    est_appl_response_date_desc = sometimes_none(factory.Faker("paragraph", nb_sentences=1))
+    est_award_date = sometimes_none(
+        factory.Faker("date_between", start_date="+26w", end_date="+30w")
+    )
+    est_project_start_date = sometimes_none(
+        factory.Faker("date_between", start_date="+30w", end_date="+52w")
+    )
+    fiscal_year = factory.LazyAttribute(
+        lambda f: f.est_project_start_date.year if f.est_project_start_date else None
+    )
+
+    # Default to being a new insert/update
+    is_deleted = False
+    transformed_at = None
+
+    class Params:
+        already_transformed = factory.Trait(
+            transformed_at=factory.Faker("date_time_between", start_date="-7d", end_date="-1d")
+        )
+
+
+class StagingTforecastHistFactory(StagingTforecastFactory):
+    class Meta:
+        model = staging.forecast.TforecastHist
+
+    revision_number = factory.Faker("random_int", min=1, max=25)
+    action_type = "U"  # U = Update; use "D" for deleted records
+
+    class Params:
+        already_transformed = factory.Trait(
+            transformed_at=factory.Faker("date_time_between", start_date="-7d", end_date="-1d")
+        )
+
+
 ####################################
 # Transfer Table Factories
 ####################################
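sometimes_none now returns a factory.Maybe declaration rather than rolling random.random() once at class-definition time, so the null-or-not decision is re-made for every object built. A standalone sketch of that pattern with a hypothetical factory:

    # Hypothetical factory showing the factory.Maybe pattern sometimes_none returns:
    # the decider is evaluated per built object, not once at import time.
    import random

    import factory

    class WidgetFactory(factory.Factory):
        class Meta:
            model = dict  # build plain dicts for the sketch

        # roughly 10% of built widgets get a None label, re-decided on every build
        label = factory.Maybe(
            decider=factory.LazyAttribute(lambda obj: random.random() > 0.1),
            yes_declaration=factory.Faker("word"),
            no_declaration=None,
        )

    print([WidgetFactory() for _ in range(5)])  # e.g. [{'label': 'thing'}, {'label': None}, ...]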