Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUMULUS-3833: Migrate ReconciliationReports data from DynamoDB to Postgres #3797

Merged
merged 14 commits into from
Oct 3, 2024
Merged
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## Phase 2 Release

### Migration Notes

#### CUMULUS-3833 Migration of ReconciliationReports from DynamoDB to Postgres after Cumulus is upgraded.

To invoke the Lambda and start the ReconciliationReport migration, you can use the AWS Console or CLI:

```bash
aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE
```

- `PREFIX` is your Cumulus deployment prefix.
- `OUTFILE` (**optional**) is the filepath where the Lambda output will be saved.
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved

### Replace ElasticSearch Phase 2

- **CUMULUS-3229**
Expand All @@ -26,6 +39,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
Elasticsearch
- Update `@cumlus/api/ecs/async-operation` to not update Elasticsearch index when
reporting status of async operation
- **CUMULUS-3833**
- Added `ReconciliationReportMigration` lambda to migrate ReconciliationReports from DynamoDB
to Postgres
- **CUMULUS-3837**
- Added `reconciliation_reports` table in RDS, including indexes
- Created pg model, types, and translation for `reconciliationReports` in `@cumulus/db`
Expand Down
7 changes: 7 additions & 0 deletions lambdas/reconciliation-report-migration/.nycrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"extends": "../../nyc.config.js",
"lines": 95.0,
"branches": 80.0,
"statements": 95.0,
"functions": 98.0
}
18 changes: 18 additions & 0 deletions lambdas/reconciliation-report-migration/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# ReconciliationReportMigration Lambda

The lambda migrates existing ReconciliationReports data from DynamoDB to PostgreSQL.

To invoke the Lambda and start the ReconciliationReport migration, you can use the AWS Console or CLI:

```bash
aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE
```

- `PREFIX` is your Cumulus deployment prefix.
- `OUTFILE` (**optional**) is the filepath where the Lambda output will be saved.

The result will be a migration summary. For example:

```
{"reconciliation_reports":{"total_dynamo_db_records":36,"migrated":36,"failed":0,"skipped":0}}
```
70 changes: 70 additions & 0 deletions lambdas/reconciliation-report-migration/iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
data "aws_iam_policy_document" "lambda_assume_role_policy" {
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved
statement {
actions = ["sts:AssumeRole"]
principals {
type = "Service"
identifiers = ["lambda.amazonaws.com"]
}
}
}

resource "aws_iam_role" "reconciliation_report_migration" {
name = "${var.prefix}-reconciliation-report-migration"
assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json
permissions_boundary = var.permissions_boundary_arn

tags = var.tags
}

data "aws_iam_policy_document" "reconciliation_report_migration" {
statement {
actions = [
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DescribeNetworkInterfaces",
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:DescribeLogStreams",
"logs:PutLogEvents"
]
resources = ["*"]
}

statement {
actions = [
"dynamodb:Scan",
]
resources = [
var.dynamo_tables.reconciliation_reports.arn,
]
}

statement {
actions = [
"secretsmanager:GetSecretValue"
]
resources = [var.rds_user_access_secret_arn]
}
}

resource "aws_iam_role_policy" "reconciliation_report_migration" {
name = "${var.prefix}_reconciliation_report_migration"
role = aws_iam_role.reconciliation_report_migration.id
policy = data.aws_iam_policy_document.reconciliation_report_migration.json
}

resource "aws_security_group" "reconciliation_report_migration" {
count = length(var.lambda_subnet_ids) == 0 ? 0 : 1

name = "${var.prefix}-reconciliation-report-migration"
vpc_id = var.vpc_id

egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}

tags = var.tags
}
35 changes: 35 additions & 0 deletions lambdas/reconciliation-report-migration/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
locals {
lambda_path = "${path.module}/dist/webpack/lambda.zip"
}

resource "aws_lambda_function" "reconciliation_report_migration" {
function_name = "${var.prefix}-ReconciliationReportMigration"
filename = local.lambda_path
source_code_hash = filebase64sha256(local.lambda_path)
handler = "index.handler"
role = aws_iam_role.reconciliation_report_migration.arn
runtime = "nodejs20.x"
timeout = lookup(var.lambda_timeouts, "ReconciliationReportMigration", 900)
memory_size = lookup(var.lambda_memory_sizes, "ReconciliationReportMigration", 1024)
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved

environment {
variables = {
databaseCredentialSecretArn = var.rds_user_access_secret_arn
ReconciliationReportsTable = var.dynamo_tables.reconciliation_reports.name
stackName = var.prefix
}
}

dynamic "vpc_config" {
for_each = length(var.lambda_subnet_ids) == 0 ? [] : [1]
content {
subnet_ids = var.lambda_subnet_ids
security_group_ids = compact([
aws_security_group.reconciliation_report_migration[0].id,
var.rds_security_group_id
])
}
}

tags = var.tags
}
3 changes: 3 additions & 0 deletions lambdas/reconciliation-report-migration/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
output "reconciliation_report_migration_function_arn" {
value = aws_lambda_function.reconciliation_report_migration.arn
}
45 changes: 45 additions & 0 deletions lambdas/reconciliation-report-migration/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"name": "@cumulus/reconciliation-report-migration",
"version": "19.0.0",
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved
"description": "Lambda function for reconciliation report migration from DynamoDB to Postgres",
"author": "Cumulus Authors",
"license": "Apache-2.0",
"engines": {
"node": ">=20.12.2"
},
"private": true,
"main": "./dist/lambda/index.js",
"types": "./dist/lambda/index.d.ts",
"scripts": {
"clean": "rm -rf dist",
"build": "rm -rf dist && mkdir dist && npm run prepare && npm run webpack",
"build-lambda-zip": "cd dist/webpack && node ../../../../bin/zip.js lambda.zip index.js",
"package": "npm run clean && npm run prepare && npm run webpack && npm run build-lambda-zip",
"test": "../../node_modules/.bin/ava",
"test:ci": "../../scripts/run_package_ci_unit.sh",
"test:coverage": "../../node_modules/.bin/nyc npm test",
"prepare": "npm run tsc",
"tsc": "../../node_modules/.bin/tsc",
"tsc:listEmittedFiles": "../../node_modules/.bin/tsc --listEmittedFiles",
"webpack": "../../node_modules/.bin/webpack"
},
"ava": {
"files": [
"tests/**/*.js"
],
"timeout": "15m",
"failFast": true
},
"dependencies": {
"@cumulus/api": "19.0.0",
"@cumulus/aws-client": "19.0.0",
"@cumulus/common": "19.0.0",
"@cumulus/db": "19.0.0",
"@cumulus/errors": "19.0.0",
"@cumulus/logger": "19.0.0",
"@cumulus/types": "19.0.0",
"knex": "2.4.1",
"lodash": "^4.17.21",
"pg": "~8.12"
}
}
24 changes: 24 additions & 0 deletions lambdas/reconciliation-report-migration/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { getKnexClient } from '@cumulus/db';
import Logger from '@cumulus/logger';

import { migrateReconciliationReports } from './reconciliation-reports';
import { MigrationSummary } from './types';

const logger = new Logger({ sender: '@cumulus/reconciliation-report-migration' });

export interface HandlerEvent {
env?: NodeJS.ProcessEnv
}

export const handler = async (event: HandlerEvent): Promise<MigrationSummary> => {
const env = event.env ?? process.env;
const knex = await getKnexClient({ env });

try {
const migrationSummary = await migrateReconciliationReports(env, knex);
logger.info(JSON.stringify(migrationSummary));
return { reconciliation_reports: migrationSummary };
} finally {
await knex.destroy();
}
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import { Knex } from 'knex';

import { DynamoDbSearchQueue } from '@cumulus/aws-client';
import { envUtils } from '@cumulus/common';
import {
ReconciliationReportPgModel,
translateApiReconReportToPostgresReconReport,
} from '@cumulus/db';
import { RecordAlreadyMigrated, RecordDoesNotExist } from '@cumulus/errors';
import Logger from '@cumulus/logger';
import { ApiReconciliationReportRecord } from '@cumulus/types/api/reconciliation_reports';

import { MigrationResult } from './types';

const logger = new Logger({ sender: '@cumulus/data-migration/reconciliation-reports' });

export const migrateReconciliationReportRecord = async (
dynamoRecord: ApiReconciliationReportRecord,
knex: Knex
): Promise<void> => {
const reconReportPgModel = new ReconciliationReportPgModel();

let existingRecord;
try {
existingRecord = await reconReportPgModel.get(knex, { name: dynamoRecord.name });
} catch (error) {
if (!(error instanceof RecordDoesNotExist)) {
throw error;
}
}

if (existingRecord
&& dynamoRecord.updatedAt
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved
&& existingRecord.updated_at >= new Date(dynamoRecord.updatedAt)) {
throw new RecordAlreadyMigrated(`Reconciliation report ${dynamoRecord.name} was already migrated, skipping`);
}

const updatedRecord = translateApiReconReportToPostgresReconReport(
<ApiReconciliationReportRecord>dynamoRecord
);

await reconReportPgModel.upsert(knex, updatedRecord);
};

export const migrateReconciliationReports = async (
env: NodeJS.ProcessEnv,
knex: Knex
): Promise<MigrationResult> => {
const reconciliationReportsTable = envUtils.getRequiredEnvVar('ReconciliationReportsTable', env);

const searchQueue = new DynamoDbSearchQueue({
TableName: reconciliationReportsTable,
});

const migrationSummary = {
total_dynamo_db_records: 0,
migrated: 0,
failed: 0,
skipped: 0,
};

let record = await searchQueue.peek();
/* eslint-disable no-await-in-loop */
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved
while (record) {
migrationSummary.total_dynamo_db_records += 1;

try {
await migrateReconciliationReportRecord(record as any, knex);
migrationSummary.migrated += 1;
} catch (error) {
if (error instanceof RecordAlreadyMigrated) {
migrationSummary.skipped += 1;
} else {
migrationSummary.failed += 1;
logger.error(
`Could not create reconciliationReport record in RDS for Dynamo reconciliationReport name ${record.name}:`,
error
Nnaga1 marked this conversation as resolved.
Show resolved Hide resolved
);
}
}

await searchQueue.shift();
record = await searchQueue.peek();
}
/* eslint-enable no-await-in-loop */
logger.info(`successfully migrated ${migrationSummary.migrated} reconciliationReport records`);
return migrationSummary;
};
10 changes: 10 additions & 0 deletions lambdas/reconciliation-report-migration/src/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
export type MigrationResult = {
total_dynamo_db_records: number,
skipped: number,
migrated: number,
failed: number,
};

export type MigrationSummary = {
reconciliation_reports: MigrationResult
};
Loading