From 0934d734813046034186e5c66c691516dc65bd74 Mon Sep 17 00:00:00 2001 From: Michelle Brier Date: Tue, 6 Feb 2024 11:16:19 -0800 Subject: [PATCH] DDEX crawler (#7470) --- dev-tools/compose/docker-compose.ddex.yml | 10 ++ packages/ddex/.env.dev | 13 ++ packages/ddex/{webapp/server => }/.env.stage | 7 + packages/ddex/README.md | 43 ++++++ packages/ddex/ingester/.air.toml | 6 +- packages/ddex/ingester/README.md | 1 + packages/ddex/ingester/cmd/main.go | 38 ++++-- packages/ddex/ingester/common/common.go | 54 ++++++++ packages/ddex/ingester/crawler/crawler.go | 124 +++++++++++++++++- packages/ddex/ingester/go.mod | 3 + packages/ddex/ingester/go.sum | 8 ++ packages/ddex/ingester/indexer/indexer.go | 43 +++--- packages/ddex/ingester/parser/parser.go | 66 ++++------ packages/ddex/publisher/.env.stage | 3 - packages/ddex/publisher/README.md | 5 +- packages/ddex/publisher/package.json | 1 + packages/ddex/publisher/src/index.ts | 5 +- packages/ddex/webapp/README.md | 39 +----- .../src/components/Collection/Collection.tsx | 120 +++-------------- packages/ddex/webapp/server/.env.dev | 4 - packages/ddex/webapp/server/package.json | 1 + packages/ddex/webapp/server/src/app.ts | 3 + packages/ddex/webapp/server/src/index.ts | 7 +- .../webapp/server/src/services/dbService.ts | 19 --- .../ddex/webapp/server/src/services/s3.ts | 2 +- 25 files changed, 375 insertions(+), 250 deletions(-) create mode 100644 packages/ddex/.env.dev rename packages/ddex/{webapp/server => }/.env.stage (56%) create mode 100644 packages/ddex/ingester/common/common.go delete mode 100644 packages/ddex/publisher/.env.stage delete mode 100644 packages/ddex/webapp/server/.env.dev diff --git a/dev-tools/compose/docker-compose.ddex.yml b/dev-tools/compose/docker-compose.ddex.yml index 38b177b9d9d..9c4fd683aef 100644 --- a/dev-tools/compose/docker-compose.ddex.yml +++ b/dev-tools/compose/docker-compose.ddex.yml @@ -13,6 +13,8 @@ services: environment: - DDEX_PORT=9000 - DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0 + env_file: + - ${PROJECT_ROOT}/packages/ddex/.env ports: - "9000:9000" networks: @@ -27,6 +29,8 @@ services: dockerfile: ${PROJECT_ROOT}/packages/ddex/ingester/Dockerfile environment: - DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0 + env_file: + - ${PROJECT_ROOT}/packages/ddex/.env depends_on: ddex-mongo: condition: service_healthy @@ -43,6 +47,8 @@ services: dockerfile: ${PROJECT_ROOT}/packages/ddex/ingester/Dockerfile environment: - DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0 + env_file: + - ${PROJECT_ROOT}/packages/ddex/.env depends_on: ddex-mongo: condition: service_healthy @@ -59,6 +65,8 @@ services: dockerfile: ${PROJECT_ROOT}/packages/ddex/ingester/Dockerfile environment: - DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0 + env_file: + - ${PROJECT_ROOT}/packages/ddex/.env depends_on: ddex-mongo: condition: service_healthy @@ -79,6 +87,8 @@ services: TURBO_TOKEN: '${TURBO_TOKEN}' environment: - DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0 + env_file: + - ${PROJECT_ROOT}/packages/ddex/.env depends_on: ddex-mongo: condition: service_healthy diff --git a/packages/ddex/.env.dev b/packages/ddex/.env.dev new file mode 100644 index 00000000000..8a89825b9ce --- /dev/null +++ b/packages/ddex/.env.dev @@ -0,0 +1,13 @@ +NODE_ENV='dev' + +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION= +AWS_BUCKET_RAW= +AWS_BUCKET_INDEXED= + +DDEX_KEY= +DDEX_SECRET= + +# use stage optimizely +OPTIMIZELY_SDK_KEY='MX4fYBgANQetvmBXGpuxzF' diff --git a/packages/ddex/webapp/server/.env.stage b/packages/ddex/.env.stage similarity index 56% rename from packages/ddex/webapp/server/.env.stage rename to packages/ddex/.env.stage index 03713b9f835..d5c3cbeb2ce 100644 --- a/packages/ddex/webapp/server/.env.stage +++ b/packages/ddex/.env.stage @@ -1,4 +1,11 @@ NODE_ENV='stage' + +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION='us-west-2' +AWS_BUCKET_RAW='ddex-dev-audius-raw' +AWS_BUCKET_INDEXED='ddex-dev-audius-indexed' + DDEX_KEY='49d5e13d355709b615b7cce7369174fb240b6b39' DDEX_SECRET='2b2c2b90d9a489234ae629a5284de84fb0633306257f17667aaebf2345d92152' OPTIMIZELY_SDK_KEY='MX4fYBgANQetvmBXGpuxzF' diff --git a/packages/ddex/README.md b/packages/ddex/README.md index 25192af3a85..64df2aa076d 100644 --- a/packages/ddex/README.md +++ b/packages/ddex/README.md @@ -5,6 +5,15 @@ Processes and uploads DDEX releases to Audius. ## Local Dev DDEX requires these services: `ddex-webapp`, `ddex-crawler`, `ddex-indexer`, `ddex-parser`, `ddex-publisher`, `ddex-mongo`. +### Env configuration +All services read from `.env`. + +To use stage envs: `cp .env.stage .env` + +To use dev envs: `cp .env.dev .env` + +Fill in all missing values. See the `Creating a bucket in S3` section below for how to set up S3. + ### Setup 1. (At the monorepo root) Generate a keyfile for mongodb: ``` @@ -24,3 +33,37 @@ To access the ddex db via the mongo shell: `docker exec -it ddex-mongo mongosh - ### Develop with hot reloading Each service can be run independently as long as `ddex-mongo` is up. See the respective subdirectories' READMEs. + +### Creating a bucket in S3 +1. Create a new bucket in the S3 console with the name `ddex-[dev|staging]-