Skip to content

Commit

Permalink
Combine ddex crawler+parser, and add dev cmd (#8203)
Browse files Browse the repository at this point in the history
  • Loading branch information
theoilie authored Apr 23, 2024
1 parent ffba9e9 commit a9e0313
Show file tree
Hide file tree
Showing 13 changed files with 347 additions and 405 deletions.
31 changes: 16 additions & 15 deletions dev-tools/audius-compose
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ def load_env(protocol_dir, service, environment):
is_flag=True,
help="Bring up the pedalboard containers",
)
@click.option("--ddex-deps", is_flag=True, help="Bring up only ddex dependency containers (Mongo and S3)")
@click.option("--ddex-release-by-release", is_flag=True, help="Bring up only ddex containers (release-by-release choreography)")
@click.option("--ddex-batched", is_flag=True, help="Bring up only ddex containers (batch choreography)")
@click.option("-a", "--args", type=str, multiple=True)
Expand All @@ -509,6 +510,7 @@ def up(
dashboard,
notifs,
pedalboard,
ddex_deps,
ddex_release_by_release,
ddex_batched,
args,
Expand All @@ -519,9 +521,9 @@ def up(
protocol_dir, discovery_provider_replicas, elasticsearch_replicas, prod
)

if ddex_release_by_release or ddex_batched:
if ddex_deps or ddex_release_by_release or ddex_batched:
generate_ddex_mongo_key(protocol_dir)
profiles = ["--profile=ddex"]
profiles = ["--profile=ddex-deps", *(["--profile=ddex"] if not ddex_deps else [])]
os.environ["DDEX_CHOREOGRAPHY"] = "ERNReleaseByRelease" if ddex_release_by_release else "ERNBatched"
else:
AAO_DIR = pathlib.Path(
Expand Down Expand Up @@ -689,19 +691,18 @@ def test_run(protocol_dir, service, args):
)

if result.returncode != 0 and service.startswith("ddex"):
for container in ["ddex-crawler", "ddex-parser"]:
logs_result = subprocess.run(
f"docker logs {container}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
if logs_result.returncode == 0:
print(f"\nLogs for {container}:\n")
print(logs_result.stdout.decode("utf-8"))
else:
print(f"\nFailed to retrieve logs for {container}\n")
print(logs_result.stderr.decode("utf-8"))
# The crawler and parser were merged into a single ingester container, so
# there is only one set of container logs to dump when a ddex test run fails.
# Keeping the name in one place stops the command and the messages drifting.
container = "ddex-ingester"
logs_result = subprocess.run(
    f"docker logs {container}",
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
if logs_result.returncode == 0:
    print(f"\nLogs for {container}:\n")
    # errors="replace": container output is arbitrary bytes; a decode error
    # here would hide the real test failure behind a UnicodeDecodeError.
    print(logs_result.stdout.decode("utf-8", errors="replace"))
else:
    print(f"\nFailed to retrieve logs for {container}\n")
    print(logs_result.stderr.decode("utf-8", errors="replace"))

sys.exit(result.returncode)

Expand Down
48 changes: 10 additions & 38 deletions dev-tools/compose/docker-compose.ddex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,6 @@ services:
profiles:
- ddex

ddex-crawler:
container_name: ddex-crawler
build:
context: ${PROJECT_ROOT}/packages/ddex/ingester
dockerfile: ${PROJECT_ROOT}/packages/ddex/ingester/Dockerfile
environment:
- DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0
env_file: .env
entrypoint: ./ingester --service crawler
healthcheck:
test: ["CMD-SHELL", "pgrep ./ingester || exit 1"]
interval: 30s
timeout: 10s
retries: 3
networks:
- ddex-network
profiles:
- ddex

ddex-parser:
container_name: ddex-parser
build:
context: ${PROJECT_ROOT}/packages/ddex/ingester
dockerfile: ${PROJECT_ROOT}/packages/ddex/ingester/Dockerfile
environment:
- DDEX_MONGODB_URL=mongodb://mongo:mongo@ddex-mongo:27017/ddex?authSource=admin&replicaSet=rs0
env_file: .env
entrypoint: ./ingester --service parser
healthcheck:
test: ["CMD-SHELL", "pgrep ./ingester || exit 1"]
interval: 30s
timeout: 10s
retries: 3
networks:
- ddex-network
profiles:
- ddex

ddex-publisher:
container_name: ddex-publisher
build:
Expand Down Expand Up @@ -178,6 +140,16 @@ services:
build:
context: ${PROJECT_ROOT}/packages/ddex/ingester
dockerfile: ${PROJECT_ROOT}/packages/ddex/ingester/Dockerfile
entrypoint: ./ingester
healthcheck:
test: ["CMD-SHELL", "pgrep ./ingester || exit 1"]
interval: 30s
timeout: 10s
retries: 3
networks:
- ddex-network
profiles:
- ddex

ddex-test:
container_name: ddex-test
Expand Down
31 changes: 6 additions & 25 deletions dev-tools/compose/docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,11 @@ services:

# ddex

ddex-crawler:
ddex-ingester:
extends:
file: docker-compose.yml
service: ddex-crawler
container_name: ddex-crawler
service: ddex-ingester
container_name: ddex-ingester
logging: *default-logging
environment:
AWS_ENDPOINT: 'http://ddex-s3:4566'
Expand All @@ -458,21 +458,6 @@ services:
ddex-s3:
condition: service_healthy

ddex-parser:
extends:
file: docker-compose.yml
service: ddex-parser
container_name: ddex-parser
logging: *default-logging
environment:
AWS_ENDPOINT: 'http://ddex-s3:4566'
DDEX_CHOREOGRAPHY: ${DDEX_CHOREOGRAPHY}
depends_on:
ddex-mongo-init:
condition: service_completed_successfully
ddex-s3:
condition: service_healthy

ddex-publisher:
extends:
file: docker-compose.yml
Expand Down Expand Up @@ -527,9 +512,7 @@ services:
networks:
- ddex-network
depends_on:
ddex-crawler:
condition: service_healthy
ddex-parser:
ddex-ingester:
condition: service_healthy
# ddex-publisher:
# condition: service_healthy
Expand All @@ -549,11 +532,9 @@ services:
networks:
- ddex-network
depends_on:
ddex-crawler:
condition: service_healthy
ddex-parser:
ddex-ingester:
condition: service_healthy

test-ddex-unittests:
container_name: test-ddex-unittests
extends:
Expand Down
35 changes: 7 additions & 28 deletions dev-tools/compose/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ services:
<<: *common
profiles:
- ddex
- ddex-deps

ddex-mongo-init:
extends:
Expand All @@ -202,14 +203,7 @@ services:
condition: service_healthy
profiles:
- ddex

ddex-ingester:
extends:
file: docker-compose.ddex.yml
service: ddex-ingester
<<: *common
profiles:
- ddex-build-only
- ddex-deps

ddex-web:
extends:
Expand Down Expand Up @@ -240,12 +234,12 @@ services:
condition: service_healthy
profiles:
- ddex
ddex-crawler:

ddex-ingester:
extends:
file: docker-compose.ddex.yml
service: ddex-crawler
container_name: ddex-crawler
service: ddex-ingester
container_name: ddex-ingester
<<: *common
environment:
DDEX_CHOREOGRAPHY: ${DDEX_CHOREOGRAPHY}
Expand All @@ -257,22 +251,6 @@ services:
condition: service_healthy
profiles:
- ddex

ddex-parser:
extends:
file: docker-compose.ddex.yml
service: ddex-parser
container_name: ddex-parser
<<: *common
environment:
DDEX_CHOREOGRAPHY: ${DDEX_CHOREOGRAPHY}
depends_on:
ddex-mongo-init:
condition: service_completed_successfully
ddex-s3:
condition: service_healthy
profiles:
- ddex

ddex-s3:
extends:
Expand All @@ -287,6 +265,7 @@ services:
- "/var/run/docker.sock:/var/run/docker.sock"
profiles:
- ddex
- ddex-deps

# Storage (content node)

Expand Down
3 changes: 1 addition & 2 deletions monitoring/vector/vector.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
"relay",
"ddex-publisher",
"ddex-web",
"ddex-parser",
"ddex-crawler"
"ddex-ingester"
]
exclude_containers = [
# System containers
Expand Down
2 changes: 1 addition & 1 deletion packages/ddex/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ audius-cli launch ddex
```

## Local dev
DDEX requires these services: `ddex-webapp`, `ddex-crawler`, `ddex-parser`, `ddex-publisher`, `ddex-mongo`.
DDEX requires these services: `ddex-webapp`, `ddex-ingester`, `ddex-publisher`, `ddex-mongo`.

### Env configuration
All services read from `packages/ddex/.env`.
Expand Down
46 changes: 0 additions & 46 deletions packages/ddex/ingester/.air.toml

This file was deleted.

10 changes: 3 additions & 7 deletions packages/ddex/ingester/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,9 @@
Crawls and parses new DDEX uploads.

### Local Dev
`crawler` and `parser` are independent ingester services. Each handles a stage in the DDEX ingestion pipeline.

The easiest way to test DDEX locally is via `audius-compose up --ddex-[release-by-release|batched]`. If you want to enable hot reloading for an ingester service:

1. Make sure the DDEX stack is running. See `packages/ddex/README.md` for instructions on how to bring up the DDEX stack locally.
2. `docker stop ddex-crawler` or `docker stop ddex-parser` (assuming it's running as part of the whole DDEX stack)
3. `IS_DEV=true AWS_ENDPOINT=http://ingress:4566 DDEX_CHOREOGRAPHY=ERNReleaseByRelease air -c .air.toml -- --service [crawler|parser]`
1. Make sure the DDEX dependencies are running: `audius-compose up --ddex-deps`
2. (Optional) See the webapp README to start that server and go through the OAuth flow with a staging user
3. Parse a file: `IS_DEV=true AWS_ENDPOINT=http://ingress:4566 DDEX_CHOREOGRAPHY=ERNBatched go run cmd/main.go ./e2e_test/fixtures/batch/fuga/20240305090456555 --wipe`


### Getting files
Expand Down
Loading

0 comments on commit a9e0313

Please sign in to comment.