Skip to content

Crawl Repos

Crawl Repos #297

Workflow file for this run

# Crawl Repos
#
# Runs the repository crawl on a daily schedule (07:00, cron is evaluated in
# UTC by GitHub Actions) or on manual dispatch. The crawl writes to
# "<DB_NAME>.new" / "<METADATA_DIR>.new"; the new database is diffed against
# the committed one with sqldiff, and only when it differs are the data files
# swapped in, products regenerated, and a pull request opened.
#
# NOTE(review): peter-evans/create-pull-request needs `contents: write` and
# `pull-requests: write` on GITHUB_TOKEN. Consider declaring an explicit
# `permissions:` block once it is confirmed that scripts/crawl.sh needs no
# other token scopes.
name: Crawl Repos

on:
  schedule:
    - cron: '0 7 * * *'
  workflow_dispatch:

# Workflow-level env is exported to every `run` step, so the scripts below
# read these as ordinary shell variables instead of interpolating
# `${{ env.* }}` expressions into the script text.
env:
  DB_NAME: metadata.sqlite3
  METADATA_DIR: metadata

jobs:
  crawl:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          # Quoted so YAML never parses the version as a float
          # (an unquoted 1.20 would become 1.2).
          go-version: '1.22'

      - name: Install sqldiff
        # Refresh the package index first; a stale index on the runner image
        # can make the install fail with 404s.
        run: |
          sudo apt-get update
          sudo apt-get install -y sqlite3-tools

      - name: Crawl repos
        id: crawl
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Outputs go to ".new" paths so the committed data stays untouched
        # until the diff step has compared old and new.
        run: |
          ./scripts/crawl.sh "$DB_NAME.new" "$METADATA_DIR.new"

      - name: Diff against previous crawl
        id: diff
        # Sets outputs:
        #   changed - 'true' / 'false'
        #   changes - the sqldiff output (only set when changed == 'true')
        run: |
          changes=$(sqldiff --primarykey "$DB_NAME" "$DB_NAME.new")
          if [ -z "$changes" ]; then
            echo "No changes detected."
            echo "changed=false" >> "$GITHUB_OUTPUT"
          else
            echo "Changes detected!"
            echo "changed=true" >> "$GITHUB_OUTPUT"
            echo "$changes"
            # The diff can contain crawled text, so use a random delimiter:
            # a fixed one could be matched by a line of the diff, truncating
            # the value or injecting additional step outputs.
            delimiter="END_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
            {
              echo "changes<<$delimiter"
              echo "$changes"
              echo "$delimiter"
            } >> "$GITHUB_OUTPUT"
          fi

      - name: Update data
        id: update
        if: steps.diff.outputs.changed == 'true'
        # Replace the committed database and metadata directory with the
        # freshly crawled ones. `:?` aborts the step if the variable is ever
        # empty, rather than handing `rm -rf` an empty path.
        run: |
          mv "$DB_NAME.new" "$DB_NAME"
          rm -rf "${METADATA_DIR:?}"
          mv "$METADATA_DIR.new" "$METADATA_DIR"

      - name: Generate products
        id: generate
        # `outcome` is 'skipped' when the update step did not run, so this
        # step only runs after the data was actually replaced.
        if: steps.update.outcome == 'success'
        run: |
          ./scripts/generate-products.sh

      - name: Create PR
        if: steps.generate.outcome == 'success'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          # Only these paths are committed to the PR branch.
          add-paths: |
            ${{ env.DB_NAME }}
            ${{ env.METADATA_DIR }}
            products
            api/sdkmeta/data
            api-js/src/data
          branch: "metadata-update"
          title: "fix(metadata): ingest new data and regenerate products"
          commit-message: "fix(metadata): ingest new data and regenerate products"
          body: |
            Metadata has changed. Ensure that the changes obey patch semantics (data added or modified in a non-breaking
            way), as upstream services may consume these changes automatically.
            ```
            ${{ steps.diff.outputs.changes }}
            ```