Sync search Elasticsearch #5299
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Sync search Elasticsearch

# **What it does**: It scrapes the whole site and dumps the records in a
#                   temp directory. Then it indexes that into Elasticsearch.
# **Why we have it**: We want our search indexes kept up to date.
# **Who does it impact**: Anyone using search on docs.
# Triggers: manual dispatch (optionally limited to one version) and a
# recurring schedule.
on:
  workflow_dispatch:
    inputs:
      version:
        description: "Version to exclusively generate the search index for. E.g. 'dotcom', 'ghes-3.7', 'ghae'"
        required: false
        # Empty string means no specific version was requested.
        default: ''
  schedule:
    - cron: '23 */4 * * *'  # Run every 4 hours at 23 minutes past the hour
# The workflow token only needs to read the repository contents.
permissions:
  contents: read
# This allows a subsequently queued workflow run to cancel previous runs.
# Note: `github.head_ref` is only populated for pull request events. This
# workflow is triggered by `schedule` and `workflow_dispatch`, so that part
# of the group name is empty and every run shares a single group.
concurrency:
  group: '${{ github.workflow }} @ ${{ github.head_ref }}'
  cancel-in-progress: true
# Workflow-level environment, available to every step of every job.
env:
  # Compared against 'true' by the first step to abort the run.
  FREEZE: ${{ secrets.FREEZE }}
  # Base URL of the Elasticsearch server the steps talk to.
  ELASTICSEARCH_URL: ${{ secrets.ELASTICSEARCH_URL }}
jobs:
  # Builds the site, scrapes it into search records and indexes those
  # records into Elasticsearch. Runs once per language in the matrix.
  updateElasticsearchIndexes:
    name: Update indexes
    # Only run in the internal repository.
    if: ${{ github.repository == 'github/docs-internal' }}
    runs-on: ubuntu-20.04-xl
    strategy:
      # Let the other languages finish even if one of them fails.
      fail-fast: false
      matrix:
        # This needs to match the languages we support
        language: [en, ja, es, pt, cn]
    steps:
      - if: ${{ env.FREEZE == 'true' }}
        run: |
          echo 'The repo is currently frozen! Exiting this workflow.'
          exit 1 # prevents further steps from running

      - name: Check out repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748

      - name: Setup Node
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Cache nextjs build
        uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
        with:
          path: .next/cache
          key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}

      - name: Run build scripts
        run: npm run build

      - name: Start the server in the background
        env:
          ENABLE_DEV_LOGGING: false
        run: |
          # Server output is captured to files so that the debug step below
          # can print it if something goes wrong.
          npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &

          # first sleep to give it a chance to start
          sleep 6
          curl --retry-connrefused --retry 4 -I http://localhost:4002/

      # Dump the captured server logs whenever an earlier step has failed.
      - if: ${{ failure() }}
        name: Debug server outputs on errors
        run: |
          echo "____STDOUT____"
          cat /tmp/stdout.log
          echo "____STDERR____"
          cat /tmp/stderr.log

      - name: Scrape records into a temp directory
        env:
          # If a reusable, or anything in the `data/*` directory is deleted
          # you might get a
          #
          #   RenderError: Can't find the key 'site.data.reusables...' in the scope
          #
          # But that'll get fixed in the next translation pipeline. For now,
          # let's just accept an empty string instead.
          THROW_ON_EMPTY: false

          # Note that by default, this is '' (empty string) and that means
          # the same as not set within the script.
          VERSION: ${{ github.event.inputs.version }}
        run: |
          mkdir /tmp/records
          npm run sync-search-indices -- \
            --language ${{ matrix.language }} \
            --out-directory /tmp/records \
            --no-compression --no-lunr-index

          ls -lh /tmp/records

      - name: Check that Elasticsearch is accessible
        run: |
          # The URL comes from the workflow-level `env` and is read as a
          # quoted shell variable so that special characters in it cannot
          # be reinterpreted by the shell.
          curl --fail --retry-connrefused --retry 5 -I "$ELASTICSEARCH_URL"

      - name: Index into Elasticsearch
        env:
          # Must match what we used when scraping (npm run sync-search-indices)
          # otherwise the script will seek other versions from disk that might
          # not exist.
          VERSION: ${{ github.event.inputs.version }}
        run: |
          ./script/search/index-elasticsearch.js \
            --language ${{ matrix.language }} -- /tmp/records

      - name: Check created indexes and aliases
        run: |
          # Not using `--fail` here because I've observed that it can fail
          # with a rather cryptic 404 error when it should, if anything, be
          # a 200 OK with a list of no indices.
          curl --retry-connrefused --retry 5 "$ELASTICSEARCH_URL/_cat/indices?v"
          curl --retry-connrefused --retry 5 "$ELASTICSEARCH_URL/_cat/aliases?v"

      - name: Send Slack notification if workflow fails
        uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
        if: failure()
        with:
          channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
          color: failure
          # The link filters the Actions page down to runs of this workflow.
          text: The last 'Sync search Elasticsearch' run failed. See https://github.com/${{github.repository}}/actions?query=workflow%3A%22Sync+search+Elasticsearch%22