Update to use make #4

Open: wants to merge 1 commit into main
2 changes: 1 addition & 1 deletion Dockerfile
@@ -4,7 +4,7 @@ FROM python:3.8-slim-bookworm
WORKDIR /
RUN apt-get update
RUN apt-get upgrade -y
RUN apt-get install curl git bats -y
RUN apt-get install curl git bats make sqlite3 -y

RUN pip install awscli
COPY requirements/requirements.txt /requirements.txt
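The image now includes make and sqlite3 so the task can drive the build through the new Makefile. A quick smoke test of the rebuilt image (the tag organisation-task is a placeholder, not from this repo):

docker build -t organisation-task .
docker run --rm organisation-task sh -c 'make --version && sqlite3 --version'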
41 changes: 41 additions & 0 deletions task/Makefile
@@ -0,0 +1,41 @@
all::

include makerules/makerules.mk
include makerules/datapackage.mk

organisation-package:: $(PACKAGE_DIR)organisation.csv $(PACKAGE_DIR)organisation-check.csv

ifneq ($(WRITE_S3_BUCKET),)
save-organisation-package:: organisation-package
@echo Saving package to S3 bucket $(WRITE_S3_BUCKET)
aws s3 sync $(PACKAGE_DIR) s3://$(WRITE_S3_BUCKET)/organisation-package/$(PACKAGE_DIR) --no-progress
else
save-organisation-package:: organisation-package
@echo WRITE_S3_BUCKET not set. Package will not be saved.
endif

$(CACHE_DIR)local-planning-authority.csv:
@mkdir -p $(CACHE_DIR)
curl -qfs "https://files.planning.data.gov.uk/dataset/local-planning-authority.csv" > $(CACHE_DIR)local-planning-authority.csv

ifneq ($(READ_S3_BUCKET),)
$(PACKAGE_DIR)organisation.csv:
@echo Building organisation data package - using collection files from S3 bucket $(READ_S3_BUCKET)
@mkdir -p $(PACKAGE_DIR)
@mkdir -p $(CACHE_DIR)/organisation-collection/dataset
@aws s3 sync s3://$(READ_S3_BUCKET)/organisation-collection/dataset $(CACHE_DIR)/organisation-collection/dataset/ --no-progress
digital-land organisation-create \
--dataset-dir $(CACHE_DIR)/organisation-collection/dataset \
--output-path $(PACKAGE_DIR)/organisation.csv
else
$(PACKAGE_DIR)organisation.csv:
@echo Building organisation data package - using collection files from CDN
@mkdir -p $(PACKAGE_DIR)
digital-land organisation-create \
--cache-dir $(CACHE_DIR)/organisation-collection/dataset \
--download-url 'https://files.planning.data.gov.uk/organisation-collection/dataset' \
--output-path $(PACKAGE_DIR)/organisation.csv
endif

$(PACKAGE_DIR)organisation-check.csv: $(PACKAGE_DIR)organisation.csv $(CACHE_DIR)local-planning-authority.csv
digital-land organisation-check --input-path $(PACKAGE_DIR)organisation.csv --output-path $@
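
Taken together, these rules build the package and conditionally save it. A minimal sketch of the intended invocation from the task directory, with illustrative bucket names:

cd task
make organisation-package
READ_S3_BUCKET=my-read-bucket WRITE_S3_BUCKET=my-write-bucket make save-organisation-package

If WRITE_S3_BUCKET is unset, save-organisation-package still builds the package but skips the upload, per the else branch above.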
11 changes: 11 additions & 0 deletions task/makerules/datapackage.mk
@@ -0,0 +1,11 @@
ifeq ($(VAR_DIR),)
VAR_DIR=var/
endif

ifeq ($(CACHE_DIR),)
CACHE_DIR=$(VAR_DIR)cache/
endif

ifeq ($(PACKAGE_DIR),)
PACKAGE_DIR=package/
endif
168 changes: 168 additions & 0 deletions task/makerules/makerules.mk
@@ -0,0 +1,168 @@
# deduce the repository
ifeq ($(REPOSITORY),)
REPOSITORY=$(shell basename -s .git `git config --get remote.origin.url`)
endif

ifeq ($(ENVIRONMENT),)
ENVIRONMENT=production
endif

ifeq ($(SOURCE_URL),)
SOURCE_URL=https://raw.githubusercontent.com/digital-land/
endif

ifeq ($(MAKERULES_URL),)
MAKERULES_URL=$(SOURCE_URL)makerules/main/
endif

ifeq ($(CONFIG_URL),)
CONFIG_URL=https://raw.githubusercontent.com/digital-land/config/main/
endif

ifeq ($(COLLECTION_NAME),)
COLLECTION_NAME=$(shell echo "$(REPOSITORY)"|sed 's/-collection$$//')
endif

ifeq ($(COLLECTION_DATASET_BUCKET_NAME),)
COLLECTION_DATASET_BUCKET_NAME=digital-land-$(ENVIRONMENT)-collection-dataset
endif

ifeq ($(HOISTED_COLLECTION_DATASET_BUCKET_NAME),)
HOISTED_COLLECTION_DATASET_BUCKET_NAME=digital-land-$(ENVIRONMENT)-collection-dataset-hoisted
endif

define dataset_url
'https://$(COLLECTION_DATASET_BUCKET_NAME).s3.eu-west-2.amazonaws.com/$(2)-collection/dataset/$(1).sqlite3'
endef

ifeq ($(VAR_DIR),)
VAR_DIR=var/
endif

ifeq ($(CACHE_DIR),)
CACHE_DIR=$(VAR_DIR)cache/
endif


.PHONY: \
makerules\
specification\
config\
init\
first-pass\
second-pass\
third-pass\
clobber\
clean\
commit-makerules\
prune

# keep intermediate files
.SECONDARY:

# don't keep targets built with an error
.DELETE_ON_ERROR:

# work in UTF-8
LANGUAGE := en_GB.UTF-8
LANG := C.UTF-8

# for consistent collation on different machines
LC_COLLATE := C.UTF-8

# current git branch
BRANCH := $(shell git rev-parse --abbrev-ref HEAD)

UNAME := $(shell uname)

# detect the location of the spatialite library; on Linux, add it to the path so Python can pick up the files
ifndef SPATIALITE_EXTENSION
ifeq ($(UNAME), Linux)
SPATIALITE_EXTENSION="/usr/lib/x86_64-linux-gnu/mod_spatialite.so"
endif
ifeq ($(UNAME), Darwin)
SPATIALITE_EXTENSION="/usr/local/lib/mod_spatialite.dylib"
endif
endif

all:: first-pass second-pass third-pass

first-pass::
@:

# restart the make process to pick up collected files
second-pass::
@:

third-pass::
@:

# initialise
init::
pip install --upgrade pip
ifneq (,$(wildcard requirements.txt))
pip3 install --upgrade -r requirements.txt
endif
ifneq (,$(wildcard setup.py))
pip install -e .$(PIP_INSTALL_PACKAGE)
endif
sqlite3 --version

submodules::
git submodule update --init --recursive --remote

# remove targets, force relink
clobber::
@:

# remove intermediate files
clean::
@:

# prune back to source code
prune::
rm -rf ./$(VAR_DIR) $(VALIDATION_DIR)

# update makerules from source
makerules::
curl -qfsL '$(MAKERULES_URL)makerules.mk' > makerules/makerules.mk

ifeq (,$(wildcard ./makerules/specification.mk))
# update local copies of specification files
specification::
@mkdir -p specification/
curl -qfsL '$(SOURCE_URL)/specification/main/specification/attribution.csv' > specification/attribution.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/licence.csv' > specification/licence.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/typology.csv' > specification/typology.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/theme.csv' > specification/theme.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/collection.csv' > specification/collection.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/dataset.csv' > specification/dataset.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/dataset-field.csv' > specification/dataset-field.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/field.csv' > specification/field.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/datatype.csv' > specification/datatype.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/prefix.csv' > specification/prefix.csv
# deprecated ..
curl -qfsL '$(SOURCE_URL)/specification/main/specification/pipeline.csv' > specification/pipeline.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/dataset-schema.csv' > specification/dataset-schema.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/schema.csv' > specification/schema.csv
curl -qfsL '$(SOURCE_URL)/specification/main/specification/schema-field.csv' > specification/schema-field.csv


init:: specification
endif

# local copy of the organisation datapackage
$(CACHE_DIR)organisation.csv:
@mkdir -p $(CACHE_DIR)
curl -qfs "https://files.planning.data.gov.uk/organisation-collection/dataset/organisation.csv" > $(CACHE_DIR)organisation.csv

init:: config

config::;

commit-makerules::
git add makerules
git diff --quiet && git diff --staged --quiet || (git commit -m "Updated makerules $(shell date +%F)"; git push origin $(BRANCH))

commit-collection::
@:
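
Since init checks sqlite3 and the rules above detect a platform-specific spatialite extension, one way to confirm the detected library actually loads (Linux path shown; adjust for your platform):

sqlite3 :memory: '.load /usr/lib/x86_64-linux-gnu/mod_spatialite.so' 'SELECT spatialite_version();'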
81 changes: 6 additions & 75 deletions task/run.sh
@@ -1,83 +1,14 @@
set -e

export SOURCE_URL='https://raw.githubusercontent.com/digital-land/'
export DATASET_DIR=dataset
export CACHE_DIR=var/cache

if [ -z "$DATA_PACKAGE_NAME" ]; then
echo DATA_PACKAGE_NAME not set
exit 1
fi

if [ -z "$READ_S3_BUCKET" ]; then
echo READ_S3_BUCKET not set so files will be downloaded from the production files cdn
fi

if [ -z "$WRITE_S3_BUCKET" ]; then
echo WRITE_S3_BUCKET not set so files will not be uploaded to an S3 Bucket
fi

# TODO should be embedded into package creation code
if [ "$DATA_PACKAGE_NAME" != 'organisation' ]; then
echo Unsupported package.
exit 1
fi

# update digital-land-python
pip install -r ./requirements.txt

TODAY=$(date +%Y-%m-%d)
echo "Running package builder for $DATA_PACKAGE_NAME on $TODAY"
# Setup
make makerules
make init

echo Downloading specification
mkdir -p specification/
curl -qfsL $SOURCE_URL/specification/main/specification/attribution.csv > specification/attribution.csv
curl -qfsL $SOURCE_URL/specification/main/specification/licence.csv > specification/licence.csv
curl -qfsL $SOURCE_URL/specification/main/specification/typology.csv > specification/typology.csv
curl -qfsL $SOURCE_URL/specification/main/specification/theme.csv > specification/theme.csv
curl -qfsL $SOURCE_URL/specification/main/specification/collection.csv > specification/collection.csv
curl -qfsL $SOURCE_URL/specification/main/specification/dataset.csv > specification/dataset.csv
curl -qfsL $SOURCE_URL/specification/main/specification/dataset-field.csv > specification/dataset-field.csv
curl -qfsL $SOURCE_URL/specification/main/specification/field.csv > specification/field.csv
curl -qfsL $SOURCE_URL/specification/main/specification/datatype.csv > specification/datatype.csv
curl -qfsL $SOURCE_URL/specification/main/specification/prefix.csv > specification/prefix.csv
# deprecated ..
curl -qfsL $SOURCE_URL/specification/main/specification/pipeline.csv > specification/pipeline.csv
curl -qfsL $SOURCE_URL/specification/main/specification/dataset-schema.csv > specification/dataset-schema.csv
curl -qfsL $SOURCE_URL/specification/main/specification/schema.csv > specification/schema.csv
curl -qfsL $SOURCE_URL/specification/main/specification/schema-field.csv > specification/schema-field.csv
curl -qfsL $SOURCE_URL/specification/main/specification/datapackage.csv > specification/datapackage.csv
curl -qfsL $SOURCE_URL/specification/main/specification/datapackage-dataset.csv > specification/datapackage-dataset.csv

echo Building data package
mkdir -p $CACHE_DIR

export COLLECTION_NAME=$DATA_PACKAGE_NAME-collection
export COLLECTION_DATASET_DIR=$CACHE_DIR/$COLLECTION_NAME/dataset/

if [ -n "$READ_S3_BUCKET" ]; then
echo Building organisation data package - using collection files from S3 bucket $READ_S3_BUCKET
mkdir -p $COLLECTION_DATASET_DIR
aws s3 sync s3://$READ_S3_BUCKET/$COLLECTION_NAME/$DATASET_DIR $COLLECTION_DATASET_DIR --no-progress
digital-land organisation-create \
--dataset-dir $COLLECTION_DATASET_DIR \
--output-path $DATASET_DIR/organisation.csv
else
echo Building organisation data package - using collection files from CDN
digital-land organisation-create \
--cache-dir $COLLECTION_DATASET_DIR \
--download-url 'https://files.planning.data.gov.uk/organisation-collection/dataset' \
--output-path $DATASET_DIR/organisation.csv
fi

echo Checking data package
curl -qfs https://files.planning.data.gov.uk/dataset/local-planning-authority.csv > $CACHE_DIR/local-planning-authority.csv
digital-land organisation-check --output-path $DATASET_DIR/organisation-check.csv

ls -l $DATASET_DIR || true

# TODO where to permanently store data packages; also this uploads all the files in datasets
if [ -n "$WRITE_S3_BUCKET" ]; then
echo Pushing package to S3 bucket $WRITE_S3_BUCKET
aws s3 sync $DATASET_DIR s3://$WRITE_S3_BUCKET/$DATA_PACKAGE_NAME/$DATASET_DIR --no-progress
fi
# Run
make $DATA_PACKAGE_NAME-package
make save-$DATA_PACKAGE_NAME-package
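
The slimmed-down script now delegates the build to make. A sketch of a complete run, with hypothetical bucket names (both buckets are optional, per the checks above):

DATA_PACKAGE_NAME=organisation READ_S3_BUCKET=my-read-bucket WRITE_S3_BUCKET=my-write-bucket sh run.sh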