diff --git a/Pipfile b/Pipfile
index b4d56fdf2..169957fbc 100644
--- a/Pipfile
+++ b/Pipfile
@@ -3,6 +3,7 @@
 # update these accordingly
 
 [dev-packages]
+boto3 = "*"
 pytest=">=4.6"
 wheel="*"
 pytest-cov="*"
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4b75d1bbc..828cd7064 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,5 +1,6 @@
 # Note that there is also a Pipfile for this project if you are updating this
 # file do not forget to update the Pipfile accordingly
+boto3
 pyyaml
 pytest>=4.6
 wheel
diff --git a/repo2docker/app.py b/repo2docker/app.py
index 470e9c0a9..b943adf85 100755
--- a/repo2docker/app.py
+++ b/repo2docker/app.py
@@ -22,7 +22,7 @@
 import escapism
 from pythonjsonlogger import jsonlogger
 
-from traitlets import Any, Dict, Int, List, Unicode, Bool, default
+from traitlets import Any, Dict, Instance, Int, List, Type, Unicode, Bool, default
 from traitlets.config import Application
 
 from . import __version__
@@ -38,6 +38,7 @@
     RBuildPack,
 )
 from . import contentproviders
+from .logstore import LogStore
 from .utils import ByteSpecification, chdir
 
 
@@ -408,6 +409,14 @@ def get_engine(self):
         engine_class = entry.load()
         return engine_class(parent=self)
 
+    logstore = Type(LogStore, help="Log store for build logs", config=True)
+
+    _logstore = Instance(LogStore)
+
+    @default("_logstore")
+    def _default_logstore(self):
+        return self.logstore(parent=self)
+
     def fetch(self, url, ref, checkout_path):
         """Fetch the contents of `url` and place it in `checkout_path`.
 
@@ -765,6 +774,7 @@ def build(self):
                         bp.__class__.__name__,
                         extra=dict(phase="building"),
                     )
+                    self._logstore.write("Using %s builder\n" % bp.__class__.__name__)
 
                     for l in picked_buildpack.build(
                         docker_client,
@@ -779,8 +789,10 @@ def build(self):
                         # else this is Docker output
                         elif "stream" in l:
                             self.log.info(l["stream"], extra=dict(phase="building"))
+                            self._logstore.write(l["stream"])
                         elif "error" in l:
                             self.log.info(l["error"], extra=dict(phase="failure"))
+                            self._logstore.write(l["error"])
                             raise BuildError(l["error"])
                         elif "status" in l:
                             self.log.info(
@@ -795,7 +807,13 @@ def build(self):
                 shutil.rmtree(checkout_path, ignore_errors=True)
 
     def start(self):
-        self.build()
+        try:
+            self.build()
+        finally:
+            try:
+                self._logstore.close()
+            except Exception as e:
+                self.log.error("Failed to save log: {}".format(e))
 
         if self.push:
             self.push_image()
diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py
new file mode 100644
index 000000000..6ee522c68
--- /dev/null
+++ b/repo2docker/logstore.py
@@ -0,0 +1,147 @@
+import logging
+import os
+from tempfile import NamedTemporaryFile
+import re
+from traitlets import Any, Dict, Unicode
+from traitlets.config import LoggingConfigurable
+
+try:
+    import boto3
+
+    S3_ENABLED = True
+except ImportError:
+    S3_ENABLED = False
+
+
+# Match all ANSI escape codes https://superuser.com/a/380778
+ansi_escape_regex = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]")
+
+
+class LogStore(LoggingConfigurable):
+    """Abstract interface for a class that stores a build log.
+    This default implementation does nothing."""
+
+    def write(self, s):
+        """Write to the log"""
+        pass
+
+    def close(self):
+        """Finish logging. Implementations may save or copy the log."""
+        pass
+
+
+class S3LogStore(LogStore):
+    """Store a build log and upload to a S3 bucket on close
+
+    If metadata is provided keys must be valid HTTP headers, and values must be strings
+    https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#object-metadata
+
+    Example bucket policy to allow public read of objects, but prevent listing
+
+    {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Action": [
+                    "s3:GetObject"
+                ],
+                "Effect": "Allow",
+                "Principal": {
+                    "AWS": [
+                        "*"
+                    ]
+                },
+                "Resource": [
+                    "arn:aws:s3:::mybinder/*"
+                ],
+                "Sid": ""
+            }
+        ]
+    }
+
+    Source: https://gist.github.com/harshavardhana/400558963e4dfe3709623203222ed30c#granting-read-only-permission-to-an-anonymous-user
+
+    """
+
+    # Connection details
+    endpoint = Unicode(help="S3 endpoint", config=True)
+    access_key = Unicode(help="S3 access key ", config=True)
+    secret_key = Unicode(help="S3 secret key", config=True)
+    session_token = Unicode("", help="S3 session token (optional)", config=True)
+    region = Unicode("", help="S3 region (optional)", config=True)
+
+    # Where to store the log
+    bucket = Unicode(help="S3 bucket", config=True)
+    logname = Unicode(
+        "repo2docker.log", help="The name and/or path of the log", config=True
+    )
+
+    metadata = Dict(
+        {},
+        help="Metadata to be associated with the log file",
+        config=True,
+    )
+
+    _logfile = Any(allow_none=True)
+
+    def __init__(self, **kwargs):
+        if not S3_ENABLED:
+            raise RuntimeError("S3LogStore requires the boto3 library")
+        super().__init__(**kwargs)
+        self.log = logging.getLogger("repo2docker")
+
+    def _s3_credentials(self):
+        creds = dict(
+            endpoint_url=self.endpoint,
+            aws_access_key_id=self.access_key,
+            aws_secret_access_key=self.secret_key,
+            region_name=self.region,
+        )
+        if self.session_token:
+            creds["aws_session_token"] = self.session_token
+        return creds
+
+    def write(self, s):
+        """Write a log, newlines are not automatically added,
+        removes ANSI terminal escape codes
+
+        Empty writes are ignored, so no temporary file is created (and
+        nothing is uploaded on close) when nothing was logged."""
+        if not s:
+            return
+        if self._logfile is None:
+            self._logfile = NamedTemporaryFile("w", delete=False)
+        self._logfile.write(ansi_escape_regex.sub("", str(s)))
+
+    def close(self):
+        """Upload the logfile to S3
+
+        The temporary log file is removed whether or not the upload
+        succeeds; upload errors are propagated to the caller."""
+        if self._logfile is None:
+            # No log means image already exists so nothing was built
+            self.log.debug("No log file")
+            return
+        self._logfile.close()
+        logfile_name = self._logfile.name
+        self.log.info(
+            f"Uploading log to {self.endpoint} bucket:{self.bucket} key:{self.logname}"
+        )
+        try:
+            s3 = boto3.resource(
+                "s3",
+                config=boto3.session.Config(signature_version="s3v4"),
+                **self._s3_credentials(),
+            )
+            s3.Bucket(self.bucket).upload_file(
+                logfile_name,
+                self.logname,
+                ExtraArgs={
+                    "ContentType": "text/plain; charset=utf-8",
+                    "Metadata": self.metadata,
+                },
+            )
+        finally:
+            # Always clean up, otherwise a failed upload leaks the temp file
+            self._logfile = None
+            os.remove(logfile_name)
diff --git a/setup.py b/setup.py
index 27c19c93c..2d620668f 100644
--- a/setup.py
+++ b/setup.py
@@ -58,6 +58,9 @@ def get_identifier(json):
         "toml",
         "traitlets",
     ],
+    extras_require={
+        "s3log": ["boto3"],
+    },
     python_requires=">=3.6",
     author="Project Jupyter Contributors",
     author_email="jupyter@googlegroups.com",
diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py
new file mode 100644
index 000000000..8c832c037
--- /dev/null
+++ b/tests/unit/test_logstore.py
@@ -0,0 +1,104 @@
+"""
+Test S3LogStore
+"""
+# botocore includes a stub
+# https://botocore.amazonaws.com/v1/documentation/api/latest/reference/stubber.html
+# but it doesn't work with upload_file so use mock instead
+# https://sgillies.net/2017/10/19/mock-is-magic.html
+import os
+from tempfile import NamedTemporaryFile
+from unittest.mock import patch
+import pytest
+from repo2docker import logstore
+
+
+@patch("repo2docker.logstore.boto3")
+def test_s3logstore_upload(boto3):
+    tmp_logfile = NamedTemporaryFile("w", delete=False)
+    store = logstore.S3LogStore(
+        endpoint="http://localhost:9000",
+        access_key="access",
+        secret_key="secret",
+        bucket="bucket",
+        logname="test/build.log",
+        metadata={"test-key": "test value"},
+        # Override for testing so we know the name of the tempfile
+        _logfile=tmp_logfile,
+    )
+
+    store.write("hello\n")
+    store.close()
+
+    boto3.resource.assert_called_with(
+        "s3",
+        config=boto3.session.Config(signature_version="s3v4"),
+        endpoint_url="http://localhost:9000",
+        aws_access_key_id="access",
+        aws_secret_access_key="secret",
+        region_name="",
+    )
+    boto3.resource().Bucket.assert_called_with("bucket")
+    boto3.resource().Bucket().upload_file.assert_called_with(
+        tmp_logfile.name,
+        "test/build.log",
+        ExtraArgs={
+            "ContentType": "text/plain; charset=utf-8",
+            "Metadata": {"test-key": "test value"},
+        },
+    )
+    assert not os.path.exists(tmp_logfile.name)
+
+
+@patch("repo2docker.logstore.boto3")
+def test_s3logstore_empty(boto3):
+    store = logstore.S3LogStore(
+        endpoint="http://localhost:9000",
+        access_key="access",
+        secret_key="secret",
+        bucket="bucket",
+        logname="test/build.log",
+    )
+
+    store.close()
+
+    assert not boto3.resource.called
+    assert not boto3.resource().Bucket.called
+    assert not boto3.resource().Bucket().upload_file.called
+
+
+@patch("repo2docker.logstore.boto3")
+def test_s3logstore_empty_write(boto3):
+    store = logstore.S3LogStore(
+        endpoint="http://localhost:9000",
+        access_key="access",
+        secret_key="secret",
+        bucket="bucket",
+        logname="test/build.log",
+    )
+
+    # An empty first write must not fail or create a log to upload
+    store.write("")
+    store.close()
+
+    assert not boto3.resource.called
+
+
+@patch("repo2docker.logstore.boto3")
+def test_s3logstore_upload_failure(boto3):
+    boto3.resource().Bucket().upload_file.side_effect = RuntimeError("upload failed")
+    tmp_logfile = NamedTemporaryFile("w", delete=False)
+    store = logstore.S3LogStore(
+        endpoint="http://localhost:9000",
+        access_key="access",
+        secret_key="secret",
+        bucket="bucket",
+        logname="test/build.log",
+        _logfile=tmp_logfile,
+    )
+
+    store.write("hello\n")
+    with pytest.raises(RuntimeError):
+        store.close()
+
+    # The temporary file must not be leaked when the upload fails
+    assert not os.path.exists(tmp_logfile.name)