Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LogStore to retain build logs, and a S3LogStore implementation #967

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# update these accordingly

[dev-packages]
boto3 = "*"
pytest=">=4.6"
wheel="*"
pytest-cov="*"
Expand Down
1 change: 1 addition & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Note that there is also a Pipfile for this project if you are updating this
# file do not forget to update the Pipfile accordingly
boto3
pyyaml
pytest>=4.6
wheel
Expand Down
22 changes: 20 additions & 2 deletions repo2docker/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import escapism
from pythonjsonlogger import jsonlogger

from traitlets import Any, Dict, Int, List, Unicode, Bool, default
from traitlets import Any, Dict, Instance, Int, List, Type, Unicode, Bool, default
from traitlets.config import Application

from . import __version__
Expand All @@ -38,6 +38,7 @@
RBuildPack,
)
from . import contentproviders
from .logstore import LogStore
from .utils import ByteSpecification, chdir


Expand Down Expand Up @@ -408,6 +409,14 @@ def get_engine(self):
engine_class = entry.load()
return engine_class(parent=self)

# Configurable class trait: which LogStore implementation to use for
# retaining build logs (defaults to the no-op LogStore base class).
logstore = Type(LogStore, help="Log store for build logs", config=True)

# The actual LogStore instance; created lazily via _default_logstore.
_logstore = Instance(LogStore)

@default("_logstore")
def _default_logstore(self):
    # Instantiate the configured LogStore class, parented to this app
    # so it picks up our config and logging.
    return self.logstore(parent=self)

def fetch(self, url, ref, checkout_path):
"""Fetch the contents of `url` and place it in `checkout_path`.

Expand Down Expand Up @@ -765,6 +774,7 @@ def build(self):
bp.__class__.__name__,
extra=dict(phase="building"),
)
self._logstore.write("Using %s builder\n" % bp.__class__.__name__)

for l in picked_buildpack.build(
docker_client,
Expand All @@ -779,8 +789,10 @@ def build(self):
# else this is Docker output
elif "stream" in l:
self.log.info(l["stream"], extra=dict(phase="building"))
self._logstore.write(l["stream"])
elif "error" in l:
self.log.info(l["error"], extra=dict(phase="failure"))
self._logstore.write(l["error"])
raise BuildError(l["error"])
elif "status" in l:
self.log.info(
Expand All @@ -795,7 +807,13 @@ def build(self):
shutil.rmtree(checkout_path, ignore_errors=True)

def start(self):
self.build()
try:
self.build()
finally:
try:
r = self._logstore.close()
except Exception as e:
self.log.error("Failed to save log: {}".format(e))

if self.push:
self.push_image()
Expand Down
138 changes: 138 additions & 0 deletions repo2docker/logstore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import logging
import os
from tempfile import NamedTemporaryFile
import re
from traitlets import Any, Dict, Unicode
from traitlets.config import LoggingConfigurable

try:
import boto3

S3_ENABLED = True
except ImportError:
S3_ENABLED = False


"""Match all ANSI escape codes https://superuser.com/a/380778"""
ansi_escape_regex = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]")


class LogStore(LoggingConfigurable):
    """No-op base class defining the build-log storage interface.

    Subclasses override :meth:`write` and :meth:`close` to persist
    build logs somewhere; this default implementation discards all
    input, so configuring it is equivalent to disabling log retention.
    """

    def write(self, s):
        """Accept a chunk of log text; the base class ignores it."""

    def close(self):
        """Finish logging; subclasses may save or copy the log here."""


class S3LogStore(LogStore):
    """Store a build log in a temporary file and upload it to an S3
    bucket when :meth:`close` is called.

    If metadata is provided, keys must be valid HTTP headers and values
    must be strings:
    https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#object-metadata

    Example bucket policy to allow public read of objects, but prevent listing

        {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Action": [
                        "s3:GetObject"
                    ],
                    "Effect": "Allow",
                    "Principal": {
                        "AWS": [
                            "*"
                        ]
                    },
                    "Resource": [
                        "arn:aws:s3:::mybinder/*"
                    ],
                    "Sid": ""
                }
            ]
        }

    Source: https://gist.github.com/harshavardhana/400558963e4dfe3709623203222ed30c#granting-read-only-permission-to-an-anonymous-user
    """

    # Connection details
    endpoint = Unicode(help="S3 endpoint", config=True)
    access_key = Unicode(help="S3 access key", config=True)
    secret_key = Unicode(help="S3 secret key", config=True)
    session_token = Unicode("", help="S3 session token (optional)", config=True)
    region = Unicode("", help="S3 region (optional)", config=True)

    # Where to store the log
    bucket = Unicode(help="S3 bucket", config=True)
    logname = Unicode(
        "repo2docker.log", help="The name and/or path of the log", config=True
    )

    metadata = Dict(
        {},
        help="Metadata to be associated with the log file",
        config=True,
    )

    # Lazily-created local temp file buffering the log until close().
    _logfile = Any(allow_none=True)

    def __init__(self, **kwargs):
        """Raise early if boto3 is missing so misconfiguration is obvious.

        Raises:
            RuntimeError: if the optional boto3 dependency is not installed.
        """
        if not S3_ENABLED:
            raise RuntimeError("S3LogStore requires the boto3 library")
        super().__init__(**kwargs)
        self.log = logging.getLogger("repo2docker")

    def _s3_credentials(self):
        """Assemble the keyword arguments for boto3.resource()."""
        creds = dict(
            endpoint_url=self.endpoint,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region,
        )
        if self.session_token:
            creds["aws_session_token"] = self.session_token
        return creds

    def write(self, s):
        """Write a log, newlines are not automatically added,
        removes ANSI terminal escape codes"""
        if not s:
            # Nothing to write; also avoids touching a not-yet-created
            # logfile (the original would AttributeError on None here).
            return
        if not self._logfile:
            # Created lazily so no file exists when nothing was logged.
            # Explicit UTF-8 matches the charset declared in the upload
            # ContentType instead of relying on the platform default.
            self._logfile = NamedTemporaryFile("w", encoding="utf-8", delete=False)
        cleaned = ansi_escape_regex.sub("", str(s))
        self._logfile.write(cleaned)

    def close(self):
        """Upload the logfile to S3 and delete the local temp file."""
        if not self._logfile:
            # No log means image already exists so nothing was built
            self.log.debug("No log file")
            return
        self._logfile.close()
        self.log.info(
            f"Uploading log to {self.endpoint} bucket:{self.bucket} key:{self.logname}"
        )
        try:
            s3 = boto3.resource(
                "s3",
                config=boto3.session.Config(signature_version="s3v4"),
                **self._s3_credentials(),
            )
            s3.Bucket(self.bucket).upload_file(
                self._logfile.name,
                self.logname,
                ExtraArgs={
                    "ContentType": "text/plain; charset=utf-8",
                    "Metadata": self.metadata,
                },
            )
        finally:
            # Always remove the temp file — even when the upload fails —
            # so repeated builds do not accumulate orphaned files (the
            # reference is dropped below, so keeping the file would leak).
            try:
                os.remove(self._logfile.name)
            except OSError:
                pass
            self._logfile = None
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ def get_identifier(json):
"toml",
"traitlets",
],
extras_require={
"s3log": ["boto3"],
},
python_requires=">=3.6",
author="Project Jupyter Contributors",
author_email="jupyter@googlegroups.com",
Expand Down
65 changes: 65 additions & 0 deletions tests/unit/test_logstore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
Test S3LogStore
"""
# botocore includes a stub
# https://botocore.amazonaws.com/v1/documentation/api/latest/reference/stubber.html
# but it doesn't work with upload_file so use mock instead
# https://sgillies.net/2017/10/19/mock-is-magic.html
import os
from tempfile import NamedTemporaryFile
from unittest.mock import patch
from repo2docker import logstore


@patch("repo2docker.logstore.boto3")
def test_s3logstore_upload(boto3):
    """close() uploads the buffered log to S3 and removes the local file."""
    logfile = NamedTemporaryFile("w", delete=False)
    store = logstore.S3LogStore(
        endpoint="http://localhost:9000",
        access_key="access",
        secret_key="secret",
        bucket="bucket",
        logname="test/build.log",
        metadata={"test-key": "test value"},
        # Override for testing so we know the name of the tempfile
        _logfile=logfile,
    )

    store.write("hello\n")
    store.close()

    # Connection must be built from the configured credentials.
    expected_creds = dict(
        endpoint_url="http://localhost:9000",
        aws_access_key_id="access",
        aws_secret_access_key="secret",
        region_name="",
    )
    boto3.resource.assert_called_with(
        "s3",
        config=boto3.session.Config(signature_version="s3v4"),
        **expected_creds,
    )

    # The configured bucket/key and upload args must be used.
    bucket_mock = boto3.resource().Bucket
    bucket_mock.assert_called_with("bucket")
    bucket_mock().upload_file.assert_called_with(
        logfile.name,
        "test/build.log",
        ExtraArgs={
            "ContentType": "text/plain; charset=utf-8",
            "Metadata": {"test-key": "test value"},
        },
    )
    # Local temp file is cleaned up after a successful upload.
    assert not os.path.exists(logfile.name)


@patch("repo2docker.logstore.boto3")
def test_s3logstore_empty(boto3):
    """If nothing was written, close() must not touch S3 at all."""
    store = logstore.S3LogStore(
        endpoint="http://localhost:9000",
        access_key="access",
        secret_key="secret",
        bucket="bucket",
        logname="test/build.log",
    )

    # No writes happened, so there is no log file and nothing to upload.
    # (The original bound the result to an unused variable `r`; close()
    # returns None on the early-exit path, so assert that directly.)
    assert store.close() is None

    assert not boto3.resource.called
    assert not boto3.resource().Bucket.called
    assert not boto3.resource().Bucket().upload_file.called