From 344bb57abc08db9a2dbe01d4fb3a53ca2ef7056a Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:00:23 -0700 Subject: [PATCH 01/19] Add initial eventlog hook --- MANIFEST.in | 3 ++ jupyter_server/base/handlers.py | 4 ++ .../contentsmanager-actions.json | 30 +++++++++++++++ jupyter_server/serverapp.py | 20 +++++++++- jupyter_server/services/contents/handlers.py | 37 ++++++++++++++++++- 5 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 jupyter_server/event-schemas/contentsmanager-actions.json diff --git a/MANIFEST.in b/MANIFEST.in index 9d4060fc69..b81a6d5536 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,9 @@ include setupbase.py include Dockerfile graft tools +# Event Schemas +graft jupyter_server/event-schemas + # Documentation graft docs exclude docs/\#* diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 75467718c8..5185365c4d 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -204,6 +204,10 @@ def jinja_template_vars(self): """User-supplied values to supply to jinja templates.""" return self.settings.get('jinja_template_vars', {}) + @property + def eventlog(self): + return self.settings.get('eventlog') + #--------------------------------------------------------------- # URLs #--------------------------------------------------------------- diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json new file mode 100644 index 0000000000..242111722e --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -0,0 +1,30 @@ +{ + "$id": "jupyter.org/contentsmanager-actions", + "version": 1, + "title": "Contents Manager activities", + "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "type": "object", + "required": ["action", "path"], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + 
"rename", + "create", + "copy" + ], + "description": "Action performed by contents manager" + }, + "path": { + "type": "string", + "description": "Logical path the action was performed in" + }, + "source_path": { + "type": "string", + "description": "If action is 'copy', this specifies the source path" + } + } +} \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 21a4e68ca9..e10bc80967 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,7 @@ import warnings import webbrowser import urllib +from glob import glob from types import ModuleType from base64 import encodebytes @@ -99,6 +100,8 @@ ) from ipython_genutils import py3compat from jupyter_core.paths import jupyter_runtime_dir, jupyter_path +from jupyter_telemetry.eventlog import EventLog + from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp @@ -279,7 +282,8 @@ def init_settings(self, jupyter_app, kernel_manager, contents_manager, server_root_dir=root_dir, jinja2_env=env, terminals_available=False, # Set later if terminals are available - serverapp=self + serverapp=self, + eventlog=jupyter_app.eventlog ) # allow custom overrides for the tornado web app. 
@@ -1758,6 +1762,18 @@ def _init_asyncio_patch(): # WindowsProactorEventLoopPolicy is not compatible with tornado 6 # fallback to the pre-3.8 default of Selector asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) + def init_eventlog(self): + self.eventlog = EventLog(parent=self) + + schemas_glob = os.path.join( + os.path.dirname(__file__), + 'event-schemas', + '*.json' + ) + + for schema_file in glob(schemas_glob): + with open(schema_file) as f: + self.eventlog.register_schema(json.load(f)) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): @@ -1788,10 +1804,12 @@ def initialize(self, argv=None, find_extensions=True, new_httpserver=True): self.init_server_extensions() # Initialize all components of the ServerApp. self.init_logging() + self.init_eventlog() if self._dispatching: return self.init_configurables() self.init_components() + self.init_eventlog() self.init_webapp() if new_httpserver: self.init_httpserver() diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 53aff09078..7bdf369f11 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -112,6 +112,10 @@ async def get(self, path=''): )) validate_model(model, expect_content=content) self._finish_model(model, location=False) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'get', 'path': model['path'] } + ) @web.authenticated async def patch(self, path=''): @@ -120,10 +124,19 @@ async def patch(self, path=''): model = self.get_json_body() if model is None: raise web.HTTPError(400, u'JSON body missing') - model = cm.update(model, path) + self.log.info(model) + model = yield maybe_future(cm.update(model, path)) validate_model(model, expect_content=False) self._finish_model(model) + self.log.info(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + # FIXME: 'path' always has a 
leading slash, while model['path'] does not. + # What to do here for source_path? path munge manually? Eww + { 'action': 'rename', 'path': model['path'], 'source_path': path } + ) + @gen.coroutine async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -134,6 +147,10 @@ async def _copy(self, copy_from, copy_to=None): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } + ) async def _upload(self, model, path): """Handle upload of a new file to path""" @@ -142,6 +159,10 @@ async def _upload(self, model, path): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'upload', 'path': model['path'] } + ) async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" @@ -150,6 +171,11 @@ async def _new_untitled(self, path, type='', ext=''): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + # Set path to path of created object, not directory it was created in + { 'action': 'create', 'path': model['path'] } + ) async def _save(self, model, path): """Save an existing file.""" @@ -160,6 +186,11 @@ async def _save(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'save', 'path': model['path'] } + ) + @web.authenticated async def post(self, path=''): """Create a new file in the specified path. 
@@ -228,6 +259,10 @@ async def delete(self, path=''): cm.delete(path) self.set_status(204) self.finish() + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'delete', 'path': path } + ) class CheckpointsHandler(APIHandler): From a0f40eab99a2c407ddd13ea5e6043830946977d2 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:31:11 -0700 Subject: [PATCH 02/19] Install jupyter_telemetry from source --- .travis.yml | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..6346a3d78d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,105 @@ +# http://travis-ci.org/#!/ipython/ipython +language: python + +cache: + directories: + - $HOME/.cache/bower + - $HOME/.cache/pip +python: + - 3.6 + + +env: + global: + - PATH=$TRAVIS_BUILD_DIR/pandoc:$PATH + matrix: + - GROUP=js/notebook + - GROUP=python + - GROUP=js/base + - GROUP=js/services + +before_install: + - pip install --upgrade pip + # Manually install jupyter_telemetry, as of https://github.com/jupyter/telemetry/pull/10 + - pip install git+https://github.com/yuvipanda/telemetry@5789321 + - pip install --upgrade setuptools wheel nose coverage codecov + - nvm install 6.9.2 + - nvm use 6.9.2 + - node --version + - npm --version + - npm upgrade -g npm + - npm install + - | + if [[ $GROUP == js* ]]; then + npm install -g casperjs@1.1.3 phantomjs-prebuilt@2.1.7 + fi + - | + if [[ $GROUP == docs ]]; then + pip install -r docs/doc-requirements.txt + pip install --upgrade pytest + fi + - | + if [[ $GROUP == selenium ]]; then + pip install --upgrade selenium pytest + # Install Webdriver backend for Firefox: + wget https://github.com/mozilla/geckodriver/releases/download/v0.19.1/geckodriver-v0.19.1-linux64.tar.gz + mkdir geckodriver + tar -xzf geckodriver-v0.19.1-linux64.tar.gz -C geckodriver + export PATH=$PATH:$PWD/geckodriver + fi + - pip install 
"attrs>=17.4.0" + +install: + - pip install --pre .[test] $EXTRA_PIP + - pip freeze + - wget https://github.com/jgm/pandoc/releases/download/1.19.1/pandoc-1.19.1-1-amd64.deb && sudo dpkg -i pandoc-1.19.1-1-amd64.deb + + +script: + - jupyter kernelspec list + - | + symlinks=$(find . -type l| grep -v './node_modules/' | grep -v './git-hooks') + if [[ $(echo $symlinks) ]]; then + echo "Repository contains symlinks which won't work on windows:" + echo $symlinks + echo "" + false + else + true + fi + - 'if [[ $GROUP == js* ]]; then travis_retry python -m notebook.jstest ${GROUP:3}; fi' + - 'if [[ $GROUP == python ]]; then nosetests -v --exclude-dir notebook/tests/selenium --with-coverage --cover-package=notebook notebook; fi' + - 'if [[ $GROUP == selenium ]]; then py.test -sv notebook/tests/selenium; fi' + - | + if [[ $GROUP == docs ]]; then + EXIT_STATUS=0 + make -C docs/ html || EXIT_STATUS=$? + + if [[ $TRAVIS_EVENT_TYPE == cron ]]; then + make -C docs/ linkcheck || EXIT_STATUS=$?; + fi + + pytest --nbval --current-env docs || EXIT_STATUS=$? 
+ exit $EXIT_STATUS + fi + + +matrix: + include: + - python: 3.6 + env: + - GROUP=selenium + - JUPYTER_TEST_BROWSER=firefox + - MOZ_HEADLESS=1 + addons: + firefox: 57.0 + - python: 3.5 + env: GROUP=python + - python: 3.7 + dist: xenial + env: GROUP=python + - python: 3.6 + env: GROUP=docs + +after_success: + - codecov From 96bf2f03f7ba7dc828ef64bb7ebf9016a5f82538 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:57:01 -0700 Subject: [PATCH 03/19] Set up an eventlog API endpoint Bump telemetry extension commit as well --- .travis.yml | 105 ------------------- jupyter_server/services/eventlog/__init__.py | 0 jupyter_server/services/eventlog/handlers.py | 42 ++++++++ 3 files changed, 42 insertions(+), 105 deletions(-) delete mode 100644 .travis.yml create mode 100644 jupyter_server/services/eventlog/__init__.py create mode 100644 jupyter_server/services/eventlog/handlers.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6346a3d78d..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,105 +0,0 @@ -# http://travis-ci.org/#!/ipython/ipython -language: python - -cache: - directories: - - $HOME/.cache/bower - - $HOME/.cache/pip -python: - - 3.6 - - -env: - global: - - PATH=$TRAVIS_BUILD_DIR/pandoc:$PATH - matrix: - - GROUP=js/notebook - - GROUP=python - - GROUP=js/base - - GROUP=js/services - -before_install: - - pip install --upgrade pip - # Manually install jupyter_telemetry, as of https://github.com/jupyter/telemetry/pull/10 - - pip install git+https://github.com/yuvipanda/telemetry@5789321 - - pip install --upgrade setuptools wheel nose coverage codecov - - nvm install 6.9.2 - - nvm use 6.9.2 - - node --version - - npm --version - - npm upgrade -g npm - - npm install - - | - if [[ $GROUP == js* ]]; then - npm install -g casperjs@1.1.3 phantomjs-prebuilt@2.1.7 - fi - - | - if [[ $GROUP == docs ]]; then - pip install -r docs/doc-requirements.txt - pip install --upgrade pytest - fi - - | - if [[ $GROUP == selenium ]]; then - pip install 
--upgrade selenium pytest - # Install Webdriver backend for Firefox: - wget https://github.com/mozilla/geckodriver/releases/download/v0.19.1/geckodriver-v0.19.1-linux64.tar.gz - mkdir geckodriver - tar -xzf geckodriver-v0.19.1-linux64.tar.gz -C geckodriver - export PATH=$PATH:$PWD/geckodriver - fi - - pip install "attrs>=17.4.0" - -install: - - pip install --pre .[test] $EXTRA_PIP - - pip freeze - - wget https://github.com/jgm/pandoc/releases/download/1.19.1/pandoc-1.19.1-1-amd64.deb && sudo dpkg -i pandoc-1.19.1-1-amd64.deb - - -script: - - jupyter kernelspec list - - | - symlinks=$(find . -type l| grep -v './node_modules/' | grep -v './git-hooks') - if [[ $(echo $symlinks) ]]; then - echo "Repository contains symlinks which won't work on windows:" - echo $symlinks - echo "" - false - else - true - fi - - 'if [[ $GROUP == js* ]]; then travis_retry python -m notebook.jstest ${GROUP:3}; fi' - - 'if [[ $GROUP == python ]]; then nosetests -v --exclude-dir notebook/tests/selenium --with-coverage --cover-package=notebook notebook; fi' - - 'if [[ $GROUP == selenium ]]; then py.test -sv notebook/tests/selenium; fi' - - | - if [[ $GROUP == docs ]]; then - EXIT_STATUS=0 - make -C docs/ html || EXIT_STATUS=$? - - if [[ $TRAVIS_EVENT_TYPE == cron ]]; then - make -C docs/ linkcheck || EXIT_STATUS=$?; - fi - - pytest --nbval --current-env docs || EXIT_STATUS=$? 
- exit $EXIT_STATUS - fi - - -matrix: - include: - - python: 3.6 - env: - - GROUP=selenium - - JUPYTER_TEST_BROWSER=firefox - - MOZ_HEADLESS=1 - addons: - firefox: 57.0 - - python: 3.5 - env: GROUP=python - - python: 3.7 - dist: xenial - env: GROUP=python - - python: 3.6 - env: GROUP=docs - -after_success: - - codecov diff --git a/jupyter_server/services/eventlog/__init__.py b/jupyter_server/services/eventlog/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py new file mode 100644 index 0000000000..687b2591cc --- /dev/null +++ b/jupyter_server/services/eventlog/handlers.py @@ -0,0 +1,42 @@ +import json + +from tornado import web + +from notebook.utils import url_path_join +from notebook.base.handlers import APIHandler, json_errors +from jupyter_telemetry.eventlog import EventLog + + +class EventLoggingHandler(APIHandler): + """ + A handler that receives and stores telemetry data from the client. 
+ """ + @json_errors + @web.authenticated + def post(self, *args, **kwargs): + try: + # Parse the data from the request body + raw_event = json.loads(self.request.body.strip().decode()) + except Exception as e: + raise web.HTTPError(400, str(e)) + + required_fields = {'schema', 'version', 'event'} + for rf in required_fields: + if rf not in raw_event: + raise web.HTTPError(400, f'{rf} is a required field') + + schema_name = raw_event['schema'] + version = raw_event['version'] + event = raw_event['event'] + + # Profile, and move to a background thread if this is problematic + # FIXME: Return a more appropriate error response if validation fails + self.eventlog.record_event(schema_name, version, event) + + self.set_status(204) + self.finish() + + +default_handlers = [ + (r"/api/eventlog", EventLoggingHandler), +] \ No newline at end of file From 06b91e0c34050bfab6072b829584a63e15fff60b Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 13:00:59 -0700 Subject: [PATCH 04/19] Use different naming convention & add test for it Experiments here informed the schema naming recommendations in https://github.com/jupyter/telemetry/pull/11 --- .../contentsmanager-actions.json | 2 +- .../contentsmanager-actions/v1.json | 30 +++++++++++++++++++ jupyter_server/serverapp.py | 17 +++++------ jupyter_server/services/contents/handlers.py | 22 ++++++++------ jupyter_server/utils.py | 9 ++++++ 5 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 jupyter_server/event-schemas/contentsmanager-actions/v1.json diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json index 242111722e..5da6d68b88 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -1,5 +1,5 @@ { - "$id": "jupyter.org/contentsmanager-actions", + "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", "version": 1, "title": 
"Contents Manager activities", "description": "Notebook Server emits this event whenever a contentsmanager action happens", diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.json b/jupyter_server/event-schemas/contentsmanager-actions/v1.json new file mode 100644 index 0000000000..5da6d68b88 --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.json @@ -0,0 +1,30 @@ +{ + "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", + "version": 1, + "title": "Contents Manager activities", + "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "type": "object", + "required": ["action", "path"], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + "rename", + "create", + "copy" + ], + "description": "Action performed by contents manager" + }, + "path": { + "type": "string", + "description": "Logical path the action was performed in" + }, + "source_path": { + "type": "string", + "description": "If action is 'copy', this specifies the source path" + } + } +} \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index e10bc80967..3e7e561b49 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -1765,15 +1765,14 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) - schemas_glob = os.path.join( - os.path.dirname(__file__), - 'event-schemas', - '*.json' - ) - - for schema_file in glob(schemas_glob): - with open(schema_file) as f: - self.eventlog.register_schema(json.load(f)) + event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + # Recursively register all .json files under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.json'): + file_path = os.path.join(dirname, file) + with open(file_path) as f: + self.eventlog.register_schema(json.load(f)) 
@catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 7bdf369f11..d80ba9b768 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -9,10 +9,14 @@ import json from tornado import web - -from jupyter_server.utils import url_path_join, url_escape, ensure_async from jupyter_client.jsonutil import date_default +from jupyter_server.utils import ( + url_path_join, + url_escape, + ensure_async, + eventlogging_schema_fqn +) from jupyter_server.base.handlers import ( JupyterHandler, APIHandler, path_regex, ) @@ -113,7 +117,7 @@ async def get(self, path=''): validate_model(model, expect_content=content) self._finish_model(model, location=False) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'get', 'path': model['path'] } ) @@ -130,7 +134,7 @@ async def patch(self, path=''): self._finish_model(model) self.log.info(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, # FIXME: 'path' always has a leading slash, while model['path'] does not. # What to do here for source_path? path munge manually? 
Eww { 'action': 'rename', 'path': model['path'], 'source_path': path } @@ -148,7 +152,7 @@ async def _copy(self, copy_from, copy_to=None): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } ) @@ -160,7 +164,7 @@ async def _upload(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'upload', 'path': model['path'] } ) @@ -172,7 +176,7 @@ async def _new_untitled(self, path, type='', ext=''): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, # Set path to path of created object, not directory it was created in { 'action': 'create', 'path': model['path'] } ) @@ -187,7 +191,7 @@ async def _save(self, model, path): self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'save', 'path': model['path'] } ) @@ -260,7 +264,7 @@ async def delete(self, path=''): self.set_status(204) self.finish() self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'delete', 'path': path } ) diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 42a6ae9278..54e112f97b 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -440,3 +440,12 @@ def wrapped(): result = asyncio.ensure_future(maybe_async) return result return wrapped() + + +def eventlogging_schema_fqn(name): + """ + Return fully qualified event schema name + + Matches 
convention for this particular repo + """ + return 'eventlogging.jupyter.org/notebook/{}'.format(name) From 716ff1b3ef6a4f7da446ff191a89e1e93047a615 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 15:07:45 -0700 Subject: [PATCH 05/19] Don't use f-strings python 3.5 is still supported --- jupyter_server/services/eventlog/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index 687b2591cc..4665e43e8b 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -23,7 +23,7 @@ def post(self, *args, **kwargs): required_fields = {'schema', 'version', 'event'} for rf in required_fields: if rf not in raw_event: - raise web.HTTPError(400, f'{rf} is a required field') + raise web.HTTPError(400, '{} is a required field'.format(rf)) schema_name = raw_event['schema'] version = raw_event['version'] From 8e122fcaaa4d348a4edc9c1ce780742bff788d7f Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 08:08:04 -0700 Subject: [PATCH 06/19] Derive JSON Schema files from YAML files This lets us add detailed documentation & description to our schemas, which is very hard to do in JSON. 
We also add a lot of documentation to the one JSON schema we have --- jupyter_server/event-schemas/README.md | 19 +++++ .../contentsmanager-actions.json | 17 ++-- jupyter_server/event-schemas/generate-json.py | 39 +++++++++ jupyter_server/event-schemas/v1.yaml | 79 +++++++++++++++++++ 4 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 jupyter_server/event-schemas/README.md create mode 100755 jupyter_server/event-schemas/generate-json.py create mode 100644 jupyter_server/event-schemas/v1.yaml diff --git a/jupyter_server/event-schemas/README.md b/jupyter_server/event-schemas/README.md new file mode 100644 index 0000000000..541a9b0398 --- /dev/null +++ b/jupyter_server/event-schemas/README.md @@ -0,0 +1,19 @@ +# Event Schemas + +## Generating .json files + +Event Schemas are written in a human readable `.yaml` format. +This is primarily to get multi-line strings in our descriptions, +as documentation is very important. + +Every time you modify a `.yaml` file, you should run the following +commands. + +```bash +./generate-json.py +``` + +This needs the `ruamel.yaml` python package installed. + +Hopefully, this is extremely temporary, and we can just use YAML +with jupyter_telemetry. 
\ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json index 5da6d68b88..065f1d5c2f 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -2,9 +2,12 @@ "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", "version": 1, "title": "Contents Manager activities", - "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "description": "Record actions on files via the ContentsManager REST API.\n\nThe notebook ContentsManager REST API is used by all frontends to retreive,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. 
Events are only recorded when an action succeeds.\n", "type": "object", - "required": ["action", "path"], + "required": [ + "action", + "path" + ], "properties": { "action": { "enum": [ @@ -13,18 +16,18 @@ "save", "upload", "rename", - "create", - "copy" + "copy", + "delete" ], - "description": "Action performed by contents manager" + "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. create\n Create a new directory or file at 'path'. Currently, name of the\n file or directory is auto generated by the ContentsManager implementation.\n\n3. save\n Save a file at path with contents from the client\n\n4. upload\n Upload a file at given path with contents from the client\n\n5. rename\n Rename a file or directory from value in source_path to\n value in path.\n\n5. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n6. delete\n Delete a file or empty directory at given path\n" }, "path": { "type": "string", - "description": "Logical path the action was performed in" + "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" }, "source_path": { "type": "string", - "description": "If action is 'copy', this specifies the source path" + "description": "Source path of an operation when action is 'copy' or 'rename'" } } } \ No newline at end of file diff --git a/jupyter_server/event-schemas/generate-json.py b/jupyter_server/event-schemas/generate-json.py new file mode 100755 index 0000000000..a39fa0610b --- /dev/null +++ b/jupyter_server/event-schemas/generate-json.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import argparse +import json +import os +import jsonschema +from ruamel.yaml import YAML + +from jupyter_telemetry.eventlog import EventLog + +yaml = YAML(typ='safe') + +def main(): + argparser = argparse.ArgumentParser() + argparser.add_argument( + 
'directory', + help='Directory with Schema .yaml files' + ) + + args = argparser.parse_args() + + el = EventLog() + for dirname, _, files in os.walk(args.directory): + for file in files: + if not file.endswith('.yaml'): + continue + yaml_path = os.path.join(dirname, file) + print('Processing', yaml_path) + with open(yaml_path) as f: + schema = yaml.load(f) + + # validate schema + el.register_schema(schema) + + json_path = os.path.join(dirname, os.path.splitext(file)[0] + '.json') + with open(json_path, 'w') as f: + json.dump(schema, f, indent=4) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/jupyter_server/event-schemas/v1.yaml b/jupyter_server/event-schemas/v1.yaml new file mode 100644 index 0000000000..3d7e8f2fe9 --- /dev/null +++ b/jupyter_server/event-schemas/v1.yaml @@ -0,0 +1,79 @@ +"$id": eventlogging.jupyter.org/notebook/contentsmanager-actions +version: 1 +title: Contents Manager activities +description: | + Record actions on files via the ContentsManager REST API. + + The notebook ContentsManager REST API is used by all frontends to retreive, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. + + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2. As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. 
Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. Events are only recorded when an action succeeds. +type: object +required: +- action +- path +properties: + action: + enum: + - get + - create + - save + - upload + - rename + - copy + - delete + description: | + Action performed by the ContentsManager API. + + This is a required field. + + Possible values: + + 1. get + Get contents of a particular file, or list contents of a directory. + + 2. create + Create a new directory or file at 'path'. Currently, name of the + file or directory is auto generated by the ContentsManager implementation. + + 3. save + Save a file at path with contents from the client + + 4. upload + Upload a file at given path with contents from the client + + 5. rename + Rename a file or directory from value in source_path to + value in path. + + 5. copy + Copy a file or directory from value in source_path to + value in path. + + 6. delete + Delete a file or empty directory at given path + path: + type: string + description: | + Logical path on which the operation was performed. + + This is a required field. + source_path: + type: string + description: | + Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file From f9a0dfb6c3ad69b541de65e053b5354b17d21d1f Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 11:10:07 -0700 Subject: [PATCH 07/19] Keep event schemas in YAML Primary advantage over JSON is that we can do multi-line strings for more detailed documentation. We also expect humans to read & write these, so YAML is a much better format there. All JSON is also valid YAML, so that helps. 
Depends on https://github.com/jupyter/telemetry/pull/13 --- jupyter_server/serverapp.py | 7 ++-- notebook/tests/test_eventlog.py | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 notebook/tests/test_eventlog.py diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 3e7e561b49..76896b03d3 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,7 @@ import warnings import webbrowser import urllib +from ruamel.yaml import YAML from glob import glob from types import ModuleType @@ -1765,14 +1766,14 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) + yaml = YAML(typ='safe') event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') # Recursively register all .json files under event-schemas for dirname, _, files in os.walk(event_schemas_dir): for file in files: - if file.endswith('.json'): + if file.endswith('.yaml'): file_path = os.path.join(dirname, file) - with open(file_path) as f: - self.eventlog.register_schema(json.load(f)) + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py new file mode 100644 index 0000000000..c2f74a59c9 --- /dev/null +++ b/notebook/tests/test_eventlog.py @@ -0,0 +1,57 @@ +import os +import re +import jsonschema +from ruamel.yaml import YAML +from notebook.notebookapp import NotebookApp +from notebook.utils import eventlogging_schema_fqn +from unittest import TestCase + +yaml = YAML(typ='safe') + +class RegisteredSchemasTestCase(TestCase): + def schema_files(self): + event_schemas_dir = os.path.realpath( + os.path.join(os.path.dirname(__file__), '..', 'event-schemas') + ) + schemas = [] + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + yield 
os.path.join(dirname, file) + + def test_eventlogging_schema_fqn(self): + self.assertEqual( + eventlogging_schema_fqn('test'), + 'eventlogging.jupyter.org/notebook/test' + ) + def test_valid_schemas(self): + """ + All schemas must be valid json schemas + """ + for schema_file in self.schema_files(): + with open(schema_file) as f: + jsonschema.Draft7Validator.check_schema(yaml.load(f)) + + def test_schema_conventions(self): + """ + Test schema naming convention for this repo. + + 1. All schemas should be under event-schamas/{name}/v{version}.yaml + 2. Schema id should be eventlogging.jupyter.org/notebook/{name} + 3. Schema version should match version in file + """ + for schema_file in self.schema_files(): + filename = os.path.basename(schema_file) + match = re.match('v(\d+)\.yaml', filename) + # All schema locations must match the following pattern + # schema-name/v(version).yaml + self.assertIsNotNone(match) + + with open(schema_file) as f: + schema = yaml.load(f) + + self.assertEqual(schema['$id'], eventlogging_schema_fqn( + os.path.basename(os.path.dirname(schema_file)) + )) + self.assertEqual(schema['version'], int(match.groups()[0])) + \ No newline at end of file From c7428e8aa778b8e2352b88af3c98fbb424cfac2e Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 16:34:36 -0700 Subject: [PATCH 08/19] Depend on the jupyter_telemetry package We made a v0.0.1 release! 
--- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2697fad271..e402e71669 100755 --- a/setup.py +++ b/setup.py @@ -94,7 +94,8 @@ 'Send2Trash', 'terminado>=0.8.3', 'prometheus_client', - "pywin32>=1.0 ; sys_platform == 'win32'" + "pywin32>=1.0 ; sys_platform == 'win32'", + 'jupyter_telemetry' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', From 9437e88353d515b1b39a210321c92cb34667dc8c Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 12:02:13 -0700 Subject: [PATCH 09/19] read schemas from new utils function --- jupyter_server/serverapp.py | 21 +++++++++++---------- jupyter_server/utils.py | 14 ++++++++++++++ notebook/tests/test_eventlog.py | 20 ++++++-------------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 76896b03d3..18f28967f0 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -106,7 +106,14 @@ from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp -from .utils import url_path_join, check_pid, url_escape, urljoin, pathname2url +from .utils import ( + url_path_join, + check_pid, + url_escape, + urljoin, + pathname2url, + get_schema_files +) from jupyter_server.extension.serverextension import ( ServerExtensionApp, @@ -1765,15 +1772,9 @@ def _init_asyncio_patch(): asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) def init_eventlog(self): self.eventlog = EventLog(parent=self) - - yaml = YAML(typ='safe') - event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') - # Recursively register all .json files under event-schemas - for dirname, _, files in os.walk(event_schemas_dir): - for file in files: - if file.endswith('.yaml'): - file_path = os.path.join(dirname, file) - self.eventlog.register_schema_file(file_path) + # Register schemas for notebook services. 
+ for file_path in get_schema_files(): + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 54e112f97b..55389f037a 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -449,3 +449,17 @@ def eventlogging_schema_fqn(name): Matches convention for this particular repo """ return 'eventlogging.jupyter.org/notebook/{}'.format(name) + + +def get_schema_files(): + """Yield a sequence of event schemas for jupyter services.""" + # Hardcode path to event schemas directory. + event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + schema_files = [] + # Recursively register all .json files under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + file_path = os.path.join(dirname, file) + schema_files.append(file_path) + yield schema_files diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py index c2f74a59c9..994181b73e 100644 --- a/notebook/tests/test_eventlog.py +++ b/notebook/tests/test_eventlog.py @@ -3,32 +3,25 @@ import jsonschema from ruamel.yaml import YAML from notebook.notebookapp import NotebookApp -from notebook.utils import eventlogging_schema_fqn +from notebook.utils import eventlogging_schema_fqn, get_schema_files from unittest import TestCase yaml = YAML(typ='safe') + class RegisteredSchemasTestCase(TestCase): - def schema_files(self): - event_schemas_dir = os.path.realpath( - os.path.join(os.path.dirname(__file__), '..', 'event-schemas') - ) - schemas = [] - for dirname, _, files in os.walk(event_schemas_dir): - for file in files: - if file.endswith('.yaml'): - yield os.path.join(dirname, file) def test_eventlogging_schema_fqn(self): self.assertEqual( eventlogging_schema_fqn('test'), 'eventlogging.jupyter.org/notebook/test' ) + def test_valid_schemas(self): """ All schemas 
must be valid json schemas """ - for schema_file in self.schema_files(): + for schema_file in get_schema_files(): with open(schema_file) as f: jsonschema.Draft7Validator.check_schema(yaml.load(f)) @@ -40,7 +33,7 @@ def test_schema_conventions(self): 2. Schema id should be eventlogging.jupyter.org/notebook/{name} 3. Schema version should match version in file """ - for schema_file in self.schema_files(): + for schema_file in get_schema_files(): filename = os.path.basename(schema_file) match = re.match('v(\d+)\.yaml', filename) # All schema locations must match the following pattern @@ -53,5 +46,4 @@ def test_schema_conventions(self): self.assertEqual(schema['$id'], eventlogging_schema_fqn( os.path.basename(os.path.dirname(schema_file)) )) - self.assertEqual(schema['version'], int(match.groups()[0])) - \ No newline at end of file + self.assertEqual(schema['version'], int(match.groups()[0])) \ No newline at end of file From 6e3c80c622352fb007c89315fa5063e5e71b9241 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 14:56:24 -0700 Subject: [PATCH 10/19] Add fix for tables in RTD theme sphinx docs. 
Solution came from https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html --- docs/source/_static/theme_overrides.css | 13 +++ docs/source/conf.py | 127 +++++++++++++++++++++++- jupyter_server/utils.py | 5 +- 3 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 docs/source/_static/theme_overrides.css diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 0000000000..63ee6cc74c --- /dev/null +++ b/docs/source/_static/theme_overrides.css @@ -0,0 +1,13 @@ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} diff --git a/docs/source/conf.py b/docs/source/conf.py index e105e82d40..fc538a1613 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,8 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinxcontrib.openapi', - 'sphinxemoji.sphinxemoji' + 'sphinx-jsonschema' ] # Add any paths that contain templates here, relative to this directory. @@ -208,6 +207,12 @@ # since it is needed to properly generate _static in the build directory html_static_path = ['_static'] +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], + } + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. 
@@ -370,4 +375,122 @@ spelling_word_list_filename='spelling_wordlist.txt' # import before any doc is built, so _ is guaranteed to be injected +<<<<<<< HEAD import jupyter_server.transutils +======= +import notebook.transutils + +# -- Autogenerate documentation for event schemas ------------------ + +from notebook.utils import get_schema_files + +# Build a dictionary that describes the event schema table of contents. +# toc = { +# schema_name : { +# src: # file path to schema +# dst: # file path to documentation +# ver: # latest version of schema +# } +# } +toc = {} + +# Iterate over schema directories and generate documentation. +# Generates documentation for the latest version of each schema. +for file_path in get_schema_files(): + # Make path relative. + file_path = os.path.relpath(file_path) + # Break apart path to its pieces + pieces = file_path.split(os.path.sep) + # Schema version. Outputs as a string that looks like "v#" + schema_ver = os.path.splitext(pieces[-1])[0] + # Strip "v" and make version an integer. + schema_int = int(schema_ver[1:]) + # Schema name. + schema_name = pieces[-2] + + # Add this version file to schema_dir + src = '../' + file_path + dst = os.path.join('events', os.path.join(schema_name + '.rst')) + + if schema_name in toc: + # If this is a later version, replace the old version. + if schema_int > toc[schema_name]['ver']: + toc[schema_name] = { + 'src': src, + 'dst': dst, + 'ver': schema_int + } + else: + toc[schema_name] = { + 'src': src, + 'dst': dst, + 'ver': schema_int + } + +# Write schema documentation +for schema_name, x in toc.items(): + with open(dst, 'w') as f: + f.write('.. jsonschema:: {}'.format(src)) + +# Write table of contents +events_index = """ +.. 
toctree:: + :maxdepth: 1 + :glob: + +""" + +with open(os.path.join('events', 'index.rst'), 'w') as f: + f.write(events_index) + for item in toc.keys(): + f.write(' {}'.format(item)) + + + + + + + + + + +# # create a directory for this schema if it doesn't exist: +# schema_dir = os.path.join('events', schema_name) +# if not os.path.exists(schema_dir): +# os.makedirs(schema_dir) + + +# toc[schema_name] + + + +# with open(dst, 'w') as f: +# f.write('.. jsonschema:: {}'.format(src)) + + + + + + + +# toc.append(schema_name) + + +# events_index = """ +# .. toctree:: +# :maxdepth: 1 +# :glob: + +# """ + + +# with open(os.path.join('events', 'index.rst'), 'w') as f: +# f.write(events_index) +# for item in set(toc): +# f.write(' {}/*'.format(item)) + + + + + +>>>>>>> 4fb0a0443... Add fix for tables in RTD theme sphinx docs. diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 55389f037a..8fc6e89479 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -455,11 +455,10 @@ def get_schema_files(): """Yield a sequence of event schemas for jupyter services.""" # Hardcode path to event schemas directory. 
event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') - schema_files = [] + #schema_files = [] # Recursively register all .json files under event-schemas for dirname, _, files in os.walk(event_schemas_dir): for file in files: if file.endswith('.yaml'): file_path = os.path.join(dirname, file) - schema_files.append(file_path) - yield schema_files + yield file_path From 4035fd557e99a7549e1aa53e5b7874f83a2587e3 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:02:22 -0700 Subject: [PATCH 11/19] add event schema auto-documentation to jupyter notebook docs --- .gitignore | 1 + docs/environment.yml | 3 +- docs/source/conf.py | 118 --------------------------------------- docs/source/eventlog.rst | 47 ++++++++++++++++ 4 files changed, 50 insertions(+), 119 deletions(-) create mode 100644 docs/source/eventlog.rst diff --git a/.gitignore b/.gitignore index d9fb5e0c6c..a69d2eeee6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ docs/man/*.gz docs/source/api/generated docs/source/config.rst docs/gh-pages +docs/source/events notebook/i18n/*/LC_MESSAGES/*.mo notebook/i18n/*/LC_MESSAGES/nbjs.json notebook/static/components diff --git a/docs/environment.yml b/docs/environment.yml index 5d77bc7bb4..1d9c9d3eb8 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -13,4 +13,5 @@ dependencies: - sphinxcontrib_github_alt - sphinxcontrib-openapi - sphinxemoji - - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file + - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master + - sphinx-jsonschema diff --git a/docs/source/conf.py b/docs/source/conf.py index fc538a1613..41b089cc07 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -375,122 +375,4 @@ spelling_word_list_filename='spelling_wordlist.txt' # import before any doc is built, so _ is guaranteed to be injected -<<<<<<< HEAD import jupyter_server.transutils -======= -import notebook.transutils - -# -- Autogenerate 
documentation for event schemas ------------------ - -from notebook.utils import get_schema_files - -# Build a dictionary that describes the event schema table of contents. -# toc = { -# schema_name : { -# src: # file path to schema -# dst: # file path to documentation -# ver: # latest version of schema -# } -# } -toc = {} - -# Iterate over schema directories and generate documentation. -# Generates documentation for the latest version of each schema. -for file_path in get_schema_files(): - # Make path relative. - file_path = os.path.relpath(file_path) - # Break apart path to its pieces - pieces = file_path.split(os.path.sep) - # Schema version. Outputs as a string that looks like "v#" - schema_ver = os.path.splitext(pieces[-1])[0] - # Strip "v" and make version an integer. - schema_int = int(schema_ver[1:]) - # Schema name. - schema_name = pieces[-2] - - # Add this version file to schema_dir - src = '../' + file_path - dst = os.path.join('events', os.path.join(schema_name + '.rst')) - - if schema_name in toc: - # If this is a later version, replace the old version. - if schema_int > toc[schema_name]['ver']: - toc[schema_name] = { - 'src': src, - 'dst': dst, - 'ver': schema_int - } - else: - toc[schema_name] = { - 'src': src, - 'dst': dst, - 'ver': schema_int - } - -# Write schema documentation -for schema_name, x in toc.items(): - with open(dst, 'w') as f: - f.write('.. jsonschema:: {}'.format(src)) - -# Write table of contents -events_index = """ -.. toctree:: - :maxdepth: 1 - :glob: - -""" - -with open(os.path.join('events', 'index.rst'), 'w') as f: - f.write(events_index) - for item in toc.keys(): - f.write(' {}'.format(item)) - - - - - - - - - - -# # create a directory for this schema if it doesn't exist: -# schema_dir = os.path.join('events', schema_name) -# if not os.path.exists(schema_dir): -# os.makedirs(schema_dir) - - -# toc[schema_name] - - - -# with open(dst, 'w') as f: -# f.write('.. 
jsonschema:: {}'.format(src)) - - - - - - - -# toc.append(schema_name) - - -# events_index = """ -# .. toctree:: -# :maxdepth: 1 -# :glob: - -# """ - - -# with open(os.path.join('events', 'index.rst'), 'w') as f: -# f.write(events_index) -# for item in set(toc): -# f.write(' {}/*'.format(item)) - - - - - ->>>>>>> 4fb0a0443... Add fix for tables in RTD theme sphinx docs. diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst new file mode 100644 index 0000000000..fd77a1b9c8 --- /dev/null +++ b/docs/source/eventlog.rst @@ -0,0 +1,47 @@ +Eventlogging and Telemetry +========================== + +The Notebook Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Notebook Server emits are defined by the `JSON schemas`_ listed below_. Events are emitted as JSON data and validated against those schemas. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +How to emit events +------------------ + +Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. + + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to + 2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'eventlogging.jupyter.org/notebook/contentsmanager-actions' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + + +..
_below: + +Event schemas +------------- + +.. toctree:: + :maxdepth: 2 + + events/index From 23d50a38b16512503acbeee1204d2e245c0af0ac Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:14:04 -0700 Subject: [PATCH 12/19] format paths in recorded events --- jupyter_server/services/contents/handlers.py | 30 +++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index d80ba9b768..85065b21cb 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -117,7 +117,8 @@ async def get(self, path=''): validate_model(model, expect_content=content) self._finish_model(model, location=False) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'get', 'path': model['path'] } ) @@ -134,10 +135,13 @@ async def patch(self, path=''): self._finish_model(model) self.log.info(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, - # FIXME: 'path' always has a leading slash, while model['path'] does not. - # What to do here for source_path? path munge manually? 
Eww - { 'action': 'rename', 'path': model['path'], 'source_path': path } + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) + } ) @gen.coroutine @@ -152,8 +156,13 @@ async def _copy(self, copy_from, copy_to=None): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, - { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'copy', + 'path': model['path'], + 'source_path': copy_from.lstrip(os.path.sep) + } ) async def _upload(self, model, path): @@ -164,7 +173,8 @@ async def _upload(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'upload', 'path': model['path'] } ) @@ -189,9 +199,9 @@ async def _save(self, model, path): model = self.contents_manager.save(model, path) validate_model(model, expect_content=False) self._finish_model(model) - self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'save', 'path': model['path'] } ) From 3c94970d5be68fd5b8f6bab0f5c2ae3311843f00 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:37:03 -0700 Subject: [PATCH 13/19] add documentation for eventlog endpoint --- docs/source/eventlog.rst | 24 ++++++++++++++++---- jupyter_server/services/eventlog/handlers.py | 11 +++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst index fd77a1b9c8..df5c153fb7 100644 --- a/docs/source/eventlog.rst +++ b/docs/source/eventlog.rst @@ -8,12 +8,11 @@ The Notebook Server can be configured to 
record structured events from a running .. _`Telemetry System`: https://github.com/jupyter/telemetry .. _`JSON schemas`: https://json-schema.org/ -How to emit events ------------------- +Emitting Server Events +---------------------- Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. - To begin recording events, you'll need to set two configurations: 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to @@ -35,11 +34,26 @@ Here's a basic example for emitting events from the `contents` service: The output is a file, ``"event.log"``, with events recorded as JSON data. +`eventlog` endpoint +------------------- + +The Notebook Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. .. _below: -Event schemas -------------- + +Server Event schemas +-------=======------ ..
toctree:: :maxdepth: 2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index 4665e43e8b..b27dd87304 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -29,14 +29,15 @@ def post(self, *args, **kwargs): version = raw_event['version'] event = raw_event['event'] - # Profile, and move to a background thread if this is problematic - # FIXME: Return a more appropriate error response if validation fails - self.eventlog.record_event(schema_name, version, event) - + # Profile, may need to move to a background thread if this is problematic + try: + self.eventlog.record_event(schema_name, version, event) + except: + raise web.HTTPError(500, "Event could not be validated.") + self.set_status(204) self.finish() - default_handlers = [ (r"/api/eventlog", EventLoggingHandler), ] \ No newline at end of file From e76c91b3ac264b20bf2aa6d7d468a1d8c3999fc2 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:46:40 -0700 Subject: [PATCH 14/19] return exception as 400 error in eventlog endpoint --- docs/source/eventlog.rst | 2 +- jupyter_server/services/eventlog/handlers.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst index df5c153fb7..7229717f69 100644 --- a/docs/source/eventlog.rst +++ b/docs/source/eventlog.rst @@ -53,7 +53,7 @@ Events that are validated by this endpoint must have their schema listed in the Server Event schemas --------=======------ +-------------------- .. 
toctree:: :maxdepth: 2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index b27dd87304..0c9b69815f 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -6,7 +6,6 @@ from notebook.base.handlers import APIHandler, json_errors from jupyter_telemetry.eventlog import EventLog - class EventLoggingHandler(APIHandler): """ A handler that receives and stores telemetry data from the client. @@ -32,8 +31,8 @@ def post(self, *args, **kwargs): # Profile, may need to move to a background thread if this is problematic try: self.eventlog.record_event(schema_name, version, event) - except: - raise web.HTTPError(500, "Event could not be validated.") + except Exception as e: + raise web.HTTPError(400, e) self.set_status(204) self.finish() From 2ce7c54efa056604c52c1bf725464ead6504b4ed Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:55:54 -0700 Subject: [PATCH 15/19] normalize path in emitted event --- jupyter_server/services/contents/handlers.py | 2 +- notebook/tests/test_eventlog.py | 49 -------------------- 2 files changed, 1 insertion(+), 50 deletions(-) delete mode 100644 notebook/tests/test_eventlog.py diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 85065b21cb..c2ba749c85 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -275,7 +275,7 @@ async def delete(self, path=''): self.finish() self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, - { 'action': 'delete', 'path': path } + { 'action': 'delete', 'path': path.lstrip(os.path.sep) } ) diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py deleted file mode 100644 index 994181b73e..0000000000 --- a/notebook/tests/test_eventlog.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import re -import jsonschema -from ruamel.yaml import 
YAML -from notebook.notebookapp import NotebookApp -from notebook.utils import eventlogging_schema_fqn, get_schema_files -from unittest import TestCase - -yaml = YAML(typ='safe') - - -class RegisteredSchemasTestCase(TestCase): - - def test_eventlogging_schema_fqn(self): - self.assertEqual( - eventlogging_schema_fqn('test'), - 'eventlogging.jupyter.org/notebook/test' - ) - - def test_valid_schemas(self): - """ - All schemas must be valid json schemas - """ - for schema_file in get_schema_files(): - with open(schema_file) as f: - jsonschema.Draft7Validator.check_schema(yaml.load(f)) - - def test_schema_conventions(self): - """ - Test schema naming convention for this repo. - - 1. All schemas should be under event-schamas/{name}/v{version}.yaml - 2. Schema id should be eventlogging.jupyter.org/notebook/{name} - 3. Schema version should match version in file - """ - for schema_file in get_schema_files(): - filename = os.path.basename(schema_file) - match = re.match('v(\d+)\.yaml', filename) - # All schema locations must match the following pattern - # schema-name/v(version).yaml - self.assertIsNotNone(match) - - with open(schema_file) as f: - schema = yaml.load(f) - - self.assertEqual(schema['$id'], eventlogging_schema_fqn( - os.path.basename(os.path.dirname(schema_file)) - )) - self.assertEqual(schema['version'], int(match.groups()[0])) \ No newline at end of file From 5794d31efb62a1b1a1c5ed1a6e3816a25d223849 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 15:31:03 -0700 Subject: [PATCH 16/19] initial tests --- jupyter_server/event-schemas/README.md | 19 --------- .../contentsmanager-actions.json | 33 ---------------- .../contentsmanager-actions/v1.json | 30 -------------- .../{ => contentsmanager-actions}/v1.yaml | 8 +++- jupyter_server/event-schemas/generate-json.py | 39 ------------------- jupyter_server/services/contents/handlers.py | 36 ++++++++--------- setup.py | 3 +- tests/test_eventlog.py | 4 ++ 8 files changed, 29 insertions(+), 143 
deletions(-) delete mode 100644 jupyter_server/event-schemas/README.md delete mode 100644 jupyter_server/event-schemas/contentsmanager-actions.json delete mode 100644 jupyter_server/event-schemas/contentsmanager-actions/v1.json rename jupyter_server/event-schemas/{ => contentsmanager-actions}/v1.yaml (94%) delete mode 100755 jupyter_server/event-schemas/generate-json.py create mode 100644 tests/test_eventlog.py diff --git a/jupyter_server/event-schemas/README.md b/jupyter_server/event-schemas/README.md deleted file mode 100644 index 541a9b0398..0000000000 --- a/jupyter_server/event-schemas/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Event Schemas - -## Generating .json files - -Event Schemas are written in a human readable `.yaml` format. -This is primarily to get multi-line strings in our descriptions, -as documentation is very important. - -Every time you modify a `.yaml` file, you should run the following -commands. - -```bash -./generate-json.py -``` - -This needs the `ruamel.yaml` python package installed. - -Hopefully, this is extremely temporary, and we can just use YAML -with jupyter_telemetry. \ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json deleted file mode 100644 index 065f1d5c2f..0000000000 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", - "version": 1, - "title": "Contents Manager activities", - "description": "Record actions on files via the ContentsManager REST API.\n\nThe notebook ContentsManager REST API is used by all frontends to retreive,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. 
This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. Events are only recorded when an action succeeds.\n", - "type": "object", - "required": [ - "action", - "path" - ], - "properties": { - "action": { - "enum": [ - "get", - "create", - "save", - "upload", - "rename", - "copy", - "delete" - ], - "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. create\n Create a new directory or file at 'path'. Currently, name of the\n file or directory is auto generated by the ContentsManager implementation.\n\n3. save\n Save a file at path with contents from the client\n\n4. upload\n Upload a file at given path with contents from the client\n\n5. rename\n Rename a file or directory from value in source_path to\n value in path.\n\n5. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n6. 
delete\n Delete a file or empty directory at given path\n" - }, - "path": { - "type": "string", - "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" - }, - "source_path": { - "type": "string", - "description": "Source path of an operation when action is 'copy' or 'rename'" - } - } -} \ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.json b/jupyter_server/event-schemas/contentsmanager-actions/v1.json deleted file mode 100644 index 5da6d68b88..0000000000 --- a/jupyter_server/event-schemas/contentsmanager-actions/v1.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", - "version": 1, - "title": "Contents Manager activities", - "description": "Notebook Server emits this event whenever a contentsmanager action happens", - "type": "object", - "required": ["action", "path"], - "properties": { - "action": { - "enum": [ - "get", - "create", - "save", - "upload", - "rename", - "create", - "copy" - ], - "description": "Action performed by contents manager" - }, - "path": { - "type": "string", - "description": "Logical path the action was performed in" - }, - "source_path": { - "type": "string", - "description": "If action is 'copy', this specifies the source path" - } - } -} \ No newline at end of file diff --git a/jupyter_server/event-schemas/v1.yaml b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml similarity index 94% rename from jupyter_server/event-schemas/v1.yaml rename to jupyter_server/event-schemas/contentsmanager-actions/v1.yaml index 3d7e8f2fe9..31a5f293a9 100644 --- a/jupyter_server/event-schemas/v1.yaml +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml @@ -1,6 +1,7 @@ "$id": eventlogging.jupyter.org/notebook/contentsmanager-actions version: 1 title: Contents Manager activities +personal-data: true description: | Record actions on files via the ContentsManager REST API. 
@@ -37,6 +38,7 @@ properties: - rename - copy - delete + category: unrestricted description: | Action performed by the ContentsManager API. @@ -60,20 +62,22 @@ properties: 5. rename Rename a file or directory from value in source_path to value in path. - + 5. copy Copy a file or directory from value in source_path to value in path. - + 6. delete Delete a file or empty directory at given path path: + category: personally-identifiable-information type: string description: | Logical path on which the operation was performed. This is a required field. source_path: + category: personally-identifiable-information type: string description: | Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file diff --git a/jupyter_server/event-schemas/generate-json.py b/jupyter_server/event-schemas/generate-json.py deleted file mode 100755 index a39fa0610b..0000000000 --- a/jupyter_server/event-schemas/generate-json.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -import os -import jsonschema -from ruamel.yaml import YAML - -from jupyter_telemetry.eventlog import EventLog - -yaml = YAML(typ='safe') - -def main(): - argparser = argparse.ArgumentParser() - argparser.add_argument( - 'directory', - help='Directory with Schema .yaml files' - ) - - args = argparser.parse_args() - - el = EventLog() - for dirname, _, files in os.walk(args.directory): - for file in files: - if not file.endswith('.yaml'): - continue - yaml_path = os.path.join(dirname, file) - print('Processing', yaml_path) - with open(yaml_path) as f: - schema = yaml.load(f) - - # validate schema - el.register_schema(schema) - - json_path = os.path.join(dirname, os.path.splitext(file)[0] + '.json') - with open(json_path, 'w') as f: - json.dump(schema, f, indent=4) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 
c2ba749c85..9b7802ff2a 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -5,7 +5,7 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. - +import os import json from tornado import web @@ -115,12 +115,12 @@ async def get(self, path=''): path=path, type=type, format=format, content=content, )) validate_model(model, expect_content=content) - self._finish_model(model, location=False) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'get', 'path': model['path'] } ) + self._finish_model(model, location=False) @web.authenticated async def patch(self, path=''): @@ -130,21 +130,20 @@ async def patch(self, path=''): if model is None: raise web.HTTPError(400, u'JSON body missing') self.log.info(model) - model = yield maybe_future(cm.update(model, path)) + model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) - self._finish_model(model) - self.log.info(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, - { - 'action': 'rename', - 'path': model['path'], - 'source_path': path.lstrip(os.path.sep) + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) } ) + self._finish_model(model) + - @gen.coroutine async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -154,16 +153,16 @@ async def _copy(self, copy_from, copy_to=None): model = self.contents_manager.copy(copy_from, copy_to) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { - 'action': 'copy', - 'path': model['path'], - 'source_path': copy_from.lstrip(os.path.sep) + 'action': 'copy', + 'path': model['path'], + 
'source_path': copy_from.lstrip(os.path.sep) } ) + self._finish_model(model) async def _upload(self, model, path): """Handle upload of a new file to path""" @@ -171,12 +170,12 @@ async def _upload(self, model, path): model = self.contents_manager.new(model, path) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'upload', 'path': model['path'] } ) + self._finish_model(model) async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" @@ -184,12 +183,12 @@ async def _new_untitled(self, path, type='', ext=''): model = self.contents_manager.new_untitled(path=path, type=type, ext=ext) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, # Set path to path of created object, not directory it was created in { 'action': 'create', 'path': model['path'] } ) + self._finish_model(model) async def _save(self, model, path): """Save an existing file.""" @@ -198,12 +197,12 @@ async def _save(self, model, path): self.log.info(u"Saving file at %s", path) model = self.contents_manager.save(model, path) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'save', 'path': model['path'] } ) + self._finish_model(model) @web.authenticated async def post(self, path=''): @@ -272,12 +271,11 @@ async def delete(self, path=''): self.log.warning('delete %s', path) cm.delete(path) self.set_status(204) - self.finish() self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'delete', 'path': path.lstrip(os.path.sep) } ) - + self.finish() class CheckpointsHandler(APIHandler): diff --git a/setup.py b/setup.py index e402e71669..3ccc6b5fcb 100755 --- 
a/setup.py +++ b/setup.py @@ -95,7 +95,8 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", - 'jupyter_telemetry' + # Install teh + 'git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py new file mode 100644 index 0000000000..1f7b587327 --- /dev/null +++ b/tests/test_eventlog.py @@ -0,0 +1,4 @@ + + +def test_eventlog(serverapp): + pass \ No newline at end of file From 7c9d3d51f4e4b8f03d984819ddbee6f48202d2cf Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:05:05 -0700 Subject: [PATCH 17/19] add initial telemetry docs --- .gitignore | 10 +---- docs/doc-requirements.txt | 3 +- docs/source/conf.py | 11 ++++-- docs/source/operators/index.rst | 3 +- docs/source/operators/telemetry.rst | 61 +++++++++++++++++++++++++++++ docs/source/other/full-config.rst | 2 +- jupyter_server/utils.py | 2 +- setup.py | 2 +- 8 files changed, 77 insertions(+), 17 deletions(-) create mode 100644 docs/source/operators/telemetry.rst diff --git a/.gitignore b/.gitignore index a69d2eeee6..0ab0672302 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ MANIFEST +docs/source/operators/events build dist _build @@ -7,15 +8,6 @@ docs/source/api/generated docs/source/config.rst docs/gh-pages docs/source/events -notebook/i18n/*/LC_MESSAGES/*.mo -notebook/i18n/*/LC_MESSAGES/nbjs.json -notebook/static/components -notebook/static/style/*.min.css* -notebook/static/*/js/built/ -notebook/static/*/built/ -notebook/static/built/ -notebook/static/*/js/main.min.js* -notebook/static/lab/*bundle.js node_modules *.py[co] __pycache__ diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt index 48b3eda1d0..4167aabf6d 100644 --- a/docs/doc-requirements.txt +++ b/docs/doc-requirements.txt @@ -8,4 +8,5 @@ prometheus_client sphinxcontrib_github_alt sphinxcontrib-openapi sphinxemoji 
-git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file +git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master +jupyter_telemetry_sphinxext \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 41b089cc07..4add156c81 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,7 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinx-jsonschema' + 'jupyter_telemetry_sphinxext' ] # Add any paths that contain templates here, relative to this directory. @@ -209,9 +209,9 @@ html_context = { 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme + '_static/theme_overrides.css', # override wide tables in RTD theme ], - } +} # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -376,3 +376,8 @@ # import before any doc is built, so _ is guaranteed to be injected import jupyter_server.transutils + +# Jupyter telemetry configuration values. +jupyter_telemetry_schema_source = "../jupyter_server/event-schemas" # Path is relative to conf.py +jupyter_telemetry_schema_output = "source/operators/events" # Path is relative to conf.py +jupyter_telemetry_index_title = "Telemetry Event Schemas" # Title of the index page that lists all found schemas. 
\ No newline at end of file diff --git a/docs/source/operators/index.rst b/docs/source/operators/index.rst index a654be1a0c..a6d2e212fd 100644 --- a/docs/source/operators/index.rst +++ b/docs/source/operators/index.rst @@ -12,4 +12,5 @@ These pages are targeted at people using, configuring, and/or deploying multiple configuring-extensions migrate-from-nbserver public-server - security \ No newline at end of file + security + telemetry \ No newline at end of file diff --git a/docs/source/operators/telemetry.rst b/docs/source/operators/telemetry.rst new file mode 100644 index 0000000000..2c94e99a7c --- /dev/null +++ b/docs/source/operators/telemetry.rst @@ -0,0 +1,61 @@ +Telemetry and Eventlogging +========================== + +Jupyter Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The events that the Server emits are recorded as JSON data, defined and validated by the `JSON schemas`_ listed below_. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +Emitting Server Events +---------------------- + +Event logging is handled by the Server's ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data. + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that events are routed to. + 2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. 
code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + +`eventlog` endpoint +------------------- + +The Jupyter Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait described above. + +.. _below: + + +Server Event schemas +-------------------- + +.. toctree:: + :maxdepth: 2 + + events/index diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index f7f0cab4ba..70852ea40f 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -897,7 +897,7 @@ FileContentsManager.root_dir : Unicode No description -NotebookNotary.algorithm : 'md5'|'sha3_384'|'sha3_512'|'sha256'|'sha1'|'blake2s'|'sha3_256'|'sha3_224'|'sha384'|'sha512'|'blake2b'|'sha224' +NotebookNotary.algorithm : 'sha1'|'sha3_224'|'blake2s'|'sha384'|'sha224'|'sha3_256'|'sha3_384'|'sha3_512'|'sha512'|'sha256'|'md5'|'blake2b' Default: ``'sha256'`` The hashing algorithm used to sign notebooks. 
diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 8fc6e89479..ec44e13b75 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -448,7 +448,7 @@ def eventlogging_schema_fqn(name): Matches convention for this particular repo """ - return 'eventlogging.jupyter.org/notebook/{}'.format(name) + return 'eventlogging.jupyter.org/jupyter_server/{}'.format(name) def get_schema_files(): diff --git a/setup.py b/setup.py index 3ccc6b5fcb..bfedef2528 100755 --- a/setup.py +++ b/setup.py @@ -95,7 +95,7 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", - # Install teh + # Install the working branch of telemetry. 'git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { From ef8573d82d407ddd56f3ebe7ab1d37729e685117 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:16:31 -0700 Subject: [PATCH 18/19] fix jupyter_telemetry dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bfedef2528..9f3932fac7 100755 --- a/setup.py +++ b/setup.py @@ -96,7 +96,7 @@ 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", # Install the working branch of telemetry. - 'git+https://github.com/Zsailer/telemetry.git@personal-data' + 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@master' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', From ea9e352d2906ace72c32158b3abb8976c5637a54 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:18:22 -0700 Subject: [PATCH 19/19] point telemetry at correct dev branch --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9f3932fac7..6123e8f64f 100755 --- a/setup.py +++ b/setup.py @@ -96,7 +96,7 @@ 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", # Install the working branch of telemetry. 
- 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@master' + 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters',