Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the auto instrumentation command #567

Merged
merged 21 commits into from
May 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 67 additions & 14 deletions docs/examples/auto-instrumentation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,9 @@ $ source auto_instrumentation/bin/activate
# Installation

```sh
$ pip install opentelemetry-api
$ pip install opentelemetry-sdk
$ pip install opentelemetry-auto-instrumentation
$ pip install ext/opentelemetry-ext-flask
$ pip install flask
$ pip install opentelemetry-ext-flask
$ pip install requests
```

Expand All @@ -71,42 +69,97 @@ This is done in 2 separate consoles, one to run each of the scripts that make up

```sh
$ source auto_instrumentation/bin/activate
$ python opentelemetry-python/opentelemetry-auto-instrumentation/example/server_instrumented.py
$ python opentelemetry-python/docs/examples/auto-instrumentation/server_instrumented.py
```

```sh
$ source auto_instrumentation/bin/activate
$ python opentelemetry-python/opentelemetry-auto-instrumentation/example/client.py testing
$ python opentelemetry-python/docs/examples/auto-instrumentation/client.py testing
```

The execution of `server_instrumented.py` should return an output similar to:

```sh
Hello, testing!
Span(name="serv_request", context=SpanContext(trace_id=0x9c0e0ce8f7b7dbb51d1d6e744a4dad49, span_id=0xd1ba3ec4c76a0d7f, trace_state={}), kind=SpanKind.INTERNAL, parent=None, start_time=2020-03-19T00:06:31.275719Z, end_time=2020-03-19T00:06:31.275920Z)
127.0.0.1 - - [18/Mar/2020 18:06:31] "GET /serv_request?helloStr=Hello%2C+testing%21 HTTP/1.1" 200 -
{
"name": "server_request",
"context": {
"trace_id": "0xfa002aad260b5f7110db674a9ddfcd23",
"span_id": "0x8b8bbaf3ca9c5131",
"trace_state": "{}"
},
"kind": "SpanKind.SERVER",
"parent_id": null,
"start_time": "2020-04-30T17:28:57.886397Z",
"end_time": "2020-04-30T17:28:57.886490Z",
"status": {
"canonical_code": "OK"
},
"attributes": {
"component": "http",
"http.method": "GET",
"http.server_name": "127.0.0.1",
"http.scheme": "http",
"host.port": 8082,
"http.host": "localhost:8082",
"http.target": "/server_request?param=testing",
"net.peer.ip": "127.0.0.1",
"net.peer.port": 52872,
"http.flavor": "1.1"
},
"events": [],
"links": []
}
```

## Execution of an automatically instrumented server

Now, kill the execution of `server_instrumented.py` with `ctrl + c` and run this instead:

```sh
$ opentelemetry-auto-instrumentation opentelemetry-python/opentelemetry-auto-instrumentation/example/server_uninstrumented.py
$ opentelemetry-auto-instrumentation python docs/examples/auto-instrumentation/server_uninstrumented.py
```

In the console where you previously executed `client.py`, run again this again:

```sh
$ python opentelemetry-python/opentelemetry-auto-instrumentation/example/client.py testing
$ python opentelemetry-python/docs/examples/auto-instrumentation/client.py testing
```

The execution of `server_uninstrumented.py` should return an output similar to:

```sh
Hello, testing!
Span(name="serv_request", context=SpanContext(trace_id=0xf26b28b5243e48f5f96bfc753f95f3f0, span_id=0xbeb179a095d087ed, trace_state={}), kind=SpanKind.SERVER, parent=<opentelemetry.trace.DefaultSpan object at 0x7f1a20a54908>, start_time=2020-03-19T00:24:18.828561Z, end_time=2020-03-19T00:24:18.845127Z)
127.0.0.1 - - [18/Mar/2020 18:24:18] "GET /serv_request?helloStr=Hello%2C+testing%21 HTTP/1.1" 200 -
{
"name": "server_request",
"context": {
"trace_id": "0x9f528e0b76189f539d9c21b1a7a2fc24",
"span_id": "0xd79760685cd4c269",
"trace_state": "{}"
},
"kind": "SpanKind.SERVER",
"parent_id": "0xb4fb7eee22ef78e4",
"start_time": "2020-04-30T17:10:02.400604Z",
"end_time": "2020-04-30T17:10:02.401858Z",
"status": {
"canonical_code": "OK"
},
"attributes": {
"component": "http",
"http.method": "GET",
"http.server_name": "127.0.0.1",
"http.scheme": "http",
"host.port": 8082,
"http.host": "localhost:8082",
"http.target": "/server_request?param=testing",
"net.peer.ip": "127.0.0.1",
"net.peer.port": 48240,
"http.flavor": "1.1",
"http.route": "/server_request",
"http.status_text": "OK",
"http.status_code": 200
},
"events": [],
"links": []
}
```

As you can see, both outputs are equivalentsince the automatic instrumentation does what the manual instrumentation does too.
Both outputs are equivalent since the automatic instrumentation does what the manual instrumentation does too.
3 changes: 3 additions & 0 deletions docs/examples/auto-instrumentation/server_instrumented.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from flask import Flask, request

from opentelemetry import propagators, trace
from opentelemetry.ext.wsgi import collect_request_attributes
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
ConsoleSpanExporter,
Expand All @@ -38,6 +39,8 @@ def server_request():
parent=propagators.extract(
lambda dict_, key: dict_.get(key, []), request.headers
)["current-span"],
kind=trace.SpanKind.SERVER,
attributes=collect_request_attributes(request.environ),
):
print(request.args.get("param"))
return "served"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@

::

opentelemetry-auto-instrumentation program.py
opentelemetry-auto-instrumentation python program.py

The code in ``program.py`` needs to use one of the packages for which there is
an OpenTelemetry extension. For a list of the available extensions please check
`here <https://opentelemetry-python.readthedocs.io/>`_.
an OpenTelemetry integration. For a list of the available integrations please
check `here <https://opentelemetry-python.readthedocs.io/>`_.
"""
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,40 @@
# limitations under the License.

from logging import getLogger
from runpy import run_path
from os import environ, execl, getcwd
from os.path import abspath, dirname, pathsep
from shutil import which
from sys import argv

from pkg_resources import iter_entry_points

logger = getLogger(__file__)


def run() -> None:

for entry_point in iter_entry_points("opentelemetry_instrumentor"):
try:
entry_point.load()().instrument() # type: ignore
logger.debug("Instrumented %s", entry_point.name)
python_path = environ.get("PYTHONPATH")

if not python_path:
python_path = []

ocelotl marked this conversation as resolved.
Show resolved Hide resolved
else:
python_path = python_path.split(pathsep)

cwd_path = getcwd()

# This is being added to support applications that are being run from their
# own executable, like Django.
# FIXME investigate if there is another way to achieve this
if cwd_path not in python_path:
python_path.insert(0, cwd_path)

filedir_path = dirname(abspath(__file__))

python_path = [path for path in python_path if path != filedir_path]
ocelotl marked this conversation as resolved.
Show resolved Hide resolved

python_path.insert(0, filedir_path)

environ["PYTHONPATH"] = pathsep.join(python_path)

except Exception: # pylint: disable=broad-except
logger.exception("Instrumenting of %s failed", entry_point.name)
executable = which(argv[1])

run_path(argv[1], run_name="__main__") # type: ignore
execl(executable, executable, *argv[2:])
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from logging import getLogger

from pkg_resources import iter_entry_points

logger = getLogger(__file__)


for entry_point in iter_entry_points("opentelemetry_instrumentor"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand this, the goal is to load all available instrumentations if the auto_instrumentation package is on the path, which should only be true if the user called auto_instrumentation/auto_instrumentation.py:run?

Why did you decide to use sitecustomize to do this instead of leaving this in run? Loading the instrumentations as an import effect seems dangerous, but I don't know if this is conventional for sitecustomize.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a little research here. Looks like this is how ddtrace does it too: https://github.com/DataDog/dd-trace-py/blob/master/ddtrace/bootstrap/sitecustomize.py

The main issue that this PR is fixing is the loading + activating of the instrumentations before anything in the downstream script is invoked. In commands like flask_run, you have no control over the actual code, so you can't programatically insert the instrumentation activation into the script itself.

in this scenario the "run" entry point is never invoked by the script that is being execl'd, so auto_instrumentation.py:run will never actually execute in the new python process.

There's very few places that you can run arbitary python code before the script itself starts to run. One of those that I'm aware of is sitecustomize. I'm having trouble thinking of any other place to inject startup code that is guaranteed to run before the script that you're trying to invoke.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, exactly. As @toumorokoshi says, this has been changed to support the frameworks or libraries that use launcher executables (thus requiring execl) and are executed in a new Python process.

Copy link
Contributor

@majorgreys majorgreys Apr 30, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is similar to how ddtrace-run works to add the directory where thesitecustomize.py @toumorokoshi just linked is located to the PYTHONPATH before calling execl:

https://github.com/DataDog/dd-trace-py/blob/master/ddtrace/commands/ddtrace_run.py

I tried this PR out with the example auto-instrumentation but using uwsgi to load the app and it worked as expected:

$ opentelemetry-auto-instrumentation uwsgi --http :8082 -w server_uninstrumented:app
*** Starting uWSGI 2.0.18 (64bit) on [Thu Apr 30 13:59:02 2020] ***
....
spawned uWSGI worker 1 (and the only) (pid: 54166, cores: 1)
testing
{
    "name": "server_request",
    "context": {
        "trace_id": "0x3416621a548ee3b57495a4ecc3bc9502",
        "span_id": "0xfc0221256e37be82",
        "trace_state": "{}"
    },
    "kind": "SpanKind.SERVER",
    "parent_id": "0xdee7b970d0693b48",
    "start_time": "2020-04-30T17:59:32.630551Z",
    "end_time": "2020-04-30T17:59:32.632212Z",
    "status": {
        "canonical_code": "OK"
    },
    "attributes": {
        "component": "http",
        "http.method": "GET",
        "http.server_name": "tbutt.local",
        "http.scheme": "http",
        "host.port": 8082,
        "http.host": "localhost:8082",
        "http.target": "/server_request?param=testing",
        "net.peer.ip": "127.0.0.1",
        "net.peer.port": "40658",
        "http.flavor": "1.1",
        "http.route": "/server_request",
        "http.status_text": "OK",
        "http.status_code": 200
    },
    "events": [],
    "links": []
}

try:
entry_point.load()().instrument() # type: ignore
logger.debug("Instrumented %s", entry_point.name)
ocelotl marked this conversation as resolved.
Show resolved Hide resolved

except Exception: # pylint: disable=broad-except
logger.exception("Instrumenting of %s failed", entry_point.name)
106 changes: 106 additions & 0 deletions opentelemetry-auto-instrumentation/tests/test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# type: ignore

from os import environ, getcwd
from os.path import abspath, dirname, pathsep
from unittest import TestCase
from unittest.mock import patch

from opentelemetry.auto_instrumentation import auto_instrumentation


class TestRun(TestCase):
auto_instrumentation_path = dirname(abspath(auto_instrumentation.__file__))

@classmethod
def setUpClass(cls):
cls.argv_patcher = patch(
"opentelemetry.auto_instrumentation.auto_instrumentation.argv"
)
cls.execl_patcher = patch(
"opentelemetry.auto_instrumentation.auto_instrumentation.execl"
)
cls.which_patcher = patch(
"opentelemetry.auto_instrumentation.auto_instrumentation.which"
)

cls.argv_patcher.start()
cls.execl_patcher.start()
cls.which_patcher.start()

@classmethod
def tearDownClass(cls):
cls.argv_patcher.stop()
cls.execl_patcher.stop()
cls.which_patcher.stop()

@patch.dict("os.environ", {"PYTHONPATH": ""})
def test_empty(self):
auto_instrumentation.run()
self.assertEqual(
environ["PYTHONPATH"],
pathsep.join([self.auto_instrumentation_path, getcwd()]),
)

@patch.dict("os.environ", {"PYTHONPATH": "abc"})
def test_non_empty(self):
auto_instrumentation.run()
self.assertEqual(
environ["PYTHONPATH"],
pathsep.join([self.auto_instrumentation_path, getcwd(), "abc"]),
)

@patch.dict(
"os.environ",
{"PYTHONPATH": pathsep.join(["abc", auto_instrumentation_path])},
)
def test_after_path(self):
auto_instrumentation.run()
self.assertEqual(
environ["PYTHONPATH"],
pathsep.join([self.auto_instrumentation_path, getcwd(), "abc"]),
)

@patch.dict(
"os.environ",
{
"PYTHONPATH": pathsep.join(
[auto_instrumentation_path, "abc", auto_instrumentation_path]
)
},
)
def test_single_path(self):
auto_instrumentation.run()
self.assertEqual(
environ["PYTHONPATH"],
pathsep.join([self.auto_instrumentation_path, getcwd(), "abc"]),
)


class TestExecl(TestCase):
@patch(
"opentelemetry.auto_instrumentation.auto_instrumentation.argv",
new=[1, 2, 3],
)
@patch("opentelemetry.auto_instrumentation.auto_instrumentation.which")
@patch("opentelemetry.auto_instrumentation.auto_instrumentation.execl")
def test_execl(
self, mock_execl, mock_which
): # pylint: disable=no-self-use
mock_which.configure_mock(**{"return_value": "python"})

auto_instrumentation.run()

mock_execl.assert_called_with("python", "python", 3)