Skip to content

Commit

Permalink
fix: update pandas.read_json syntax (#9)
Browse files Browse the repository at this point in the history
Pandas has deprecated read_json(string), so we now
add a get_results() wrapper which calls read_json()
appropriately. The README is updated with the new syntax.
  • Loading branch information
alubbock authored Aug 17, 2024
1 parent 5f98938 commit 07dafa4
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 14 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ into a `io.StringIO()` buffer, which can be read as follows

```python
import pandas as pd
results = pd.read_json(basic_bench.outfile.getvalue(), lines=True)
results = pd.read_json(basic_bench.outfile, lines=True)
```

The above example captures the fields `start_time`, `finish_time`,
Expand Down Expand Up @@ -169,6 +169,10 @@ The simplest way to examine results in detail is to load them into a
[pandas](https://pandas.pydata.org/) dataframe:

```python
# Read results directly from active benchmark suite
benchmark.get_results()

# Or, equivalently, when the results were written to a file, read that file with pandas directly
import pandas
results = pandas.read_json('/home/user/my-benchmarks', lines=True)
```
Expand Down Expand Up @@ -219,7 +223,7 @@ def my_function():
my_function()

# Read the results into a Pandas DataFrame
results = pandas.read_json(lpbench.outfile.getvalue(), lines=True)
results = lpbench.get_results()

# Get the line profiler report as an object
lp = MBLineProfiler.decode_line_profile(results['line_profiler'][0])
Expand Down
17 changes: 15 additions & 2 deletions microbench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
import numpy
except ImportError:
numpy = None
try:
import pandas
except ImportError:
pandas = None
from .diff import envdiff


Expand Down Expand Up @@ -176,6 +180,15 @@ def output_result(self, bm_data):
# Assume file-like object
self.outfile.write(bm_str)

def get_results(self):
    """Load the recorded benchmark results into a pandas DataFrame.

    ``self.outfile`` is passed to ``pandas.read_json`` with ``lines=True``,
    so each line of the output is parsed as one JSON record.

    Returns:
        pandas.DataFrame: whatever ``pandas.read_json`` parses from
        ``self.outfile``.

    Raises:
        ImportError: If the optional ``pandas`` dependency is not installed.
    """
    # ``pandas`` is bound to None at import time when the package is missing.
    if pandas is None:
        raise ImportError('This functionality requires the "pandas" package')

    # Seekable outputs are rewound so reading starts from the first record
    # rather than from wherever the last write left the position.
    if hasattr(self.outfile, 'seek'):
        self.outfile.seek(0)

    return pandas.read_json(self.outfile, lines=True)

def __call__(self, func):
def inner(*args, **kwargs):
bm_data = dict()
Expand Down Expand Up @@ -353,13 +366,13 @@ class MBLineProfiler(object):
in production.
"""
def capturepost_line_profile(self, bm_data):
bm_data['line_profiler'] = base64.encodebytes(
bm_data['line_profiler'] = base64.b64encode(
pickle.dumps(self._line_profiler.get_stats())
).decode('utf8')

@staticmethod
def decode_line_profile(line_profile_pickled):
return pickle.loads(base64.decodebytes(line_profile_pickled.encode()))
return pickle.loads(base64.b64decode(line_profile_pickled))

@classmethod
def print_line_profile(self, line_profile_pickled, **kwargs):
Expand Down
14 changes: 7 additions & 7 deletions microbench/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def my_function():
for _ in range(3):
assert my_function() == 499999500000

results = pandas.read_json(benchmark.outfile.getvalue(), lines=True)
results = benchmark.get_results()
assert (results['function_name'] == 'my_function').all()
assert results['package_versions'][0]['pandas'] == pandas.__version__
runtimes = results['finish_time'] - results['start_time']
Expand All @@ -50,7 +50,7 @@ def my_function():
# call the function
my_function()

results = pandas.read_json(benchmark.outfile.getvalue(), lines=True)
results = benchmark.get_results()
assert (results['function_name'] == 'my_function').all()
runtimes = results['finish_time'] - results['start_time']
assert (runtimes >= datetime.timedelta(0)).all()
Expand All @@ -68,7 +68,7 @@ def noop():

noop()

results = pandas.read_json(globals_bench.outfile.getvalue(), lines=True)
results = globals_bench.get_results()

# We should've captured microbench and pandas versions from top level
# imports in this file
Expand All @@ -89,7 +89,7 @@ def noop():

noop()

results = pandas.read_json(pkg_bench.outfile.getvalue(), lines=True)
results = pkg_bench.get_results()
assert pandas.__version__ == results['package_versions'][0]['pandas']


Expand All @@ -111,7 +111,7 @@ def noop():
assert not telem_bench._telemetry_thread.is_alive()

# Check some telemetry was captured
results = pandas.read_json(telem_bench.outfile.getvalue(), lines=True)
results = telem_bench.get_results()
assert len(results['telemetry']) > 0


Expand All @@ -135,7 +135,7 @@ def dummy(arg1, arg2):
assert all(issubclass(w_.category, JSONEncodeWarning) for w_ in w)


results = pandas.read_json(bench.outfile.getvalue(), lines=True)
results =bench.get_results()
assert results['args'][0] == [_UNENCODABLE_PLACEHOLDER_VALUE]
assert results['kwargs'][0] == {'arg2': _UNENCODABLE_PLACEHOLDER_VALUE}
assert results['return_value'][0] == _UNENCODABLE_PLACEHOLDER_VALUE
Expand Down Expand Up @@ -173,5 +173,5 @@ def dummy():

dummy()

results = pandas.read_json(bench.outfile.getvalue(), lines=True)
results = bench.get_results()
assert results['return_value'][0] == str(obj)
3 changes: 2 additions & 1 deletion microbench/tests/test_line_profiler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from microbench import MicroBench, MBLineProfiler
import pandas
import io


def test_line_profiler():
Expand All @@ -20,7 +21,7 @@ def my_function():
for _ in range(3):
assert my_function() == 499999500000

results = pandas.read_json(lpbench.outfile.getvalue(), lines=True)
results = lpbench.get_results()
lp = MBLineProfiler.decode_line_profile(results['line_profiler'][0])
assert lp.__class__.__name__ == 'LineStats'
MBLineProfiler.print_line_profile(results['line_profiler'][0])
Expand Down
2 changes: 1 addition & 1 deletion microbench/tests/test_nvidia.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ def test():

test()

results = pandas.read_json(bench.outfile.getvalue(), lines=True)
results = bench.get_results()
assert 'nvidia_gpu_name' in results.columns
assert 'nvidia_memory.total' in results.columns
2 changes: 1 addition & 1 deletion microbench/tests/test_psutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ def test_func():

test_func()

results = pandas.read_json(mybench.outfile.getvalue(), lines=True)
results = mybench.get_results()
assert results['cpu_cores_logical'][0] >= 1
assert results['ram_total'][0] > 0

0 comments on commit 07dafa4

Please sign in to comment.