Skip to content

Commit

Permalink
Add support for fetching samples from API (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
drew authored Oct 12, 2020
1 parent b72d005 commit 0d0566c
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 1 deletion.
59 changes: 58 additions & 1 deletion src/gretel_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
from functools import wraps
import time
from typing import Iterator, Callable, Optional, Tuple, Union, List
from typing import Dict, Iterator, Callable, Optional, Tuple, Union, List
import threading
from queue import Queue
from getpass import getpass
Expand All @@ -17,6 +17,16 @@
import requests
import tenacity

# pandas is an optional dependency. When it cannot be imported, ``pd`` is
# set to ``None`` and ``_DataFrameT`` becomes an empty stand-in class so
# that annotations referring to it still resolve.
try:
    import pandas as pd
    from pandas import DataFrame as _DataFrameT
except ImportError:  # pragma: no cover
    pd = None

    class _DataFrameT:  # noqa
        pass


from gretel_client.readers import Reader
from gretel_client.samplers import ConstantSampler, Sampler, get_default_sampler
from gretel_client.projects import Project
Expand All @@ -41,6 +51,7 @@
META = "metadata"
DATA = "data"
ID = "id"
SAMPLES = "samples"
INGEST_TIME = "ingest_time"
PROMPT = "prompt"
PROMPT_ALWAYS = "prompt_always"
Expand Down Expand Up @@ -656,6 +667,52 @@ def install_packages(
version=version,
)

def list_samples(self, include_details: bool = False) -> List[Union[str, dict]]:
    """List the sample datasets that are available.

    Gretel provides a number of different sample datasets that can be used
    to populate projects. This method returns a list of available samples.

    Args:
        include_details: If ``True``, each entry is a dict with ``name``
            and ``description`` keys rather than just the sample name.
            Defaults to ``False``.

    Returns:
        A list of available sample datasets.
    """
    available = self._get("records/samples")[DATA][SAMPLES]
    if not include_details:
        return list(available.keys())

    detailed = []
    for sample_name, description in available.items():
        detailed.append({"name": sample_name, "description": description})
    return detailed

def get_sample(
    self, sample_name: str, as_df: bool = False
) -> Union[List[Dict], _DataFrameT]:
    """Return a sample dataset by key.

    Use ``list_samples`` to get a list of available samples.

    Args:
        sample_name: The name of the sample to return.
        as_df: If ``True``, will return the sample dataset as a
            ``DataFrame``. If the sample record contains nested fields,
            those fields will be flattened before converting to a
            ``DataFrame``. Defaults to ``False``.

    Returns:
        A list or DataFrame containing the sample dataset.

    Raises:
        RuntimeError: If a ``DataFrame`` is requested without pandas being
            installed.
    """
    # Fail fast: verify the optional dependency before spending a network
    # round trip on a response that could not be converted anyway.
    if as_df and pd is None:
        raise RuntimeError("pandas must be installed when as_df is True")

    resp = self._get("records/samples", params={"key": sample_name})
    samples = resp[DATA][SAMPLES]
    if not as_df:
        return samples

    # json_normalize flattens nested fields and already returns a DataFrame,
    # so no further wrapping is needed.
    return pd.json_normalize(samples)


def _get_or_prompt(
input_key: str, prompt_message: str, env_fallback: str
Expand Down
25 changes: 25 additions & 0 deletions tests/src/gretel_client/integration/test_client.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import pytest

import pandas as pd

from gretel_client.client import (
get_cloud_client,
Client,
)
from gretel_client.errors import NotFound

API_KEY = os.getenv("GRETEL_TEST_API_KEY")

Expand All @@ -24,3 +27,25 @@ def test_detect_entities(client: Client):

assert len(detected_entities) == 1
assert len(detected_entities[0]["metadata"]["fields"]["email"]["ner"]["labels"]) == 3


def test_list_samples(client: Client):
    """Check both output shapes of ``Client.list_samples``.

    The default call must yield a non-empty list of strings; the detailed
    call must yield a non-empty list of dicts.
    """
    samples = client.list_samples()
    assert len(samples) > 0
    assert all(isinstance(s, str) for s in samples)

    samples = client.list_samples(include_details=True)
    # Without this, ``all()`` over an empty list would pass vacuously.
    assert len(samples) > 0
    assert all(isinstance(s, dict) for s in samples)


def test_get_samples(client: Client):
    """Fetch the ``safecast`` sample as plain records and as a DataFrame."""
    records = client.get_sample("safecast")
    assert len(records) > 0

    frame = client.get_sample("safecast", as_df=True)
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) > 0

def test_get_sample_not_found(client: Client):
    """Requesting an unknown sample name must raise ``NotFound``."""
    missing_name = "this_sample_not_found"
    with pytest.raises(NotFound):
        client.get_sample(missing_name)

0 comments on commit 0d0566c

Please sign in to comment.