allow filtering datasets by metadata #942

Merged: 1 commit, Aug 22, 2024
2 changes: 1 addition & 1 deletion js/package.json
@@ -1,6 +1,6 @@
{
"name": "langsmith",
"version": "0.1.44",
"version": "0.1.45",
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
"packageManager": "yarn@1.22.19",
"files": [
8 changes: 8 additions & 0 deletions js/src/client.ts
@@ -1265,7 +1265,7 @@
treeFilter?: string;
isRoot?: boolean;
dataSourceType?: string;
}): Promise<any> {
let projectIds_ = projectIds || [];
if (projectNames) {
projectIds_ = [
@@ -1553,7 +1553,7 @@
`Failed to list shared examples: ${response.status} ${response.statusText}`
);
}
return result.map((example: any) => ({
...example,
_hostUrl: this.getHostUrl(),
}));
@@ -1910,16 +1910,19 @@
dataType,
inputsSchema,
outputsSchema,
metadata,
}: {
description?: string;
dataType?: DataType;
inputsSchema?: KVMap;
outputsSchema?: KVMap;
metadata?: RecordStringAny;
} = {}
): Promise<Dataset> {
const body: KVMap = {
name,
description,
extra: metadata ? { metadata } : undefined,
};
if (dataType) {
body.data_type = dataType;
@@ -2065,12 +2068,14 @@
datasetIds,
datasetName,
datasetNameContains,
metadata,
}: {
limit?: number;
offset?: number;
datasetIds?: string[];
datasetName?: string;
datasetNameContains?: string;
metadata?: RecordStringAny;
} = {}): AsyncIterable<Dataset> {
const path = "/datasets";
const params = new URLSearchParams({
@@ -2088,6 +2093,9 @@
if (datasetNameContains !== undefined) {
params.append("name_contains", datasetNameContains);
}
if (metadata !== undefined) {
params.append("metadata", JSON.stringify(metadata));
}
for await (const datasets of this._getPaginated<Dataset>(path, params)) {
yield* datasets;
}
@@ -2646,7 +2654,7 @@
}

const feedbackResult = await evaluator.evaluateRun(run_, referenceExample);
const [_, feedbacks] = await this._logEvaluationFeedback(
feedbackResult,
run_,
sourceInfo
@@ -2976,7 +2984,7 @@
async _logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }
): Promise<[results: EvaluationResult[], feedbacks: Feedback[]]> {
const evalResults: Array<EvaluationResult> =
this._selectEvalResults(evaluatorResponse);
@@ -3015,7 +3023,7 @@
public async logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }
): Promise<EvaluationResult[]> {
const [results] = await this._logEvaluationFeedback(
evaluatorResponse,
@@ -3081,7 +3089,7 @@
promptIdentifier: string,
like: boolean
): Promise<LikePromptResponse> {
const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);
const response = await this.caller.call(
fetch,
`${this.apiUrl}/likes/${owner}/${promptName}`,
@@ -3186,7 +3194,7 @@
}

public async getPrompt(promptIdentifier: string): Promise<Prompt | null> {
const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);
const response = await this.caller.call(
fetch,
`${this.apiUrl}/repos/${owner}/${promptName}`,
@@ -3230,7 +3238,7 @@
);
}

const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);
if (!(await this._currentTenantIsOwner(owner))) {
throw await this._ownerConflictError("create a prompt", owner);
}
@@ -3259,7 +3267,7 @@

public async createCommit(
promptIdentifier: string,
object: any,
options?: {
parentCommitHash?: string;
}
@@ -3268,7 +3276,7 @@
throw new Error("Prompt does not exist, you must create it first.");
}

const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);
const resolvedParentCommitHash =
options?.parentCommitHash === "latest" || !options?.parentCommitHash
? await this._getLatestCommitHash(`${owner}/${promptName}`)
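For context, a minimal TypeScript sketch of how the two new options in js/src/client.ts fit together from a caller's perspective. It assumes API credentials are configured via environment variables; the dataset name and metadata keys are purely illustrative.

import { Client } from "langsmith";

// Assumes credentials are provided via the environment (e.g. LANGCHAIN_API_KEY).
const client = new Client();

async function demo() {
  // Attach metadata at creation time; the client sends it as `extra.metadata`.
  const dataset = await client.createDataset("metadata-filter-demo", {
    description: "Illustrative dataset for the new metadata filter",
    metadata: { team: "search", stage: "staging" },
  });

  // Filter datasets server-side; listDatasets JSON-serializes the filter
  // object into the `metadata` query parameter.
  for await (const ds of client.listDatasets({ metadata: { team: "search" } })) {
    console.log(ds.id, ds.name);
  }

  await client.deleteDataset({ datasetName: dataset.name });
}

demo().catch(console.error);

Serializing the whole filter object with JSON.stringify keeps the query interface flexible: any subset of keys can be matched without adding new query parameters.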
2 changes: 1 addition & 1 deletion js/src/index.ts
@@ -12,4 +12,4 @@ export type {
export { RunTree, type RunTreeConfig } from "./run_trees.js";

// Update using yarn bump-version
export const __version__ = "0.1.44";
export const __version__ = "0.1.45";
7 changes: 7 additions & 0 deletions js/src/tests/client.int.test.ts
@@ -183,6 +183,7 @@ test.concurrent(
});
const dataset = await langchainClient.createDataset(datasetName, {
dataType: "llm",
metadata: { key: "valuefoo" },
});
await langchainClient.createExample(
{ input: "hello world" },
@@ -193,6 +194,12 @@
);
const loadedDataset = await langchainClient.readDataset({ datasetName });
expect(loadedDataset.data_type).toEqual("llm");

const datasetsByMetadata = await toArray(
langchainClient.listDatasets({ metadata: { key: "valuefoo" } })
);
expect(datasetsByMetadata.length).toEqual(1);
expect(datasetsByMetadata.map((d) => d.id)).toContain(dataset.id);
await langchainClient.deleteDataset({ datasetName });
},
180_000
7 changes: 7 additions & 0 deletions python/langsmith/client.py
@@ -2504,6 +2504,7 @@ def create_dataset(
data_type: ls_schemas.DataType = ls_schemas.DataType.kv,
inputs_schema: Optional[Dict[str, Any]] = None,
outputs_schema: Optional[Dict[str, Any]] = None,
metadata: Optional[dict] = None,
) -> ls_schemas.Dataset:
"""Create a dataset in the LangSmith API.

@@ -2515,6 +2516,8 @@
The description of the dataset.
data_type : DataType or None, default=DataType.kv
The data type of the dataset.
metadata: dict or None, default=None
Additional metadata to associate with the dataset.

Returns:
-------
@@ -2525,6 +2528,7 @@
"name": dataset_name,
"data_type": data_type.value,
"created_at": datetime.datetime.now().isoformat(),
"extra": {"metadata": metadata} if metadata else None,
}
if description is not None:
dataset["description"] = description
@@ -2737,6 +2741,7 @@ def list_datasets(
data_type: Optional[str] = None,
dataset_name: Optional[str] = None,
dataset_name_contains: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
limit: Optional[int] = None,
) -> Iterator[ls_schemas.Dataset]:
"""List the datasets on the LangSmith API.
@@ -2757,6 +2762,8 @@
params["name"] = dataset_name
if dataset_name_contains is not None:
params["name_contains"] = dataset_name_contains
if metadata is not None:
params["metadata"] = json.dumps(metadata)
for i, dataset in enumerate(
self._get_paginated_list("/datasets", params=params)
):
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.1.102"
version = "0.1.103"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <support@langchain.dev>"]
license = "MIT"
11 changes: 10 additions & 1 deletion python/tests/integration_tests/test_client.py
@@ -461,7 +461,9 @@ def test_list_datasets(langchain_client: Client) -> None:
ds1n = "__test_list_datasets1" + uuid4().hex[:4]
ds2n = "__test_list_datasets2" + uuid4().hex[:4]
try:
dataset1 = langchain_client.create_dataset(ds1n, data_type=DataType.llm)
dataset1 = langchain_client.create_dataset(
ds1n, data_type=DataType.llm, metadata={"foo": "barqux"}
)
dataset2 = langchain_client.create_dataset(ds2n, data_type=DataType.kv)
assert dataset1.url is not None
assert dataset2.url is not None
@@ -484,6 +486,13 @@
)
)
assert len(datasets) == 1
# Sub-filter on metadata
datasets = list(
langchain_client.list_datasets(
dataset_ids=[dataset1.id, dataset2.id], metadata={"foo": "barqux"}
)
)
assert len(datasets) == 1
finally:
# Delete datasets
for name in [ds1n, ds2n]: