infra: update mypy 1.10, ruff 0.5 (#23721)
```python
"""python scripts/update_mypy_ruff.py"""
import glob
import tomllib
from pathlib import Path

import toml
import subprocess
import re

ROOT_DIR = Path(__file__).parents[1]


def main():
    for path in glob.glob(str(ROOT_DIR / "libs/**/pyproject.toml"), recursive=True):
        print(path)
        with open(path, "rb") as f:
            pyproject = tomllib.load(f)
        try:
            pyproject["tool"]["poetry"]["group"]["typing"]["dependencies"]["mypy"] = (
                "^1.10"
            )
            pyproject["tool"]["poetry"]["group"]["lint"]["dependencies"]["ruff"] = (
                "^0.5"
            )
        except KeyError:
            continue
        with open(path, "w") as f:
            toml.dump(pyproject, f)
        cwd = "/".join(path.split("/")[:-1])
        completed = subprocess.run(
            "poetry lock --no-update; poetry install --with typing; poetry run mypy . --no-color",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )
        logs = completed.stdout.split("\n")

        to_ignore = {}
        for l in logs:
            if re.match("^(.*)\:(\d+)\: error:.*\[(.*)\]", l):
                path, line_no, error_type = re.match(
                    "^(.*)\:(\d+)\: error:.*\[(.*)\]", l
                ).groups()
                if (path, line_no) in to_ignore:
                    to_ignore[(path, line_no)].append(error_type)
                else:
                    to_ignore[(path, line_no)] = [error_type]
        print(len(to_ignore))
        for (error_path, line_no), error_types in to_ignore.items():
            all_errors = ", ".join(error_types)
            full_path = f"{cwd}/{error_path}"
            try:
                with open(full_path, "r") as f:
                    file_lines = f.readlines()
            except FileNotFoundError:
                continue
            file_lines[int(line_no) - 1] = (
                file_lines[int(line_no) - 1][:-1] + f"  # type: ignore[{all_errors}]\n"
            )
            with open(full_path, "w") as f:
                f.write("".join(file_lines))

        subprocess.run(
            "poetry run ruff format .; poetry run ruff --select I --fix .",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )


if __name__ == "__main__":
    main()

```
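For reference, a minimal sketch of how the error-collection step above parses mypy output; the sample output line and file path are illustrative, not taken from the actual run:

```python
import re

# Same pattern the script uses: captures file path, line number, and error code.
MYPY_ERROR_RE = re.compile(r"^(.*):(\d+): error:.*\[(.*)\]")

# Hypothetical mypy output line, for illustration only.
sample = "langchain_community/cache.py:42: error: Incompatible types in assignment  [assignment]"

match = MYPY_ERROR_RE.match(sample)
assert match is not None
error_path, line_no, error_type = match.groups()
print(error_path, line_no, error_type)  # langchain_community/cache.py 42 assignment
# The script would then append "  # type: ignore[assignment]" to line 42 of that file.
```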
baskaryan authored Jul 3, 2024
1 parent 6cd5682 commit a0c2281
Showing 915 changed files with 4,750 additions and 4,038 deletions.
38 changes: 20 additions & 18 deletions .github/actions/people/app/main.py
@@ -350,11 +350,7 @@ def get_graphql_pr_edges(*, settings: Settings, after: Union[str, None] = None):
print("Querying PRs...")
else:
print(f"Querying PRs with cursor {after}...")
data = get_graphql_response(
settings=settings,
query=prs_query,
after=after
)
data = get_graphql_response(settings=settings, query=prs_query, after=after)
graphql_response = PRsResponse.model_validate(data)
return graphql_response.data.repository.pullRequests.edges

@@ -484,10 +480,16 @@ def get_contributors(settings: Settings):
lines_changed = pr.additions + pr.deletions
score = _logistic(files_changed, 20) + _logistic(lines_changed, 100)
contributor_scores[pr.author.login] += score
three_months_ago = (datetime.now(timezone.utc) - timedelta(days=3*30))
three_months_ago = datetime.now(timezone.utc) - timedelta(days=3 * 30)
if pr.createdAt > three_months_ago:
recent_contributor_scores[pr.author.login] += score
return contributors, contributor_scores, recent_contributor_scores, reviewers, authors
return (
contributors,
contributor_scores,
recent_contributor_scores,
reviewers,
authors,
)


def get_top_users(
@@ -524,9 +526,13 @@ def get_top_users(
# question_commentors, question_last_month_commentors, question_authors = get_experts(
# settings=settings
# )
contributors, contributor_scores, recent_contributor_scores, reviewers, pr_authors = get_contributors(
settings=settings
)
(
contributors,
contributor_scores,
recent_contributor_scores,
reviewers,
pr_authors,
) = get_contributors(settings=settings)
# authors = {**question_authors, **pr_authors}
authors = {**pr_authors}
maintainers_logins = {
@@ -559,7 +565,7 @@ def get_top_users(
maintainers.append(
{
"login": login,
"count": contributors[login], #+ question_commentors[login],
"count": contributors[login], # + question_commentors[login],
"avatarUrl": user.avatarUrl,
"twitterUsername": user.twitterUsername,
"url": user.url,
@@ -615,9 +621,7 @@ def get_top_users(
new_people_content = yaml.dump(
people, sort_keys=False, width=200, allow_unicode=True
)
if (
people_old_content == new_people_content
):
if people_old_content == new_people_content:
logging.info("The LangChain People data hasn't changed, finishing.")
sys.exit(0)
people_path.write_text(new_people_content, encoding="utf-8")
@@ -630,9 +634,7 @@ def get_top_users(
logging.info(f"Creating a new branch {branch_name}")
subprocess.run(["git", "checkout", "-B", branch_name], check=True)
logging.info("Adding updated file")
subprocess.run(
["git", "add", str(people_path)], check=True
)
subprocess.run(["git", "add", str(people_path)], check=True)
logging.info("Committing updated file")
message = "👥 Update LangChain people data"
result = subprocess.run(["git", "commit", "-m", message], check=True)
@@ -641,4 +643,4 @@ def get_top_users(
logging.info("Creating PR")
pr = repo.create_pull(title=message, body=message, base="master", head=branch_name)
logging.info(f"Created PR: {pr.number}")
logging.info("Finished")
logging.info("Finished")
27 changes: 18 additions & 9 deletions .github/scripts/check_diff.py
@@ -1,11 +1,12 @@
import glob
import json
import sys
import os
from typing import Dict, List, Set

import re
import sys
import tomllib
from collections import defaultdict
import glob
from typing import Dict, List, Set


LANGCHAIN_DIRS = [
"libs/core",
@@ -15,8 +16,13 @@
"libs/experimental",
]


def all_package_dirs() -> Set[str]:
return {"/".join(path.split("/")[:-1]) for path in glob.glob("./libs/**/pyproject.toml", recursive=True)}
return {
"/".join(path.split("/")[:-1]).lstrip("./")
for path in glob.glob("./libs/**/pyproject.toml", recursive=True)
if "libs/cli" not in path and "libs/standard-tests" not in path
}


def dependents_graph() -> dict:
@@ -26,9 +32,9 @@ def dependents_graph():
if "template" in path:
continue
with open(path, "rb") as f:
pyproject = tomllib.load(f)['tool']['poetry']
pyproject = tomllib.load(f)["tool"]["poetry"]
pkg_dir = "libs" + "/".join(path.split("libs")[1].split("/")[:-1])
for dep in pyproject['dependencies']:
for dep in pyproject["dependencies"]:
if "langchain" in dep:
dependents[dep].add(pkg_dir)
return dependents
@@ -122,9 +128,12 @@ def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:

outputs = {
"dirs-to-lint": add_dependents(
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
dependents,
),
"dirs-to-test": add_dependents(
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
),
"dirs-to-test": add_dependents(dirs_to_run["test"] | dirs_to_run["extended-test"], dependents),
"dirs-to-extended-test": list(dirs_to_run["extended-test"]),
"docs-edited": "true" if docs_edited else "",
}
4 changes: 1 addition & 3 deletions .github/scripts/get_min_versions.py
@@ -74,6 +74,4 @@ def get_min_version_from_toml(toml_path: str):
# Call the function to get the minimum versions
min_versions = get_min_version_from_toml(toml_file)

print(
" ".join([f"{lib}=={version}" for lib, version in min_versions.items()])
)
print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()]))
6 changes: 3 additions & 3 deletions libs/community/Makefile
@@ -48,14 +48,14 @@ lint lint_diff lint_package lint_tests:
./scripts/check_pydantic.sh .
./scripts/lint_imports.sh
./scripts/check_pickle.sh .
poetry run ruff .
poetry run ruff check .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
poetry run ruff format $(PYTHON_FILES)
poetry run ruff --select I --fix $(PYTHON_FILES)
poetry run ruff check --select I --fix $(PYTHON_FILES)

spell_check:
poetry run codespell --toml pyproject.toml
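The Makefile changes above track a ruff 0.5 breaking change: the deprecated bare `ruff <path>` invocation was removed, so linting now goes through the explicit `ruff check` subcommand. A sketch of the equivalent commands run programmatically, assuming an example package directory such as libs/community:

```python
import subprocess

pkg_dir = "libs/community"  # assumed example package; any migrated lib works

# ruff 0.5: `ruff check` lints (bare `ruff .` no longer works); `ruff format` formats.
subprocess.run(["poetry", "run", "ruff", "check", ".", "--select", "I", "--fix"], cwd=pkg_dir, check=True)
subprocess.run(["poetry", "run", "ruff", "format", "."], cwd=pkg_dir, check=True)
```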
1 change: 1 addition & 0 deletions libs/community/langchain_community/__init__.py
@@ -1,4 +1,5 @@
"""Main entrypoint into package."""

from importlib import metadata

try:
24 changes: 8 additions & 16 deletions libs/community/langchain_community/adapters/openai.py
@@ -206,8 +206,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> dict:
...
) -> dict: ...

@overload
@staticmethod
@@ -217,8 +216,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> Iterable:
...
) -> Iterable: ...

@staticmethod
def create(
@@ -249,8 +247,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> dict:
...
) -> dict: ...

@overload
@staticmethod
@@ -260,8 +257,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> AsyncIterator:
...
) -> AsyncIterator: ...

@staticmethod
async def acreate(
@@ -319,8 +315,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> ChatCompletions:
...
) -> ChatCompletions: ...

@overload
@staticmethod
@@ -330,8 +325,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> Iterable:
...
) -> Iterable: ...

@staticmethod
def create(
@@ -366,8 +360,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> ChatCompletions:
...
) -> ChatCompletions: ...

@overload
@staticmethod
@@ -377,8 +370,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> AsyncIterator:
...
) -> AsyncIterator: ...

@staticmethod
async def acreate(
16 changes: 8 additions & 8 deletions libs/community/langchain_community/cache.py
@@ -2189,14 +2189,14 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBVectorSearch:
index_name=index_name,
)
else:
self._cache_dict[
index_name
] = AzureCosmosDBVectorSearch.from_connection_string(
connection_string=self.cosmosdb_connection_string,
namespace=namespace,
embedding=self.embedding,
index_name=index_name,
application_name=self.application_name,
self._cache_dict[index_name] = (
AzureCosmosDBVectorSearch.from_connection_string(
connection_string=self.cosmosdb_connection_string,
namespace=namespace,
embedding=self.embedding,
index_name=index_name,
application_name=self.application_name,
)
)

# create index for the vectorstore
1 change: 1 addition & 0 deletions libs/community/langchain_community/callbacks/__init__.py
@@ -6,6 +6,7 @@
BaseCallbackHandler --> <name>CallbackHandler # Example: AimCallbackHandler
"""

import importlib
from typing import TYPE_CHECKING, Any

Original file line number Diff line number Diff line change
@@ -82,9 +82,9 @@ def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"completion_tokens", 0
)
else:
self.prompt_tokens = (
self.total_tokens
) = self.completion_tokens = 0 # assign default value
self.prompt_tokens = self.total_tokens = self.completion_tokens = (
0 # assign default value
)

for generations in response.generations:
for generation in generations:
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""ArthurAI's Callback Handler."""

from __future__ import annotations

import os
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Callback handler for promptlayer."""

from __future__ import annotations

import datetime
7 changes: 4 additions & 3 deletions libs/community/langchain_community/callbacks/tracers/wandb.py
@@ -1,4 +1,5 @@
"""A Tracer Implementation that records activity to Weights & Biases."""

from __future__ import annotations

import json
@@ -234,9 +235,9 @@ def build_tree(runs: List[Dict[str, Any]]) -> Dict[str, Any]:

for child_id, parent_id in child_to_parent.items():
parent_dict = id_to_data[parent_id]
parent_dict[next(iter(parent_dict))][
next(iter(id_to_data[child_id]))
] = id_to_data[child_id][next(iter(id_to_data[child_id]))]
parent_dict[next(iter(parent_dict))][next(iter(id_to_data[child_id]))] = (
id_to_data[child_id][next(iter(id_to_data[child_id]))]
)

root_dict = next(
data for id_val, data in id_to_data.items() if id_val not in child_to_parent
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Methods for creating chains that use Ernie function-calling APIs."""

import inspect
from typing import (
Any,
@@ -191,9 +192,9 @@ def get_ernie_output_parser(
}
else:
pydantic_schema = functions[0]
output_parser: Union[
BaseOutputParser, BaseGenerationOutputParser
] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
output_parser: Union[BaseOutputParser, BaseGenerationOutputParser] = (
PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
)
else:
output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
return output_parser
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
1 change: 1 addition & 0 deletions libs/community/langchain_community/chains/graph_qa/base.py
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional
1 change: 1 addition & 0 deletions libs/community/langchain_community/chains/graph_qa/kuzu.py
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional

1 comment on commit a0c2281

@shirly59


Hello,

A well-known major tech company's cloud intelligence unit is hiring: LLM Application / Senior / Solutions Architect.

Responsibilities
Serving internet and enterprise customers across marketing, gaming, social, reading, personal-assistant, and education scenarios, design deployable LLM application solutions built on large models, using langchain, RAG, Agent, and related technical architectures combined with public cloud products, to solve customers' real-world problems.

Requirements
Hands-on experience with LLM-related technology is preferred, e.g., practical experience in one or more of: identifying business scenarios for LLM adoption, data cleaning/construction, LLM training and tuning, retrieval-augmented generation (RAG), or LLM Agents.

Based in Chaoyang District, Beijing.

Any friends interested in learning more?
