infra: update mypy 1.10, ruff 0.5 (#23721)
```python
"""python scripts/update_mypy_ruff.py"""
import glob
import tomllib
from pathlib import Path

import toml
import subprocess
import re

ROOT_DIR = Path(__file__).parents[1]


def main():
    for path in glob.glob(str(ROOT_DIR / "libs/**/pyproject.toml"), recursive=True):
        print(path)
        with open(path, "rb") as f:
            pyproject = tomllib.load(f)
        try:
            pyproject["tool"]["poetry"]["group"]["typing"]["dependencies"]["mypy"] = (
                "^1.10"
            )
            pyproject["tool"]["poetry"]["group"]["lint"]["dependencies"]["ruff"] = (
                "^0.5"
            )
        except KeyError:
            continue
        with open(path, "w") as f:
            toml.dump(pyproject, f)
        cwd = "/".join(path.split("/")[:-1])
        completed = subprocess.run(
            "poetry lock --no-update; poetry install --with typing; poetry run mypy . --no-color",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )
        logs = completed.stdout.split("\n")

        to_ignore = {}
        for l in logs:
            if re.match("^(.*)\:(\d+)\: error:.*\[(.*)\]", l):
                path, line_no, error_type = re.match(
                    "^(.*)\:(\d+)\: error:.*\[(.*)\]", l
                ).groups()
                if (path, line_no) in to_ignore:
                    to_ignore[(path, line_no)].append(error_type)
                else:
                    to_ignore[(path, line_no)] = [error_type]
        print(len(to_ignore))
        for (error_path, line_no), error_types in to_ignore.items():
            all_errors = ", ".join(error_types)
            full_path = f"{cwd}/{error_path}"
            try:
                with open(full_path, "r") as f:
                    file_lines = f.readlines()
            except FileNotFoundError:
                continue
            file_lines[int(line_no) - 1] = (
                file_lines[int(line_no) - 1][:-1] + f"  # type: ignore[{all_errors}]\n"
            )
            with open(full_path, "w") as f:
                f.write("".join(file_lines))

        subprocess.run(
            "poetry run ruff format .; poetry run ruff --select I --fix .",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )


if __name__ == "__main__":
    main()

```
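For reference, a minimal sketch of how the error-collection step above parses mypy output; the sample output line and file path are illustrative, not taken from the actual run:

```python
import re

# Same pattern the script uses: captures file path, line number, and error code.
MYPY_ERROR_RE = re.compile(r"^(.*):(\d+): error:.*\[(.*)\]")

# Hypothetical mypy output line, for illustration only.
sample = "langchain_community/cache.py:42: error: Incompatible types in assignment  [assignment]"

match = MYPY_ERROR_RE.match(sample)
assert match is not None
error_path, line_no, error_type = match.groups()
print(error_path, line_no, error_type)  # langchain_community/cache.py 42 assignment
# The script would then append "  # type: ignore[assignment]" to line 42 of that file.
```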
baskaryan authored Jul 3, 2024
1 parent 6cd5682 commit a0c2281
Showing 915 changed files with 4,750 additions and 4,038 deletions.
38 changes: 20 additions & 18 deletions .github/actions/people/app/main.py
@@ -350,11 +350,7 @@ def get_graphql_pr_edges(*, settings: Settings, after: Union[str, None] = None):
print("Querying PRs...")
else:
print(f"Querying PRs with cursor {after}...")
data = get_graphql_response(
settings=settings,
query=prs_query,
after=after
)
data = get_graphql_response(settings=settings, query=prs_query, after=after)
graphql_response = PRsResponse.model_validate(data)
return graphql_response.data.repository.pullRequests.edges

@@ -484,10 +480,16 @@ def get_contributors(settings: Settings):
lines_changed = pr.additions + pr.deletions
score = _logistic(files_changed, 20) + _logistic(lines_changed, 100)
contributor_scores[pr.author.login] += score
three_months_ago = (datetime.now(timezone.utc) - timedelta(days=3*30))
three_months_ago = datetime.now(timezone.utc) - timedelta(days=3 * 30)
if pr.createdAt > three_months_ago:
recent_contributor_scores[pr.author.login] += score
return contributors, contributor_scores, recent_contributor_scores, reviewers, authors
return (
contributors,
contributor_scores,
recent_contributor_scores,
reviewers,
authors,
)


def get_top_users(
@@ -524,9 +526,13 @@ def get_top_users(
# question_commentors, question_last_month_commentors, question_authors = get_experts(
# settings=settings
# )
contributors, contributor_scores, recent_contributor_scores, reviewers, pr_authors = get_contributors(
settings=settings
)
(
contributors,
contributor_scores,
recent_contributor_scores,
reviewers,
pr_authors,
) = get_contributors(settings=settings)
# authors = {**question_authors, **pr_authors}
authors = {**pr_authors}
maintainers_logins = {
@@ -559,7 +565,7 @@ def get_top_users(
maintainers.append(
{
"login": login,
"count": contributors[login], #+ question_commentors[login],
"count": contributors[login], # + question_commentors[login],
"avatarUrl": user.avatarUrl,
"twitterUsername": user.twitterUsername,
"url": user.url,
@@ -615,9 +621,7 @@ def get_top_users(
new_people_content = yaml.dump(
people, sort_keys=False, width=200, allow_unicode=True
)
if (
people_old_content == new_people_content
):
if people_old_content == new_people_content:
logging.info("The LangChain People data hasn't changed, finishing.")
sys.exit(0)
people_path.write_text(new_people_content, encoding="utf-8")
@@ -630,9 +634,7 @@ def get_top_users(
logging.info(f"Creating a new branch {branch_name}")
subprocess.run(["git", "checkout", "-B", branch_name], check=True)
logging.info("Adding updated file")
subprocess.run(
["git", "add", str(people_path)], check=True
)
subprocess.run(["git", "add", str(people_path)], check=True)
logging.info("Committing updated file")
message = "👥 Update LangChain people data"
result = subprocess.run(["git", "commit", "-m", message], check=True)
@@ -641,4 +643,4 @@ def get_top_users(
logging.info("Creating PR")
pr = repo.create_pull(title=message, body=message, base="master", head=branch_name)
logging.info(f"Created PR: {pr.number}")
logging.info("Finished")
logging.info("Finished")
27 changes: 18 additions & 9 deletions .github/scripts/check_diff.py
@@ -1,11 +1,12 @@
import glob
import json
import sys
import os
from typing import Dict, List, Set

import re
import sys
import tomllib
from collections import defaultdict
import glob
from typing import Dict, List, Set


LANGCHAIN_DIRS = [
"libs/core",
@@ -15,8 +16,13 @@
"libs/experimental",
]


def all_package_dirs() -> Set[str]:
return {"/".join(path.split("/")[:-1]) for path in glob.glob("./libs/**/pyproject.toml", recursive=True)}
return {
"/".join(path.split("/")[:-1]).lstrip("./")
for path in glob.glob("./libs/**/pyproject.toml", recursive=True)
if "libs/cli" not in path and "libs/standard-tests" not in path
}


def dependents_graph() -> dict:
@@ -26,9 +32,9 @@ def dependents_graph():
if "template" in path:
continue
with open(path, "rb") as f:
pyproject = tomllib.load(f)['tool']['poetry']
pyproject = tomllib.load(f)["tool"]["poetry"]
pkg_dir = "libs" + "/".join(path.split("libs")[1].split("/")[:-1])
for dep in pyproject['dependencies']:
for dep in pyproject["dependencies"]:
if "langchain" in dep:
dependents[dep].add(pkg_dir)
return dependents
@@ -122,9 +128,12 @@ def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:

outputs = {
"dirs-to-lint": add_dependents(
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
dependents,
),
"dirs-to-test": add_dependents(
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
),
"dirs-to-test": add_dependents(dirs_to_run["test"] | dirs_to_run["extended-test"], dependents),
"dirs-to-extended-test": list(dirs_to_run["extended-test"]),
"docs-edited": "true" if docs_edited else "",
}
4 changes: 1 addition & 3 deletions .github/scripts/get_min_versions.py
@@ -74,6 +74,4 @@ def get_min_version_from_toml(toml_path: str):
# Call the function to get the minimum versions
min_versions = get_min_version_from_toml(toml_file)

print(
" ".join([f"{lib}=={version}" for lib, version in min_versions.items()])
)
print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()]))
6 changes: 3 additions & 3 deletions libs/community/Makefile
@@ -48,14 +48,14 @@ lint lint_diff lint_package lint_tests:
./scripts/check_pydantic.sh .
./scripts/lint_imports.sh
./scripts/check_pickle.sh .
poetry run ruff .
poetry run ruff check .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
poetry run ruff format $(PYTHON_FILES)
poetry run ruff --select I --fix $(PYTHON_FILES)
poetry run ruff check --select I --fix $(PYTHON_FILES)

spell_check:
poetry run codespell --toml pyproject.toml
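The Makefile changes above track a ruff 0.5 breaking change: the deprecated bare `ruff <path>` invocation was removed, so linting now goes through the explicit `ruff check` subcommand. A sketch of the equivalent commands run programmatically, assuming an example package directory such as libs/community:

```python
import subprocess

pkg_dir = "libs/community"  # assumed example package; any migrated lib works

# ruff 0.5: `ruff check` lints (bare `ruff .` no longer works); `ruff format` formats.
subprocess.run(["poetry", "run", "ruff", "check", ".", "--select", "I", "--fix"], cwd=pkg_dir, check=True)
subprocess.run(["poetry", "run", "ruff", "format", "."], cwd=pkg_dir, check=True)
```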
1 change: 1 addition & 0 deletions libs/community/langchain_community/__init__.py
@@ -1,4 +1,5 @@
"""Main entrypoint into package."""

from importlib import metadata

try:
24 changes: 8 additions & 16 deletions libs/community/langchain_community/adapters/openai.py
@@ -206,8 +206,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> dict:
...
) -> dict: ...

@overload
@staticmethod
@@ -217,8 +216,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> Iterable:
...
) -> Iterable: ...

@staticmethod
def create(
@@ -249,8 +247,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> dict:
...
) -> dict: ...

@overload
@staticmethod
@@ -260,8 +257,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> AsyncIterator:
...
) -> AsyncIterator: ...

@staticmethod
async def acreate(
@@ -319,8 +315,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> ChatCompletions:
...
) -> ChatCompletions: ...

@overload
@staticmethod
@@ -330,8 +325,7 @@ def create(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> Iterable:
...
) -> Iterable: ...

@staticmethod
def create(
@@ -366,8 +360,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[False] = False,
**kwargs: Any,
) -> ChatCompletions:
...
) -> ChatCompletions: ...

@overload
@staticmethod
@@ -377,8 +370,7 @@ async def acreate(
provider: str = "ChatOpenAI",
stream: Literal[True],
**kwargs: Any,
) -> AsyncIterator:
...
) -> AsyncIterator: ...

@staticmethod
async def acreate(
16 changes: 8 additions & 8 deletions libs/community/langchain_community/cache.py
@@ -2189,14 +2189,14 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBVectorSearch:
index_name=index_name,
)
else:
self._cache_dict[
index_name
] = AzureCosmosDBVectorSearch.from_connection_string(
connection_string=self.cosmosdb_connection_string,
namespace=namespace,
embedding=self.embedding,
index_name=index_name,
application_name=self.application_name,
self._cache_dict[index_name] = (
AzureCosmosDBVectorSearch.from_connection_string(
connection_string=self.cosmosdb_connection_string,
namespace=namespace,
embedding=self.embedding,
index_name=index_name,
application_name=self.application_name,
)
)

# create index for the vectorstore
1 change: 1 addition & 0 deletions libs/community/langchain_community/callbacks/__init__.py
@@ -6,6 +6,7 @@
BaseCallbackHandler --> <name>CallbackHandler # Example: AimCallbackHandler
"""

import importlib
from typing import TYPE_CHECKING, Any

Original file line number Diff line number Diff line change
@@ -82,9 +82,9 @@ def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"completion_tokens", 0
)
else:
self.prompt_tokens = (
self.total_tokens
) = self.completion_tokens = 0 # assign default value
self.prompt_tokens = self.total_tokens = self.completion_tokens = (
0 # assign default value
)

for generations in response.generations:
for generation in generations:
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""ArthurAI's Callback Handler."""

from __future__ import annotations

import os
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Callback handler for promptlayer."""

from __future__ import annotations

import datetime
7 changes: 4 additions & 3 deletions libs/community/langchain_community/callbacks/tracers/wandb.py
@@ -1,4 +1,5 @@
"""A Tracer Implementation that records activity to Weights & Biases."""

from __future__ import annotations

import json
@@ -234,9 +235,9 @@ def build_tree(runs: List[Dict[str, Any]]) -> Dict[str, Any]:

for child_id, parent_id in child_to_parent.items():
parent_dict = id_to_data[parent_id]
parent_dict[next(iter(parent_dict))][
next(iter(id_to_data[child_id]))
] = id_to_data[child_id][next(iter(id_to_data[child_id]))]
parent_dict[next(iter(parent_dict))][next(iter(id_to_data[child_id]))] = (
id_to_data[child_id][next(iter(id_to_data[child_id]))]
)

root_dict = next(
data for id_val, data in id_to_data.items() if id_val not in child_to_parent
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Methods for creating chains that use Ernie function-calling APIs."""

import inspect
from typing import (
Any,
@@ -191,9 +192,9 @@ def get_ernie_output_parser(
}
else:
pydantic_schema = functions[0]
output_parser: Union[
BaseOutputParser, BaseGenerationOutputParser
] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
output_parser: Union[BaseOutputParser, BaseGenerationOutputParser] = (
PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
)
else:
output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
return output_parser
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
1 change: 1 addition & 0 deletions libs/community/langchain_community/chains/graph_qa/base.py
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional
1 change: 1 addition & 0 deletions libs/community/langchain_community/chains/graph_qa/kuzu.py
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

import re
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Question answering over a graph."""

from __future__ import annotations

from typing import Any, Dict, List, Optional

1 comment on commit a0c2281

@shirly59


Hello,

A well-known major tech company's cloud intelligence unit is hiring: LLM Application / Senior / Solutions Architect.

Responsibilities
Serving internet and enterprise customers across marketing, gaming, social, reading, personal-assistant, and education scenarios, design deployable LLM application solutions built on large models, using langchain, RAG, Agent, and related technical architectures combined with public cloud products, to solve customers' real-world problems.

Requirements
Hands-on experience with LLM-related technology is preferred, e.g., practical experience in one or more of: identifying business scenarios for LLM adoption, data cleaning/construction, LLM training and tuning, retrieval-augmented generation (RAG), or LLM Agents.

Based in Chaoyang District, Beijing.

Any friends interested in learning more?
