Skip to content

Commit

Permalink
chat_loaders refactoring (#10381)
Browse files Browse the repository at this point in the history
Replaced unnecessary namespace renaming
`from langchain.chat_loaders import base as chat_loaders`
with
`from langchain.chat_loaders.base import BaseChatLoader, ChatSession` 
and simplified the corresponding type annotations.

@eyurtsev
  • Loading branch information
leo-gan authored Sep 9, 2023
1 parent 40d9191 commit 90504fc
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 33 deletions.
14 changes: 7 additions & 7 deletions libs/langchain/langchain/chat_loaders/imessage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@
from typing import TYPE_CHECKING, Iterator, List, Optional, Union

from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession

if TYPE_CHECKING:
import sqlite3


class IMessageChatLoader(chat_loaders.BaseChatLoader):
class IMessageChatLoader(BaseChatLoader):
"""Load chat sessions from the `iMessage` chat.db SQLite file.
It only works on macOS when you have iMessage enabled and have the chat.db file.
The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
terminal may not have permission to access this file. To resolve this, you can
copy the file to a different location, change the permissions of the file, or
grant full disk access for your terminal emulator in System Settings > Security
and Privacy > Full Disk Access.
grant full disk access for your terminal emulator
in System Settings > Security and Privacy > Full Disk Access.
"""

def __init__(self, path: Optional[Union[str, Path]] = None):
Expand All @@ -46,7 +46,7 @@ def __init__(self, path: Optional[Union[str, Path]] = None):

def _load_single_chat_session(
self, cursor: "sqlite3.Cursor", chat_id: int
) -> chat_loaders.ChatSession:
) -> ChatSession:
"""
Load a single chat session from the iMessage chat.db.
Expand Down Expand Up @@ -83,9 +83,9 @@ def _load_single_chat_session(
)
)

return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)

def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""
Lazy load the chat sessions from the iMessage chat.db
and yield them in the required format.
Expand Down
12 changes: 5 additions & 7 deletions libs/langchain/langchain/chat_loaders/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from typing import Dict, Iterator, List, Union

from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession

logger = logging.getLogger(__name__)


class SlackChatLoader(chat_loaders.BaseChatLoader):
class SlackChatLoader(BaseChatLoader):
"""Load `Slack` conversations from a dump zip file."""

def __init__(
Expand All @@ -27,9 +27,7 @@ def __init__(
if not self.zip_path.exists():
raise FileNotFoundError(f"File {self.zip_path} not found")

def _load_single_chat_session(
self, messages: List[Dict]
) -> chat_loaders.ChatSession:
def _load_single_chat_session(self, messages: List[Dict]) -> ChatSession:
results: List[Union[schema.AIMessage, schema.HumanMessage]] = []
previous_sender = None
for message in messages:
Expand Down Expand Up @@ -62,7 +60,7 @@ def _load_single_chat_session(
)
)
previous_sender = sender
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)

def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
"""Read JSON data from a zip subfile."""
Expand All @@ -72,7 +70,7 @@ def _read_json(self, zip_file: zipfile.ZipFile, file_path: str) -> List[dict]:
raise ValueError(f"Expected list of dictionaries, got {type(data)}")
return data

def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""
Lazy load the chat sessions from the Slack dump file and yield them
in the required format.
Expand Down
24 changes: 10 additions & 14 deletions libs/langchain/langchain/chat_loaders/telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from typing import Iterator, List, Union

from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession

logger = logging.getLogger(__name__)


class TelegramChatLoader(chat_loaders.BaseChatLoader):
class TelegramChatLoader(BaseChatLoader):
"""Load `telegram` conversations to LangChain chat messages.
To export, use the Telegram Desktop app from
Expand All @@ -35,16 +35,14 @@ def __init__(
"""
self.path = path if isinstance(path, str) else str(path)

def _load_single_chat_session_html(
self, file_path: str
) -> chat_loaders.ChatSession:
def _load_single_chat_session_html(self, file_path: str) -> ChatSession:
"""Load a single chat session from an HTML file.
Args:
file_path (str): Path to the HTML file.
Returns:
chat_loaders.ChatSession: The loaded chat session.
ChatSession: The loaded chat session.
"""
try:
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -81,18 +79,16 @@ def _load_single_chat_session_html(
)
previous_sender = from_name

return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)

def _load_single_chat_session_json(
self, file_path: str
) -> chat_loaders.ChatSession:
def _load_single_chat_session_json(self, file_path: str) -> ChatSession:
"""Load a single chat session from a JSON file.
Args:
file_path (str): Path to the JSON file.
Returns:
chat_loaders.ChatSession: The loaded chat session.
ChatSession: The loaded chat session.
"""
with open(file_path, "r", encoding="utf-8") as file:
data = json.load(file)
Expand All @@ -114,7 +110,7 @@ def _load_single_chat_session_json(
)
)

return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)

def _iterate_files(self, path: str) -> Iterator[str]:
"""Iterate over files in a directory or zip file.
Expand All @@ -139,12 +135,12 @@ def _iterate_files(self, path: str) -> Iterator[str]:
with tempfile.TemporaryDirectory() as temp_dir:
yield zip_file.extract(file, path=temp_dir)

def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""Lazy load the messages from the chat file and yield them
in as chat sessions.
Yields:
chat_loaders.ChatSession: The loaded chat session.
ChatSession: The loaded chat session.
"""
for file_path in self._iterate_files(self.path):
if file_path.endswith(".html"):
Expand Down
10 changes: 5 additions & 5 deletions libs/langchain/langchain/chat_loaders/whatsapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from typing import Iterator, List, Union

from langchain import schema
from langchain.chat_loaders import base as chat_loaders
from langchain.chat_loaders.base import BaseChatLoader, ChatSession
from langchain.schema import messages

logger = logging.getLogger(__name__)


class WhatsAppChatLoader(chat_loaders.BaseChatLoader):
class WhatsAppChatLoader(BaseChatLoader):
"""Load `WhatsApp` conversations from a dump zip file or directory."""

def __init__(self, path: str):
Expand Down Expand Up @@ -42,7 +42,7 @@ def __init__(self, path: str):
flags=re.IGNORECASE,
)

def _load_single_chat_session(self, file_path: str) -> chat_loaders.ChatSession:
def _load_single_chat_session(self, file_path: str) -> ChatSession:
"""Load a single chat session from a file.
Args:
Expand Down Expand Up @@ -84,7 +84,7 @@ def _load_single_chat_session(self, file_path: str) -> chat_loaders.ChatSession:
)
else:
logger.debug(f"Could not parse line: {line}")
return chat_loaders.ChatSession(messages=results)
return ChatSession(messages=results)

def _iterate_files(self, path: str) -> Iterator[str]:
"""Iterate over the files in a directory or zip file.
Expand All @@ -108,7 +108,7 @@ def _iterate_files(self, path: str) -> Iterator[str]:
if file.endswith(".txt"):
yield zip_file.extract(file)

def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
def lazy_load(self) -> Iterator[ChatSession]:
"""Lazy load the messages from the chat file and yield
them as chat sessions.
Expand Down

0 comments on commit 90504fc

Please sign in to comment.