Skip to content

Commit

Permalink
VectorStores, DocumentLoaders, TextSplitters, Embeddings and other additions (#157)
Browse files Browse the repository at this point in the history

This release adds many bug fixes and exposes many other new nodes.
  • Loading branch information
ibiscp authored Apr 14, 2023
2 parents d3c1e25 + b87ee31 commit 4aa9bd6
Show file tree
Hide file tree
Showing 60 changed files with 3,213 additions and 407 deletions.
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ lerna-debug.log*
# Mac
.DS_Store

# VSCode
.vscode
.chroma
.ruff_cache

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

Expand Down Expand Up @@ -233,5 +238,5 @@ venv.bak/
.dmypy.json
dmypy.json

# Poetry
.testenv/*
# Poetry
.testenv/*
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ flow("Hey, have you heard of LangFlow?")

## 👋 Contributing

We welcome contributions from developers of all levels to our open-source project on GitHub. If you'd like to contribute, please check our contributing guidelines and help make LangFlow more accessible.
We welcome contributions from developers of all levels to our open-source project on GitHub. If you'd like to contribute, please check our [contributing guidelines](./CONTRIBUTING.md) and help make LangFlow more accessible.


[![Star History Chart](https://api.star-history.com/svg?repos=logspace-ai/langflow&type=Timeline)](https://star-history.com/#logspace-ai/langflow&Date)
Expand Down
2 changes: 1 addition & 1 deletion dev.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM python:3.10-slim
WORKDIR /app

# Install Poetry
RUN apt-get update && apt-get install gcc curl -y
RUN apt-get update && apt-get install gcc g++ curl build-essential postgresql-server-dev-all -y
RUN curl -sSL https://install.python-poetry.org | python3 -
# # Add Poetry to PATH
ENV PATH="${PATH}:/root/.local/bin"
Expand Down
2,172 changes: 2,072 additions & 100 deletions poetry.lock

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langflow"
version = "0.0.55"
version = "0.0.56"
description = "A Python package with a built-in web application"
authors = ["Logspace <contact@logspace.ai>"]
maintainers = [
Expand Down Expand Up @@ -34,8 +34,18 @@ openai = "^0.27.2"
types-pyyaml = "^6.0.12.8"
dill = "^0.3.6"
pandas = "^1.5.3"
chromadb = "^0.3.21"
huggingface-hub = "^0.13.3"
rich = "^13.3.3"
llama-cpp-python = "0.1.23"
networkx = "^3.1"
unstructured = "^0.5.11"
pypdf = "^3.7.1"
lxml = "^4.9.2"
pysrt = "^1.1.2"
fake-useragent = "^1.1.3"
docstring-parser = "^0.15"
psycopg2 = "^2.9.6"

[tool.poetry.group.dev.dependencies]
black = "^23.1.0"
Expand Down
4 changes: 2 additions & 2 deletions src/backend/langflow/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from fastapi import APIRouter, HTTPException

from langflow.interface.run import process_graph
from langflow.interface.run import process_graph_cached
from langflow.interface.types import build_langchain_types_dict

# build router
Expand All @@ -19,7 +19,7 @@ def get_all():
@router.post("/predict")
def get_load(data: Dict[str, Any]):
try:
return process_graph(data)
return process_graph_cached(data)
except Exception as e:
# Log stack trace
logger.exception(e)
Expand Down
93 changes: 85 additions & 8 deletions src/backend/langflow/cache/utils.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,62 @@
import base64
import contextlib
import functools
import hashlib
import json
import os
import tempfile
from collections import OrderedDict
from pathlib import Path

import dill # type: ignore


def create_cache_folder(func):
    """Decorator: ensure the langflow cache directory exists before calling *func*.

    The directory is ``<system tempdir>/<PREFIX>`` (module-level ``PREFIX``);
    it is created idempotently on every invocation, then *func* is called
    with its arguments unchanged and its result returned as-is.
    """

    @functools.wraps(func)  # preserve the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        # Destination folder shared by all cache helpers in this module.
        cache_path = Path(tempfile.gettempdir()) / PREFIX

        # exist_ok avoids an error (and a TOCTOU race) when it already exists.
        os.makedirs(cache_path, exist_ok=True)

        return func(*args, **kwargs)

    return wrapper


def memoize_dict(maxsize=128):
    """Decorator factory: LRU-memoize a function whose first positional
    argument is a JSON-serializable dict (hashed via ``compute_dict_hash``).

    Args:
        maxsize: maximum number of cached results; the least-recently-used
            entry is evicted when the cache grows past this size.

    The wrapped function gains a ``clear_cache()`` attribute that empties
    the cache shared by all calls to that wrapper.
    """
    cache: OrderedDict = OrderedDict()

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            hashed = compute_dict_hash(args[0])
            # kwargs are folded into the key; frozenset requires hashable values.
            key = (func.__name__, hashed, frozenset(kwargs.items()))
            if key in cache:
                # Refresh recency so eviction is true LRU, not FIFO.
                cache.move_to_end(key)
                return cache[key]
            result = func(*args, **kwargs)
            cache[key] = result
            if len(cache) > maxsize:
                # Evict the least-recently-used entry.
                cache.popitem(last=False)
            return result

        def clear_cache():
            cache.clear()

        wrapper.clear_cache = clear_cache
        return wrapper

    return decorator


PREFIX = "langflow_cache"


@create_cache_folder
def clear_old_cache_files(max_cache_size: int = 3):
cache_dir = Path(tempfile.gettempdir())
cache_files = list(cache_dir.glob(f"{PREFIX}_*.dill"))
cache_dir = Path(tempfile.gettempdir()) / PREFIX
cache_files = list(cache_dir.glob("*.dill"))

if len(cache_files) > max_cache_size:
cache_files_sorted_by_mtime = sorted(
Expand All @@ -24,6 +68,13 @@ def clear_old_cache_files(max_cache_size: int = 3):
os.remove(cache_file)


def compute_dict_hash(graph_data):
    """Return a SHA-256 hex digest of *graph_data* after normalization.

    The dict is first reduced through ``filter_json`` and then serialized
    with sorted keys, so logically-equal graphs hash identically.
    """
    canonical = json.dumps(filter_json(graph_data), sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


def filter_json(json_data):
filtered_data = json_data.copy()

Expand All @@ -48,24 +99,50 @@ def filter_json(json_data):
return filtered_data


def compute_hash(graph_data):
graph_data = filter_json(graph_data)
@create_cache_folder
def save_binary_file(content: str, file_name: str, accepted_types: list[str]) -> str:
"""
Save a binary file to the specified folder.
cleaned_graph_json = json.dumps(graph_data, sort_keys=True)
return hashlib.sha256(cleaned_graph_json.encode("utf-8")).hexdigest()
Args:
content: The content of the file as a bytes object.
file_name: The name of the file, including its extension.
Returns:
The path to the saved file.
"""
if not any(file_name.endswith(suffix) for suffix in accepted_types):
raise ValueError(f"File {file_name} is not accepted")

# Get the destination folder
cache_path = Path(tempfile.gettempdir()) / PREFIX

data = content.split(",")[1]
decoded_bytes = base64.b64decode(data)

# Create the full file path
file_path = os.path.join(cache_path, file_name)

# Save the binary content to the file
with open(file_path, "wb") as file:
file.write(decoded_bytes)

return file_path


@create_cache_folder
def save_cache(hash_val: str, chat_data, clean_old_cache_files: bool):
    """Serialize *chat_data* with dill to ``<tempdir>/<PREFIX>/<hash_val>.dill``.

    Args:
        hash_val: content hash used as the cache file name.
        chat_data: picklable object to persist.
        clean_old_cache_files: when True, prune the oldest .dill files
            afterwards via ``clear_old_cache_files``.
    """
    # Single path computation — the stale pre-refactor assignment that
    # pointed at "<tempdir>/{PREFIX}_{hash}.dill" was dead code and is removed.
    cache_path = Path(tempfile.gettempdir()) / PREFIX / f"{hash_val}.dill"
    with cache_path.open("wb") as cache_file:
        dill.dump(chat_data, cache_file)

    if clean_old_cache_files:
        clear_old_cache_files()


@create_cache_folder
def load_cache(hash_val):
cache_path = Path(tempfile.gettempdir()) / f"{PREFIX}_{hash_val}.dill"
cache_path = Path(tempfile.gettempdir()) / PREFIX / f"{hash_val}.dill"
if cache_path.exists():
with cache_path.open("rb") as cache_file:
return dill.load(cache_file)
Expand Down
42 changes: 39 additions & 3 deletions src/backend/langflow/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ agents:
- JsonAgent
- CSVAgent
- initialize_agent
- VectorStoreAgent
- VectorStoreRouterAgent
- SQLAgent

prompts:
- PromptTemplate
Expand All @@ -27,6 +30,7 @@ llms:
# - AzureOpenAI
- ChatOpenAI
- HuggingFaceHub
- LlamaCpp

tools:
- Search
Expand All @@ -43,16 +47,48 @@ wrappers:
toolkits:
- OpenAPIToolkit
- JsonToolkit
- VectorStoreInfo
- VectorStoreRouterToolkit

memories:
- ConversationBufferMemory
- ConversationSummaryMemory
- ConversationKGMemory

embeddings: []
embeddings:
- OpenAIEmbeddings

vectorstores: []
vectorstores:
- Chroma

documentloaders: []
documentloaders:
- AirbyteJSONLoader
- CoNLLULoader
- CSVLoader
- UnstructuredEmailLoader
- EverNoteLoader
- FacebookChatLoader
- GutenbergLoader
- BSHTMLLoader
- UnstructuredHTMLLoader
# - UnstructuredImageLoader # Issue with Python 3.11 (https://github.com/Unstructured-IO/unstructured-inference/issues/83)
- UnstructuredMarkdownLoader
- PyPDFLoader
- UnstructuredPowerPointLoader
- SRTLoader
- TelegramChatLoader
- TextLoader
- UnstructuredWordDocumentLoader
- WebBaseLoader
- AZLyricsLoader
- CollegeConfidentialLoader
- HNLoader
- IFixitLoader
- IMSDbLoader
- GitbookLoader
- ReadTheDocsLoader

textsplitters:
- CharacterTextSplitter

dev: false
3 changes: 3 additions & 0 deletions src/backend/langflow/custom/customs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"JsonAgent": nodes.JsonAgentNode(),
"CSVAgent": nodes.CSVAgentNode(),
"initialize_agent": nodes.InitializeAgentNode(),
"VectorStoreAgent": nodes.VectorStoreAgentNode(),
"VectorStoreRouterAgent": nodes.VectorStoreRouterAgentNode(),
"SQLAgent": nodes.SQLAgentNode(),
},
}

Expand Down
25 changes: 20 additions & 5 deletions src/backend/langflow/graph/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
# - Defer prompts building to the last moment or when they have all the tools
# - Build each inner agent first, then build the outer agent

import contextlib
import types
import warnings
from copy import deepcopy
from typing import Any, Dict, List, Optional

from langflow.cache import utils as cache_utils
from langflow.graph.constants import DIRECT_TYPES
from langflow.graph.utils import load_file
from langflow.interface import loading
from langflow.interface.listing import ALL_TYPES_DICT
from langflow.utils.logger import logger
Expand Down Expand Up @@ -88,11 +89,12 @@ def _build_params(self):
file_name = value.get("value")
content = value.get("content")
type_to_load = value.get("suffixes")
loaded_dict = load_file(file_name, content, type_to_load)
params[key] = loaded_dict
file_path = cache_utils.save_binary_file(
content=content, file_name=file_name, accepted_types=type_to_load
)

params[key] = file_path

# We should check if the type is in something not
# the opposite
elif value.get("type") not in DIRECT_TYPES:
# Get the edge that connects to this node
edges = [
Expand Down Expand Up @@ -126,6 +128,9 @@ def _build_params(self):
new_value = value.get("value")
if new_value is None:
warnings.warn(f"Value for {key} in {self.node_type} is None. ")
if value.get("type") == "int":
with contextlib.suppress(TypeError, ValueError):
new_value = int(new_value) # type: ignore
params[key] = new_value

# Add _type to params
Expand Down Expand Up @@ -160,6 +165,7 @@ def _build(self):
result = result.run # type: ignore
elif hasattr(result, "get_function"):
result = result.get_function() # type: ignore

self.params[key] = result
elif isinstance(value, list) and all(
isinstance(node, Node) for node in value
Expand Down Expand Up @@ -189,6 +195,15 @@ def _build(self):
def build(self, force: bool = False) -> Any:
if not self._built or force:
self._build()

#! Deepcopy is breaking for vectorstores
if self.base_type in [
"vectorstores",
"VectorStoreRouterAgent",
"VectorStoreAgent",
"VectorStoreInfo",
] or self.node_type in ["VectorStoreInfo", "VectorStoreRouterToolkit"]:
return self._built_object
return deepcopy(self._built_object)

def add_edge(self, edge: "Edge") -> None:
Expand Down
Loading

0 comments on commit 4aa9bd6

Please sign in to comment.