Skip to content

Commit

Permalink
Merge pull request #53 from AbanteAI/add-chunking-files
Browse files: browse the repository at this point in the history
add file filter to chunker
  • Loading branch information
PCSwingle authored Jun 15, 2024
2 parents 186ddf8 + 09c5170 commit 3d99b97
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 11 deletions.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,6 @@ dev = [
"pytest",
"pytest-asyncio"
]

[tool.pyright]
ignore = ["tests/sample"]
6 changes: 3 additions & 3 deletions ragdaemon/annotators/call_graph.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import asyncio
from functools import partial
import json
from functools import partial
from pathlib import Path
from typing import Any, Optional

from tqdm.asyncio import tqdm
from spice import SpiceMessages
from spice.models import TextModel
from tqdm.asyncio import tqdm

from ragdaemon.annotators.base_annotator import Annotator
from ragdaemon.database import Database, remove_update_db_duplicates
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.errors import RagdaemonError
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.utils import (
DEFAULT_CODE_EXTENSIONS,
DEFAULT_COMPLETION_MODEL,
Expand Down
18 changes: 12 additions & 6 deletions ragdaemon/annotators/chunker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@
from copy import deepcopy
from functools import partial
from pathlib import Path
from typing import Optional, Set

from astroid.exceptions import AstroidSyntaxError
from tqdm.asyncio import tqdm

from ragdaemon.annotators.base_annotator import Annotator
from ragdaemon.annotators.chunker.chunk_astroid import chunk_document as chunk_astroid
from ragdaemon.annotators.chunker.chunk_line import chunk_document as chunk_line
from ragdaemon.annotators.chunker.chunk_llm import chunk_document as chunk_llm
from ragdaemon.annotators.chunker.utils import resolve_chunk_parent
from ragdaemon.database import (
Database,
remove_add_to_db_duplicates,
remove_update_db_duplicates,
)
from ragdaemon.annotators.chunker.utils import resolve_chunk_parent
from ragdaemon.annotators.chunker.chunk_astroid import chunk_document as chunk_astroid
from ragdaemon.annotators.chunker.chunk_llm import chunk_document as chunk_llm
from ragdaemon.annotators.chunker.chunk_line import chunk_document as chunk_line

from ragdaemon.errors import RagdaemonError
from ragdaemon.graph import KnowledgeGraph
from ragdaemon.utils import (
Expand All @@ -33,9 +33,13 @@ class Chunker(Annotator):
name = "chunker"
chunk_field_id = "chunks"

def __init__(self, *args, use_llm: bool = False, **kwargs):
def __init__(
self, *args, files: Optional[Set[str]] = None, use_llm: bool = False, **kwargs
):
super().__init__(*args, **kwargs)

self.files = files

# By default, use either the LLM chunker or a basic line chunker.
if use_llm and self.spice_client is not None:
default_chunk_fn = partial(
Expand Down Expand Up @@ -109,6 +113,8 @@ async def annotate(
if data.get("type") == "chunk":
graph.remove_node(node)
elif data.get("type") == "file":
if self.files is not None and node not in self.files:
continue
if self.chunk_extensions_map is None:
files_with_chunks.append((node, data))
else:
Expand Down
7 changes: 5 additions & 2 deletions ragdaemon/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import time
from pathlib import Path
from typing import Any, Iterable, Optional
from typing import Any, Dict, Iterable, Optional

from networkx.readwrite import json_graph
from spice import Spice
Expand Down Expand Up @@ -73,6 +73,9 @@ def __init__(
if self.verbose > 1:
print("Initialized empty graph.")

self.set_annotators(annotators)

def set_annotators(self, annotators: Optional[Dict[str, Dict]] = None):
annotators = annotators if annotators is not None else default_annotators()
if self.verbose > 1:
print(f"Initializing annotators: {list(annotators.keys())}...")
Expand All @@ -81,7 +84,7 @@ def __init__(
self.pipeline[ann] = annotators_map[ann](
**kwargs,
verbose=self.verbose,
spice_client=spice_client,
spice_client=self.spice_client,
pipeline=self.pipeline,
)

Expand Down

0 comments on commit 3d99b97

Please sign in to comment.