More fixes
david1542 committed Sep 17, 2024
1 parent a4e753a commit 4308565
Showing 16 changed files with 874 additions and 98 deletions.
1 change: 1 addition & 0 deletions services/api/src/agent/tools/static/semantic_search.ts
@@ -88,6 +88,7 @@ export default async function (context: RunContext)
title = "PagerDuty Alert";
break;
}
case "Jira":
case "Confluence": {
url = document.metadata.url;
title = document.metadata.title;
2 changes: 1 addition & 1 deletion services/data-processor/src/loaders/confluence.py
@@ -1,7 +1,7 @@
from collections import namedtuple
import os
import requests
from loaders.raw_readers.confluence import ConfluenceReader
from loaders.readers.confluence import ConfluenceReader
from atlassian import Confluence

from db.types import Integration
8 changes: 5 additions & 3 deletions services/data-processor/src/loaders/github.py
@@ -2,13 +2,13 @@
from github import Github, Auth, GithubException

# from llama_index.core import SimpleDirectoryReader
from llama_index.readers.github.repository.github_client import GithubClient
from loaders.utils.github_client import GithubClient
from llama_index.readers.github import (
GitHubIssuesClient,
)
from db.types import Integration
from loaders.raw_readers.github_repo import GithubRepositoryReader
from loaders.raw_readers.github_issues import GitHubRepositoryIssuesReader
from loaders.readers.github_repo import GithubRepositoryReader
from loaders.readers.github_issues import GitHubRepositoryIssuesReader


def get_repos(token: str, repos_to_sync=None):
@@ -70,6 +70,8 @@ async def fetch_github_documents(
# # TODO: this can crash if the repo is huge, because of Github API Rate limit.
# # Need to find a way to "wait" maybe or to filter garbage.
code_client = GithubClient(token, fail_on_http_error=False, verbose=True)

# TODO: updated_at timestamp doesn't seem to work (our code treats same docs as new)
loader = GithubRepositoryReader(
github_client=code_client,
owner=owner,
26 changes: 20 additions & 6 deletions services/data-processor/src/loaders/jira.py
@@ -1,7 +1,11 @@
import requests
from llama_index.readers.jira import JiraReader
from datetime import datetime, timezone
from dateutil import parser
from loaders.readers.jira import JiraReader
from db.types import Integration

JQL_QUERY = "issuetype is not EMPTY"


def fetch_jira_documents(integration: Integration):
integration_type = integration.type
@@ -19,9 +23,7 @@ def fetch_jira_documents(integration: Integration):
loader = JiraReader(
Oauth2={"cloud_id": cloud_id, "api_token": access_token}
)
documents = loader.load_data(
"issuetype is not EMPTY"
) # This "should" fetch all issues
documents = loader.load_data(JQL_QUERY) # This "should" fetch all issues
total_documents.extend(documents)
else:
loader = JiraReader(
@@ -31,12 +33,24 @@
"server_url": integration.metadata["site_url"],
}
)
documents = loader.load_data("issuetype is not EMPTY")
documents = loader.load_data(JQL_QUERY)
total_documents.extend(documents)

# Adding the global "source" metadata field
for document in total_documents:
document.metadata.pop("labels", None)
document.metadata["source"] = "Jira"

return documents
# Transform 'created_at' and 'updated_at' to UTC with milliseconds
created_at = parser.isoparse(document.metadata["created_at"])
updated_at = parser.isoparse(document.metadata["updated_at"])
document.metadata["created_at"] = (
created_at.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
+ "Z"
)
document.metadata["updated_at"] = (
updated_at.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
+ "Z"
)

return total_documents
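The loop above strips the labels field, tags each document with a global source, and normalizes Jira's ISO-8601 timestamps to UTC with millisecond precision and a trailing "Z". A minimal standalone sketch of that timestamp normalization, assuming the same dateutil dependency (to_utc_millis is a hypothetical helper, not part of the codebase):

from datetime import timezone
from dateutil import parser

def to_utc_millis(timestamp: str) -> str:
    # Parse an ISO-8601 timestamp, convert it to UTC, and keep
    # millisecond precision with a trailing "Z".
    dt = parser.isoparse(timestamp).astimezone(timezone.utc)
    return dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

# to_utc_millis("2024-09-17T12:34:56.789012+03:00") -> "2024-09-17T09:34:56.789Z"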
2 changes: 1 addition & 1 deletion services/data-processor/src/loaders/notion.py
@@ -1,6 +1,6 @@
from db.types import Integration
from notion_client import Client
from loaders.raw_readers.notion import NotionPageReader
from loaders.readers.notion import NotionPageReader


def fetch_notion_documents(integration: Integration):
82 changes: 5 additions & 77 deletions services/data-processor/src/loaders/pagerduty.py
@@ -1,83 +1,11 @@
from db.types import Integration
import httpx
from llama_index.core import Document

INCIDENT_TEXT_TEMPLATE = """
Incident title: {title}
Incident description: {description}
Incident summary: {summary}
Incident status: {status}
Service name: {service_name}
Created at: {created_at}
"""


async def get_incidents(integration: Integration):
access_token = integration.credentials["access_token"]
integration_type = integration.type
headers = {}
if integration_type == "basic":
headers["Authorization"] = f"Token token={access_token}"
elif integration_type == "oauth":
headers["Authorization"] = f"Bearer {access_token}"
else:
raise ValueError(f"Invalid integration type: {integration_type}")

limit = 100
offset = 0
resolved_incidents = []
while True:
async with httpx.AsyncClient() as client:
response = await client.get(
"https://api.pagerduty.com/incidents",
headers=headers,
params={
"date_range": "all",
"statuses[]": "resolved",
"limit": limit,
"offset": offset,
},
)
data = response.json()
incidents = data["incidents"]
resolved_incidents.extend(incidents)
if not data["more"]:
break
offset += limit
return resolved_incidents
from loaders.readers.pagerduty import PagerDutyReader


async def fetch_pagerduty_documents(integration: Integration):
incidents = await get_incidents(integration)

documents = []
for incident in incidents:
service = incident.get("service", {})
service_name = service.get("summary", "Unknown")

text = INCIDENT_TEXT_TEMPLATE.format(
title=incident["title"],
description=incident["description"],
summary=incident["summary"],
status=incident["status"],
service_name=service_name,
created_at=incident["created_at"],
)
metadata = {
"source": "PagerDuty",
"id": incident["id"],
"link": incident["html_url"],
"status": incident["status"],
"urgency": incident["urgency"],
"service_id": service.get("id", "Unknown"),
"first_trigger_log_entry_id": incident.get(
"first_trigger_log_entry", {}
).get("id", "Unknown"),
"created_at": incident["created_at"],
"updated_at": incident["updated_at"],
}

document = Document(doc_id=incident["id"], text=text, metadata=metadata)
documents.append(document)
access_token = integration.credentials["access_token"]
token_type = integration.type
loader = PagerDutyReader(access_token, token_type)
documents = await loader.load_data()

return documents
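fetch_pagerduty_documents now delegates the API calls to a PagerDutyReader in loaders/readers/pagerduty.py, which is not among the files shown here. A hypothetical sketch of such a reader, assuming it keeps the auth headers, pagination, and Document construction of the code removed above (the actual class may differ):

from typing import List
import httpx
from llama_index.core import Document
from llama_index.core.readers.base import BaseReader

class PagerDutyReader(BaseReader):
    def __init__(self, access_token: str, token_type: str):
        self.access_token = access_token
        self.token_type = token_type  # "basic" or "oauth"

    @property
    def headers(self) -> dict:
        if self.token_type == "basic":
            return {"Authorization": f"Token token={self.access_token}"}
        if self.token_type == "oauth":
            return {"Authorization": f"Bearer {self.access_token}"}
        raise ValueError(f"Invalid integration type: {self.token_type}")

    async def load_data(self) -> List[Document]:
        # Page through resolved incidents and wrap each one in a Document,
        # carrying a subset of the metadata the old in-file implementation produced.
        documents, offset, limit = [], 0, 100
        async with httpx.AsyncClient() as client:
            while True:
                response = await client.get(
                    "https://api.pagerduty.com/incidents",
                    headers=self.headers,
                    params={"date_range": "all", "statuses[]": "resolved",
                            "limit": limit, "offset": offset},
                )
                data = response.json()
                for incident in data["incidents"]:
                    documents.append(
                        Document(
                            doc_id=incident["id"],
                            text=incident["title"],
                            metadata={
                                "source": "PagerDuty",
                                "link": incident["html_url"],
                                "status": incident["status"],
                                "created_at": incident["created_at"],
                                "updated_at": incident["updated_at"],
                            },
                        )
                    )
                if not data["more"]:
                    break
                offset += limit
        return documents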
services/data-processor/src/loaders/readers/github_issues.py (file path not shown in this view; inferred from the GitHubRepositoryIssuesReader import above)
@@ -183,6 +183,7 @@ async def load_data(
extra_info = {
"state": issue["state"],
"created_at": issue["created_at"],
"updated_at": issue["updated_at"],
# url is the API URL
"url": issue["url"],
# source is the HTML URL, more convenient for humans
services/data-processor/src/loaders/readers/github_repo.py (file path not shown in this view; inferred from the GithubRepositoryReader import above)
@@ -446,6 +446,10 @@ async def _recurse_tree(
)
return blobs_and_full_paths

async def _get_latest_commit(self, path) -> str:
commits = await self._github_client.get_commits(self._owner, self._repo, path)
return commits[0]

async def _generate_documents(
self,
blobs_and_paths: List[Tuple[GitTreeResponseModel.GitTreeObject, str]],
@@ -472,6 +476,7 @@ async def _generate_documents(
documents = []
async for blob_data, full_path in buffered_iterator:
print_if_verbose(self._verbose, f"generating document for {full_path}")
latest_commit = await self._get_latest_commit(full_path)
assert (
blob_data.encoding == "base64"
), f"blob encoding {blob_data.encoding} not supported"
@@ -525,6 +530,7 @@
"file_path": full_path,
"file_name": full_path.split("/")[-1],
"url": url,
"updated_at": latest_commit.commit.author.date,
},
)
documents.append(document)
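The new _get_latest_commit depends on a get_commits method of the custom GithubClient from loaders/utils/github_client.py, which is not shown in this diff; the first commit returned for a path supplies commit.author.date, stored as the document's updated_at. A hypothetical sketch of the underlying REST call (the real client evidently wraps the response in objects, so fields are read as attributes rather than dict keys):

import httpx

async def get_commits(owner: str, repo: str, path: str, token: str) -> list:
    # GitHub lists commits newest-first, so the first element is the most
    # recent commit touching `path`.
    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"https://api.github.com/repos/{owner}/{repo}/commits",
            headers={
                "Authorization": f"Bearer {token}",
                "Accept": "application/vnd.github+json",
            },
            params={"path": path, "per_page": 1},
        )
        response.raise_for_status()
        return response.json()

# usage: commits = await get_commits("owner", "repo", "src/app.py", token)
#        updated_at = commits[0]["commit"]["author"]["date"]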
117 changes: 117 additions & 0 deletions services/data-processor/src/loaders/readers/jira.py
@@ -0,0 +1,117 @@
from typing import List, Optional, TypedDict

from llama_index.core.readers.base import BaseReader
from llama_index.core.schema import Document


class BasicAuth(TypedDict):
email: str
api_token: str
server_url: str


class Oauth2(TypedDict):
cloud_id: str
api_token: str


class JiraReader(BaseReader):
"""Jira reader. Reads data from Jira issues from passed query.
Args:
Optional BasicAuth: {
"email": "email",
"api_token": "token",
"server_url": "server_url"
}
Optional Oauth2: {
"cloud_id": "cloud_id",
"api_token": "token"
}
"""

def __init__(
self,
email: Optional[str] = None,
api_token: Optional[str] = None,
server_url: Optional[str] = None,
BasicAuth: Optional[BasicAuth] = None,
Oauth2: Optional[Oauth2] = None,
) -> None:
from jira import JIRA

if email and api_token and server_url:
if BasicAuth is None:
BasicAuth = {}
BasicAuth["email"] = email
BasicAuth["api_token"] = api_token
BasicAuth["server_url"] = server_url

if Oauth2:
options = {
"server": f"https://api.atlassian.com/ex/jira/{Oauth2['cloud_id']}",
"headers": {"Authorization": f"Bearer {Oauth2['api_token']}"},
}
self.jira = JIRA(options=options)
else:
self.jira = JIRA(
basic_auth=(BasicAuth["email"], BasicAuth["api_token"]),
server=f"https://{BasicAuth['server_url']}",
)

def load_data(self, query: str) -> List[Document]:
relevant_issues = self.jira.search_issues(query)

issues = []

assignee = ""
reporter = ""
epic_key = ""
epic_summary = ""
epic_description = ""

for issue in relevant_issues:
# Iterates through only issues and not epics
if "parent" in (issue.raw["fields"]):
if issue.fields.assignee:
assignee = issue.fields.assignee.displayName

if issue.fields.reporter:
reporter = issue.fields.reporter.displayName

if issue.raw["fields"]["parent"]["key"]:
epic_key = issue.raw["fields"]["parent"]["key"]

if issue.raw["fields"]["parent"]["fields"]["summary"]:
epic_summary = issue.raw["fields"]["parent"]["fields"]["summary"]

if issue.raw["fields"]["parent"]["fields"]["status"]["description"]:
epic_description = issue.raw["fields"]["parent"]["fields"]["status"][
"description"
]

issues.append(
Document(
text=f"{issue.fields.summary} \n {issue.fields.description}",
doc_id=issue.id,
extra_info={
"id": issue.id,
"title": issue.fields.summary,
"url": issue.permalink(),
"created_at": issue.fields.created,
"updated_at": issue.fields.updated,
"labels": issue.fields.labels,
"status": issue.fields.status.name,
"assignee": assignee,
"reporter": reporter,
"project": issue.fields.project.name,
"issue_type": issue.fields.issuetype.name,
"priority": issue.fields.priority.name,
"epic_key": epic_key,
"epic_summary": epic_summary,
"epic_description": epic_descripton,
},
)
)

return issues
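For reference, usage of the new reader matching the call sites in loaders/jira.py above; all values are placeholders:

# OAuth integration (cloud_id + access token):
reader = JiraReader(
    Oauth2={"cloud_id": "your-cloud-id", "api_token": "your-access-token"}
)

# Basic-auth integration (email + API token + site URL without scheme):
# reader = JiraReader(
#     BasicAuth={
#         "email": "you@example.com",
#         "api_token": "your-api-token",
#         "server_url": "your-domain.atlassian.net",
#     }
# )

documents = reader.load_data("issuetype is not EMPTY")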