Use Spice #543
@@ -48,7 +48,7 @@ async def enable_agent_mode(self):
         ]
         model = ctx.config.model
         response = await ctx.llm_api_handler.call_llm_api(messages, model, False)
-        content = response.choices[0].message.content or ""
+        content = response.text
         paths = [Path(path) for path in content.strip().split("\n") if Path(path).exists()]
         self.agent_file_message = ""
@@ -87,7 +87,7 @@ async def _determine_commands(self) -> List[str]:
             ctx.stream.send(f"Error accessing OpenAI API: {e.message}", style="error")
             return []

-        content = response.choices[0].message.content or ""
+        content = response.text

         messages.append(ChatCompletionAssistantMessageParam(role="assistant", content=content))
         parsed_llm_response = await ctx.config.parser.parse_llm_response(content)
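Both call sites above swap `response.choices[0].message.content or ""` for `response.text`. The old expression also guarded against a `None` message; if `SpiceResponse.text` can ever be missing or empty (this diff does not say), a small defensive accessor would keep the downstream `.strip()` and `.split()` calls safe. A minimal sketch, not part of the PR:

```python
def response_content(response: object) -> str:
    """Illustrative guard mirroring the old `or ""` fallback; assumes a
    SpiceResponse-like object exposing a `.text` attribute."""
    return getattr(response, "text", None) or ""


# e.g. content = response_content(response); paths = content.strip().split("\n")
```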
@@ -96,7 +96,7 @@ async def filter(
            stream=False,
            response_format=ResponseFormat(type="json_object"),
        )
-        message = (llm_response.choices[0].message.content) or ""
+        message = llm_response.text
        tokens = prompt_tokens(messages, model)
        response_tokens = count_tokens(message, model, full_message=True)
        cost_tracker.log_api_call_stats(

Review comment: Given the removal of detailed LLM API call logging, consider implementing a new mechanism to log or monitor these calls for debugging and performance analysis.
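On the reviewer's point about lost call logging, one possible mechanism is a thin wrapper that records latency around each call, with token counts still going through `count_tokens` and `cost_tracker.log_api_call_stats` as in the hunk above. The helper below is hypothetical, not an existing Mentat API:

```python
import logging
import time
from contextlib import contextmanager
from typing import Iterator

logger = logging.getLogger("mentat.llm_calls")


@contextmanager
def log_llm_call(model: str) -> Iterator[None]:
    # Hypothetical helper: wrap an LLM call and record how long it took.
    start = time.monotonic()
    try:
        yield
    finally:
        logger.info("LLM call (model=%s) finished in %.2fs", model, time.monotonic() - start)
```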
@@ -44,6 +44,7 @@
 )
 from openai.types.chat.completion_create_params import ResponseFormat
 from PIL import Image
+from spice import Spice, SpiceResponse

 from mentat.errors import MentatError, ReturnToUser, UserError
 from mentat.session_context import SESSION_CONTEXT
@@ -97,11 +98,8 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:


 # Ensures that each chunk will have at most one newline character
-def chunk_to_lines(chunk: ChatCompletionChunk) -> list[str]:
-    content = None
-    if len(chunk.choices) > 0:
-        content = chunk.choices[0].delta.content
-    return ("" if content is None else content).splitlines(keepends=True)
+def chunk_to_lines(content: str) -> list[str]:
+    return content.splitlines(keepends=True)


 def get_encoding_for_model(model: str) -> tiktoken.Encoding:
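The simplified `chunk_to_lines` now receives the chunk's text directly instead of a `ChatCompletionChunk`. A quick illustration of the "at most one newline per piece" guarantee the comment describes:

```python
def chunk_to_lines(content: str) -> list[str]:
    # Same body as the new version above: split while keeping newline characters.
    return content.splitlines(keepends=True)


assert chunk_to_lines("edit: foo.py\n+ bar") == ["edit: foo.py\n", "+ bar"]
assert chunk_to_lines("") == []
```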
@@ -264,6 +262,9 @@ def __contains__(self, key: object) -> bool:
         "gpt-3.5-turbo-16k-0613": Model("gpt-3.5-turbo-16k-0613", 16385, 0.003, 0.004),
         "gpt-3.5-turbo-0301": Model("gpt-3.5-turbo-0301", 4096, 0.0015, 0.002),
         "text-embedding-ada-002": Model("text-embedding-ada-002", 8191, 0.0001, 0, embedding_model=True),
+        "claude-3-opus-20240229": Model("claude-3-opus-20240229", 200000, 0.015, 0.075),
+        "claude-3-sonnet-20240229": Model("claude-3-sonnet-20240229", 200000, 0.003, 0.015),
+        "claude-3-haiku-20240307": Model("claude-3-haiku-20240307", 200000, 0.00025, 0.00125),
     }
 )
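The new Claude entries reuse the existing `Model` record. Judging from the neighboring GPT entries, the numeric fields read as context size followed by per-1k-token prompt and completion prices; that interpretation is inferred from the values, not stated in this diff. A rough cost estimate under that assumption:

```python
def estimate_cost(prompt_tokens: int, completion_tokens: int,
                  prompt_per_1k: float, completion_per_1k: float) -> float:
    # Assumes prices are USD per 1,000 tokens, matching how the Model entries appear to be encoded.
    return prompt_tokens / 1000 * prompt_per_1k + completion_tokens / 1000 * completion_per_1k


# claude-3-opus-20240229 is registered as (200000, 0.015, 0.075):
print(estimate_cost(10_000, 1_000, 0.015, 0.075))  # 0.225
```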
@@ -362,39 +363,21 @@ def initialize_client(self):
         self.async_client = AsyncOpenAI(api_key=key, base_url=base_url)
         self.sync_client = OpenAI(api_key=key, base_url=base_url)

+        self.spice_client = Spice(provider="anthropic")
+
         try:
             self.async_client.models.list()  # Test the key
         except AuthenticationError as e:
             raise UserError(f"API gave an Authentication Error:\n{e}")

-    @overload
-    async def call_llm_api(
-        self,
-        messages: list[ChatCompletionMessageParam],
-        model: str,
-        stream: Literal[True],
-        response_format: ResponseFormat = ResponseFormat(type="text"),
-    ) -> AsyncIterator[ChatCompletionChunk]:
-        ...
-
-    @overload
-    async def call_llm_api(
-        self,
-        messages: list[ChatCompletionMessageParam],
-        model: str,
-        stream: Literal[False],
-        response_format: ResponseFormat = ResponseFormat(type="text"),
-    ) -> ChatCompletion:
-        ...
-
     @api_guard
     async def call_llm_api(
         self,
         messages: list[ChatCompletionMessageParam],
         model: str,
         stream: bool,
         response_format: ResponseFormat = ResponseFormat(type="text"),
-    ) -> ChatCompletion | AsyncIterator[ChatCompletionChunk]:
+    ) -> SpiceResponse:
         session_context = SESSION_CONTEXT.get()
         config = session_context.config
         cost_tracker = session_context.cost_tracker
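`initialize_client` now constructs `Spice(provider="anthropic")` with no explicit credentials, while the OpenAI key still gets an eager validity check. Assuming Spice's Anthropic provider reads its key from the environment (an assumption this diff does not confirm), a similar fail-fast guard could look like this hypothetical sketch:

```python
import os


def check_anthropic_key() -> None:
    # Hypothetical guard, not part of this PR; the env var name is an assumption about Spice's config.
    if not os.environ.get("ANTHROPIC_API_KEY"):
        raise RuntimeError(
            "ANTHROPIC_API_KEY is not set; Spice(provider='anthropic') will not be usable."
        )
```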
@@ -409,52 +392,34 @@ async def call_llm_api(
         start_time = default_timer()
         with sentry_sdk.start_span(description="LLM Call") as span:
             span.set_tag("model", model)

-            # OpenAI's API is bugged; when gpt-4-vision-preview is used, including the response format
-            # at all returns a 400 error. Additionally, gpt-4-vision-preview has a max response of 30 tokens by default.
-            # Until this is fixed, we have to use this workaround.
-            if model == "gpt-4-vision-preview":
-                response = await self.async_client.chat.completions.create(
-                    model=model,
-                    messages=messages,
-                    temperature=config.temperature,
-                    stream=stream,
-                    max_tokens=4096,
-                )
-            else:
-                # This makes it slightly easier when using the litellm proxy or models outside of OpenAI
-                if response_format["type"] == "text":
-                    response = await self.async_client.chat.completions.create(
-                        model=model,
-                        messages=messages,
-                        temperature=config.temperature,
-                        stream=stream,
-                        max_tokens=4096,
-                    )
-                else:
-                    response = await self.async_client.chat.completions.create(
-                        model=model,
-                        messages=messages,
-                        temperature=config.temperature,
-                        stream=stream,
-                        response_format=response_format,
-                        max_tokens=4096,
-                    )
-
-            # We have to cast response since pyright isn't smart enough to connect
-            # the dots between stream and the overloaded create function
+            # TODO: handle this for gpt-4-vision-preview in spice?
+            response = await self.spice_client.call_llm(
+                model=model,
+                messages=messages,
+                stream=stream,
+                temperature=config.temperature,
+                response_format=response_format,
+            )

         if not stream:
             time_elapsed = default_timer() - start_time
             response_tokens = count_tokens(
-                cast(ChatCompletion, response).choices[0].message.content or "",
+                response.text,
                 model,
                 full_message=False,
             )
             cost_tracker.log_api_call_stats(tokens, response_tokens, model, time_elapsed)
         else:
             cost_tracker.last_api_call = ""
-            response = cost_tracker.response_logger_wrapper(
-                tokens, cast(AsyncStream[ChatCompletionChunk], response), model
-            )
+            # TODO: replace this tracking for stream
+            # response = cost_tracker.response_logger_wrapper(
+            #     tokens, cast(AsyncStream[ChatCompletionChunk], response), model
+            # )

         return response

Review comment: Given the removal of the detailed OpenAI client configuration and the switch to Spice, ensure that all necessary Spice client configurations are properly set, especially those related to API keys and endpoints.
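The streaming branch currently loses per-call stats (the `response_logger_wrapper` call is commented out above). One possible shape for restoring it is an async wrapper that re-yields the string chunks and logs once the stream finishes; `count_tokens` and `cost_tracker` are left as comments because they stand in for Mentat's real helpers:

```python
from timeit import default_timer
from typing import AsyncIterator


async def track_stream(chunks: AsyncIterator[str], model: str, prompt_tokens: int) -> AsyncIterator[str]:
    # Sketch only: yield chunks through unchanged, then log usage for the whole response.
    start_time = default_timer()
    collected: list[str] = []
    async for chunk in chunks:
        collected.append(chunk)
        yield chunk
    full_text = "".join(collected)
    time_elapsed = default_timer() - start_time
    # response_tokens = count_tokens(full_text, model, full_message=False)
    # cost_tracker.log_api_call_stats(prompt_tokens, response_tokens, model, time_elapsed)
```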
@@ -71,7 +71,7 @@ async def revise_edit(file_edit: FileEdit):
            style="info",
        )
        response = await ctx.llm_api_handler.call_llm_api(messages, model=ctx.config.model, stream=False)
-        message = response.choices[0].message.content or ""
+        message = response.text
        messages.append(ChatCompletionAssistantMessageParam(content=message, role="assistant"))
        ctx.conversation.add_transcript_message(
            ModelMessage(message=message, prior_messages=messages, message_type="revisor")
@@ -72,14 +72,14 @@ def check_version():
 def check_model():
     ctx = SESSION_CONTEXT.get()
     model = ctx.config.model
-    if "gpt-4" not in model:
+    if "gpt-4" not in model and "opus" not in model:
         ctx.stream.send(
-            "Warning: Mentat has only been tested on GPT-4. You may experience"
-            " issues with quality. This model may not be able to respond in"
-            " mentat's edit format.",
+            "Warning: The only recommended models are GPT-4 and Claude 3 Opus. "
+            "You may experience issues with quality. This model may not be able to "
+            "respond in mentat's edit format.",
             style="warning",
         )
-        if "gpt-3.5" not in model:
+        if "gpt-3.5" not in model and "claude-3" not in model:
             ctx.stream.send(
                 "Warning: Mentat does not know how to calculate costs or context" " size for this model.",
                 style="warning",

Review comment: The update to the model compatibility warning is important for user guidance. Ensure that this message is clear and accurately reflects the models supported by Mentat.

Review comment: The updated warnings for model compatibility are important for guiding users effectively. Ensure that these messages are displayed prominently in the UI and consider adding a link to documentation or a FAQ section for users who wish to learn more about model compatibility.

Review comment: Given the updated model compatibility warnings, consider adding more detailed guidance or links to documentation within the warning messages to help users understand the implications of using unsupported models.
@@ -89,9 +89,9 @@ def create_viewer(transcripts: list[Transcript]) -> Path:


 async def add_newline(
-    iterator: AsyncIterator[ChatCompletionChunk],
+    iterator: AsyncIterator[str],
     role: Optional[Literal["system", "user", "assistant", "tool"]] = "assistant",
-) -> AsyncIterator[ChatCompletionChunk]:
+) -> AsyncIterator[str]:
     """
     The model often doesn't end it's responses in a newline;
     adding a newline makes it significantly easier for us to parse.

@@ -101,20 +101,7 @@ async def add_newline(
         last_chunk = chunk
         yield chunk
     if last_chunk is not None:
-        yield ChatCompletionChunk(
-            id=last_chunk.id,
-            choices=[
-                Choice(
-                    delta=ChoiceDelta(content="\n", role=role),
-                    finish_reason=last_chunk.choices[0].finish_reason,
-                    index=0,
-                )
-            ],
-            created=last_chunk.created,
-            model=last_chunk.model,
-            object=last_chunk.object,
-            system_fingerprint=last_chunk.system_fingerprint,
-        )
+        yield "\n"


 def get_relative_path(path: Path, target: Path) -> Path:

Review comment: Consider using a more robust method for determining if a file is text-encoded. The current approach might not be efficient for large files.
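With `add_newline` now operating on plain string chunks, its behavior is easy to demonstrate end to end. The sketch below mirrors the updated helper but omits the `role` parameter that the real signature still carries:

```python
import asyncio
from typing import AsyncIterator


async def add_newline(iterator: AsyncIterator[str]) -> AsyncIterator[str]:
    # Simplified mirror of the updated helper: re-yield every chunk, then append a
    # final "\n" only if the stream produced anything at all.
    last_chunk = None
    async for chunk in iterator:
        last_chunk = chunk
        yield chunk
    if last_chunk is not None:
        yield "\n"


async def demo() -> None:
    async def fake_stream() -> AsyncIterator[str]:
        for piece in ["@@start", " some edit"]:
            yield piece

    print([c async for c in add_newline(fake_stream())])  # ['@@start', ' some edit', '\n']


asyncio.run(demo())
```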
@@ -29,3 +29,4 @@ typing_extensions==4.8.0
 tqdm==4.66.1
 webdriver_manager==4.0.1
 watchfiles==0.21.0
+spice @ git+https://github.com/AbanteAI/spice@main

Review comment: To ensure stability and predictable behavior, consider pinning the spice dependency to a specific commit hash or version tag instead of tracking @main.
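A pinned form of the requirement could look like the lines below; the commit hash and version are placeholders, not real references:

```
spice @ git+https://github.com/AbanteAI/spice@<commit-sha>
# or, once a tagged release exists:
# spice==<version>
```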
Review comment: The change to use `response.text` directly simplifies the code. Ensure that all instances where the LLM API response is processed are updated to this simpler approach.