fix: double task gen, better logging, disable max retries #38

Merged 15 commits on Oct 16, 2024
4 changes: 4 additions & 0 deletions commons/cache/redis.py
@@ -142,6 +142,10 @@ async def enqueue(self, data: Any) -> int:
# push the data itself into the queue as well, instead of it being a reference to the persistent data
# this also simplifies dequeuing logic
num_items: int = await self.redis.rpush(queue_key, str_data) # type: ignore

# collect cids for each answer and log successful upload to DB
ids: list[str] = [response["cid"] for response in data["responses"]]
logger.success(f"Pushed Task {ids} to DB")
return num_items
except Exception as exc:
logger.opt(exception=True).error(
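For context, a minimal sketch of what the new enqueue logging produces, assuming the payload shape implied by the diff (a "responses" list whose entries carry a "cid" key); the RedisCache construction here is hypothetical:

import asyncio

from commons.cache import RedisCache

async def demo() -> None:
    cache = RedisCache()  # hypothetical wiring; the service configures its own instance
    data = {
        "prompt": "Build a golf ball trajectory simulator",
        "responses": [
            {"cid": "a1b2c3", "completion": {"files": []}},
            {"cid": "d4e5f6", "completion": {"files": []}},
        ],
    }
    # enqueue() serializes the payload, pushes it onto the Redis queue, and
    # now logs the collected cids, e.g.: Pushed Task ['a1b2c3', 'd4e5f6'] to DB
    num_items = await cache.enqueue(data)
    print(num_items)

asyncio.run(demo())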
2 changes: 1 addition & 1 deletion commons/qa_examples.py
@@ -535,7 +535,7 @@ def _get_science_answer_examples() -> str:
},
{
"filename": "index.html",
"content": "<!DOCTYPE html><html lang="en"><head><meta http-equiv="Content-Security-Policy" content="default-src 'none'; script-src 'unsafe-inline' https://cdnjs.cloudflare.com https://cdn.jsdelivr.net https://unpkg.com; style-src 'unsafe-inline'; img-src data: blob: https://threejsfundamentals.org; connect-src 'none'; form-action 'none'; base-uri 'none';"><meta charset="utf-8"/><meta content="width=device-width, initial-scale=1.0" name="viewport"/><title>Golf Ball Trajectory Simulator</title><style>body { } #content-wrapper { } ::-webkit-scrollbar { width: 6px; height: 6px; } ::-webkit-scrollbar-track { background: hsla(60, 17%, 0%, 0); } ::-webkit-scrollbar-thumb { background: hsla(175, 100%, 36%, 0.387); border-radius: 4px; } ::-webkit-scrollbar-thumb:hover { background: hsl(175, 100%, 36%); } body { margin: 0; overflow: hidden; font-family: Arial, sans-serif; } #canvas { display: block; } #controls { position: absolute; top: 10px; left: 10px; background: rgba(255,255,255,0.7); padding: 10px; border-radius: 5px; } #controls input { width: 100px; } #swingButton { margin-top: 10px; } #scoreboard { position: absolute; top: 10px; right: 10px; background: rgba(255,255,255,0.7); padding: 10px; border-radius: 5px; }</style></head><body><canvas id="canvas"></canvas><div id="controls"><label>Power: <input id="powerSlider" max="100" min="0" type="range" value="50"/></label><br/><label>Angle: <input id="angleSlider" max="90" min="0" type="range" value="45"/></label><br/><label>Wind Speed: <input id="windSpeedSlider" max="20" min="0" type="range" value="0"/></label><br/><label>Wind Direction: <input id="windDirectionSlider" max="360" min="0" type="range" value="0"/></label><br/><button id="swingButton">Swing!</button></div><div id="scoreboard"><p>Current Distance: <span id="currentDistance">0</span> m</p><p>Best Distance: <span id="bestDistance">0</span> m</p></div><script src="index.js"></script></body></html>",
"content": "<!DOCTYPE html><html lang="en"><head><meta charset="utf-8" /><meta content="width=device-width, initial-scale=1.0" name="viewport" /><title>Golf Ball Trajectory Simulator</title><style>body{margin:0;overflow:hidden;font-family:Arial,sans-serif}#canvas{display:block}#controls{position:absolute;top:10px;left:10px;background:rgba(255,255,255,0.7);padding:10px;border-radius:5px}#controls input{width:100px}#swingButton{margin-top:10px}#scoreboard{position:absolute;top:10px;right:10px;background:rgba(255,255,255,0.7);padding:10px;border-radius:5px}</style></head><body><canvas id="canvas"></canvas><div id="controls"><label>Power:<input id="powerSlider" max="100" min="0" type="range" value="50" /></label><br /><label>Angle:<input id="angleSlider" max="90" min="0" type="range" value="45" /></label><br /><label>Wind Speed:<input id="windSpeedSlider" max="20" min="0" type="range" value="0" /></label><br /><label>Wind Direction:<input id="windDirectionSlider" max="360" min="0" type="range" value="0" /></label><br /><button id="swingButton">Swing!</button></div><div id="scoreboard"><p>Current Distance: <span id="currentDistance">0</span> m</p><p>Best Distance: <span id="bestDistance">0</span> m</p></div><script src="index.js"></script></body></html>",
"language": "html"
}
]
6 changes: 2 additions & 4 deletions commons/routes/synthetic_gen.py
@@ -2,7 +2,7 @@
import functools
import json

from fastapi import APIRouter, BackgroundTasks
from fastapi import APIRouter
from pydantic import BaseModel

from commons.cache import RedisCache
@@ -31,7 +31,7 @@ class SyntheticGenResponse(BaseModel):


@synthetic_gen_router.get("/synthetic-gen")
async def generate_synthetic_data(background_tasks: BackgroundTasks):
async def generate_synthetic_data():
try:
num_elems = await cache.get_queue_length()
if num_elems == 0:
@@ -51,8 +51,6 @@ async def generate_synthetic_data(background_tasks: BackgroundTasks):
except json.JSONDecodeError:
result = {}

background_tasks.add_task(worker.run)

return SyntheticGenResponse(success=True, body=result, error=None)
except Exception as e:
return SyntheticGenResponse(success=False, body={}, error=str(e))
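Dropping background_tasks.add_task(worker.run) is the likely fix for the "double task gen" named in the PR title: each GET scheduled an extra worker.run() pass on every request, on top of a worker that is presumably started once elsewhere, outside this diff. With the endpoint now read-only, a rough sketch of calling it (base URL and mount prefix are assumptions):

import httpx

# hypothetical local deployment; the route path comes from the diff above
resp = httpx.get("http://localhost:8000/synthetic-gen")
payload = resp.json()

# SyntheticGenResponse fields per the diff: success, body, error
if payload["success"]:
    print(payload["body"])  # a dequeued synthetic task, already JSON-decoded
else:
    print(payload["error"])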
97 changes: 54 additions & 43 deletions commons/synthetic.py
@@ -94,9 +94,7 @@ async def generate_question(
_topic: Topics,
persona: str,
) -> tuple[str | None, Dict | None]:
logger.info(f"Generating question with model: {model}")

MAX_RETRIES = 5
MAX_RETRIES = 0
global used_objects
global previous_coding_question
global used_models
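Setting MAX_RETRIES = 0 implements the "disable max retries" part of the PR title. Assuming the constant feeds tenacity's stop_after_attempt (the AsyncRetrying loops in this file suggest so, though the exact wiring sits outside the visible hunk), the call body still executes once and only re-attempts are suppressed, as this self-contained sketch shows:

import asyncio

from tenacity import AsyncRetrying, RetryError, stop_after_attempt

async def flaky() -> None:
    raise ValueError("boom")

async def main() -> None:
    calls = 0
    try:
        async for attempt in AsyncRetrying(stop=stop_after_attempt(0)):
            with attempt:
                calls += 1
                await flaky()
    except RetryError:
        # the first attempt still runs; stop_after_attempt(0) only forbids
        # further attempts, so retries are disabled rather than the call itself
        print(f"gave up after {calls} call(s)")  # -> gave up after 1 call(s)

asyncio.run(main())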
@@ -146,8 +144,8 @@ async def generate_question(
**kwargs_clone,
},
)
logger.success(f"Completed generating base question: {coding_question}")
previous_coding_question = coding_question
logger.info("Base Question Generation Completed")
return coding_question, kwargs
except RetryError:
logger.error(
@@ -177,13 +175,14 @@ async def generate_answer(
model: str,
question: str,
topic: Topics,
qa_id: str,
err: str | None = None,
code: str | None = None,
) -> Tuple[str, CodeAnswer | None]:
"""Generates a coding question answer for a given coding question."""
# import commons.config as config

logger.info(f"Generating code answer with model: {model}")
# logger.info(f"{qa_id} Generating code answer with model: {model}")
if bool(err) != bool(code):
raise ValueError("Both error and code must be provided or neither")

@@ -216,7 +215,7 @@ async def generate_answer(
if model.startswith("openai"):
kwargs["seed"] = random.randint(0, cast(int, 1e9)) # needed for OpenAI

MAX_RETRIES = 2
MAX_RETRIES = 0
# try generating until max retries, then switch models
try:
async for attempt in AsyncRetrying(
@@ -243,11 +242,10 @@ async def generate_answer(
},
)
# logger.warning(f"@@@@@ generate_answer(): {response_model} \n")
logger.info(f"{qa_id} Answer Generation Completed ")
return model, response_model
except RetryError:
logger.error(
f"Failed to generate answer after {MAX_RETRIES} attempts. Switching model."
)
logger.error(f"{qa_id} Failed after {MAX_RETRIES} attempts. Switching model.")
raise
# used_models.add(model)
# remaining_models = [m for m in config.ANSWER_MODELS if m not in used_models]
@@ -258,7 +256,7 @@ async def generate_answer(
# new_model = random.choice(remaining_models)
# return await generate_answer(client, new_model, question, topic=topic)
except Exception as e:
logger.error(f"Error occurred while generating code answer: {e}")
logger.error(f"Error occurred while generating {qa_id} answer: {e}")

return model, None

@@ -290,17 +288,18 @@ async def augment_question(
question: str,
augmentation_level: AugmentationLevel,
topic: Topics,
) -> str:
) -> tuple[str, str]:
"""Augment the question with the given model and augmentation level."""
logger.info(
f"Augmenting question with model and augmentation: {model}, {augmentation_level}"
)

augmentation_prompt = ""
preamble = """
<system>
You are an LLM specializing in modifying existing coding questions to create similar yet distinct versions. Ultimately the new edited questions that you generate will be implemented by a programming agent. As such, use your vast knowledge of UX and software engineering principles to make intelligent yet distinguishable modifications.
</system>
"""
# create unique qa_id
qa_id = str(uuid.uuid4())

if augmentation_level == AugmentationLevel.REMOVE_REQUIREMENTS:
augmentation_prompt = f"You must remove any 1 requirement from the following question: {question}. Ensure that the requirement you remove will not break the functionality of the remaining requirements."
elif augmentation_level == AugmentationLevel.ADD_REQUIREMENTS:
@@ -319,7 +318,7 @@ async def augment_question(
"augmentation_level": augmentation_level,
}
)
return question
return question, qa_id

kwargs = {
"response_model": CodingQuestion,
@@ -337,24 +336,28 @@ async def augment_question(

if model.startswith("openai"):
kwargs["seed"] = random.randint(0, int(1e9)) # needed for OpenAI
response_model = await client.chat.completions.create(**kwargs)

kwargs_clone = kwargs.copy()
kwargs_clone["response_model"] = kwargs["response_model"].model_json_schema()
langfuse_context.update_current_observation(
input=kwargs_clone.pop("messages"),
model=model,
output=response_model.model_dump(),
# usage=log_llm_usage(response_model.usage),
metadata={
"topic": topic,
"question": question,
"augmentation_level": augmentation_level,
**kwargs_clone,
},
)
logger.success(f"Completed generating augmentation {augmentation_level}")
return response_model.question
try:
response_model = await client.chat.completions.create(**kwargs)

kwargs_clone = kwargs.copy()
kwargs_clone["response_model"] = kwargs["response_model"].model_json_schema()
langfuse_context.update_current_observation(
input=kwargs_clone.pop("messages"),
model=model,
output=response_model.model_dump(),
# usage=log_llm_usage(response_model.usage),
metadata={
"topic": topic,
"question": question,
"augmentation_level": augmentation_level,
**kwargs_clone,
},
)
logger.info(f"{qa_id} {augmentation_level} Completed")
return response_model.question, qa_id
except Exception as e:
logger.error(f"{qa_id}: failed to augment question: {e}")
raise RuntimeError from e


def build_single_index_html(ans: CodeAnswer) -> CodeAnswer:
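augment_question() now mints one uuid4 per augmentation and returns it alongside the augmented question; the same id later tags generate_answer() log lines and becomes the completion's cid, so a single key joins a question, its answer logs, and the queued payload. A condensed illustration (the model string is a placeholder, not a repo-configured model):

import uuid

# minted once per augmentation, as in augment_question() above
qa_id = str(uuid.uuid4())

# downstream, generate_answer(..., qa_id=qa_id) logs
# f"{qa_id} Answer Generation Completed", and the response dict reuses the
# same id as its "cid" (see the _generate_response changes below)
response = {
    "model": "example/answer-model",  # placeholder
    "completion": {"files": []},
    "cid": qa_id,
}
print(response["cid"])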
@@ -459,9 +462,12 @@ async def _generate_response(
model: str,
question: str,
topic: Topics,
qa_id: str,
level: AugmentationLevel | None = None,
):
model, result = await generate_answer(client, model, question, topic=topic)
model, result = await generate_answer(
client, model, question, topic=topic, qa_id=qa_id
)
if result is None:
raise ValueError("generate_answer() returned none")
# TODO remove after testing ensure single index.html file just for now
@@ -492,7 +498,7 @@ async def _generate_response(
else:
raise ValueError("No index.html file found in the result")

return model, result, level
return model, result, level, qa_id

# 1. get random persona from hugging face
persona = get_random_persona()
@@ -512,7 +518,11 @@ async def _generate_response(
augmented_prompts = []
if response_strategy == ResponseStrategy.NO_AUGMENTATION:
for model in answer_models:
tasks.append(_generate_response(model, question_prompt, selected_topic[0]))
tasks.append(
_generate_response(
model, question_prompt, selected_topic[0], qa_id="placeholder"
)
)
elif response_strategy == ResponseStrategy.AUGMENTATION_DETERIORIATE:
# 4. augment questions
# if augmenting, use same model for both question and answer generation
@@ -521,7 +531,7 @@ async def _generate_response(
assert type(answer_models) is str

for level in AugmentationLevel:
augmented_question = await augment_question(
augmented_question, qa_id = await augment_question(
client, question_model, question_prompt, level, selected_topic[0]
)
augmented_prompts.append(
@@ -534,16 +544,17 @@ async def _generate_response(
augmented_question,
topic=selected_topic[0],
level=level,
qa_id=qa_id,
)
)

results: list[
tuple[str, CodeAnswer | None, AugmentationLevel | None]
tuple[str, CodeAnswer | None, AugmentationLevel | None, str]
] = await asyncio.gather(*tasks)

responses = []
synthetic_ground_truth: dict[str, int] = {}
for model, result, level in results:
for model, result, level, qa_id in results:
if not result:
raise RuntimeError("Error generating prompt-response pair")

@@ -555,18 +566,18 @@ async def _generate_response(
}
for file in result.files
]
completion_id = str(uuid.uuid4())

responses.append(
{
"model": model,
"completion": {"files": formatted_files},
"cid": completion_id,
"cid": qa_id,
}
)

if level:
logger.debug(f"{model=},{completion_id=}, {level=}")
synthetic_ground_truth[completion_id] = level.value
logger.debug(f"{model=},{qa_id=}, {level=}")
synthetic_ground_truth[qa_id] = level.value

return {
"prompt": question_prompt,
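With the freshly minted completion_id gone, the qa_id created at augmentation time is the single key shared by the response's cid and the synthetic ground truth. A small self-contained sketch of the mapping the loop above builds (the stand-in enum is illustrative; the real AugmentationLevel lives elsewhere in this repo):

import uuid
from enum import Enum

class AugmentationLevel(Enum):  # illustrative stand-in for the repo's enum
    REMOVE_REQUIREMENTS = 0
    ADD_REQUIREMENTS = 1

synthetic_ground_truth: dict[str, int] = {}
for level in AugmentationLevel:
    qa_id = str(uuid.uuid4())  # in the real flow, minted by augment_question()
    synthetic_ground_truth[qa_id] = level.value

print(synthetic_ground_truth)  # maps cid -> augmentation level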