Process labels for attribute extraction when LLM does not return a valid response #638

Merged
merged 1 commit into from
Nov 20, 2023
Merged
30 changes: 21 additions & 9 deletions src/autolabel/dataset/dataset.py
@@ -7,9 +7,7 @@
 import logging
 from autolabel.utils import print_table
 from rich.console import Console
-import json
-import pickle
-from autolabel.tasks import TaskFactory
+from autolabel.tasks import TaskFactory, BaseTask
 from autolabel.schema import TaskType

 logger = logging.getLogger(__name__)
@@ -118,9 +116,15 @@ def process_labels(

         if self.config.task_type() == TaskType.ATTRIBUTE_EXTRACTION:
             for attr in self.config.attributes():
-                self.df[self.generate_label_name("label", attr["name"])] = [
-                    x.label.get(attr["name"], "") for x in llm_labels
-                ]
+                attribute_labels = []
+                for x in llm_labels:
+                    if x.successfully_labeled:
+                        attribute_labels.append(x.label.get(attr["name"], ""))
+                    else:
+                        attribute_labels.append(BaseTask.NULL_LABEL_TOKEN)
+                self.df[
+                    self.generate_label_name("label", attr["name"])
+                ] = attribute_labels

         # Add the LLM errors to the dataframe
         self.df[self.generate_label_name("error")] = [x.error for x in llm_labels]
@@ -152,9 +156,17 @@ def process_labels(
             ]
             if self.config.task_type() == TaskType.ATTRIBUTE_EXTRACTION:
                 for attr in self.config.attributes():
-                    self.df[self.generate_label_name("confidence", attr["name"])] = [
-                        x.confidence_score.get(attr["name"], 0.0) for x in llm_labels
-                    ]
+                    attr_confidence_scores = []
+                    for x in llm_labels:
+                        if x.successfully_labeled:
+                            attr_confidence_scores.append(
+                                x.confidence_score.get(attr["name"], 0.0)
+                            )
+                        else:
+                            attr_confidence_scores.append(0.0)
+                    self.df[
+                        self.generate_label_name("confidence", attr["name"])
+                    ] = attr_confidence_scores

         # Add the LLM explanations to the dataframe if chain of thought is set in config
         if self.config.chain_of_thought():
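Taken together, the two hunks make the attribute-extraction columns tolerant of rows the LLM failed to label: instead of reading x.label and x.confidence_score unconditionally, the label column falls back to BaseTask.NULL_LABEL_TOKEN and the confidence column to 0.0 whenever successfully_labeled is False. The standalone sketch below illustrates the same pattern outside the library; FakeAnnotation is a hypothetical stand-in for autolabel's LLMAnnotation, the NULL_LABEL_TOKEN value is a placeholder for the real constant on BaseTask, and the column names are illustrative rather than what generate_label_name actually produces.

# Self-contained sketch of the fallback behavior (assumed stand-ins, not autolabel's own classes).
from dataclasses import dataclass, field
from typing import Dict, List, Tuple

import pandas as pd

NULL_LABEL_TOKEN = "NO_LABEL"  # placeholder for BaseTask.NULL_LABEL_TOKEN


@dataclass
class FakeAnnotation:
    """Hypothetical stand-in for autolabel's LLMAnnotation."""
    successfully_labeled: bool
    label: Dict[str, str] = field(default_factory=dict)
    confidence_score: Dict[str, float] = field(default_factory=dict)


def attribute_columns(
    llm_labels: List[FakeAnnotation], attr_name: str
) -> Tuple[List[str], List[float]]:
    """Build per-attribute label/confidence columns with the PR's fallback logic."""
    labels, scores = [], []
    for x in llm_labels:
        if x.successfully_labeled:
            labels.append(x.label.get(attr_name, ""))
            scores.append(x.confidence_score.get(attr_name, 0.0))
        else:
            # Rows the LLM failed to label get the null token and zero confidence.
            labels.append(NULL_LABEL_TOKEN)
            scores.append(0.0)
    return labels, scores


llm_labels = [
    FakeAnnotation(True, {"color": "red"}, {"color": 0.92}),
    FakeAnnotation(False),  # e.g. the LLM returned an unparseable response
]
labels, scores = attribute_columns(llm_labels, "color")
df = pd.DataFrame({"color_label": labels, "color_confidence": scores})
print(df)
#   color_label  color_confidence
# 0         red              0.92
# 1    NO_LABEL              0.00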