Process labels for attribute extraction when LLM does not return a valid response #638

Merged
merged 1 commit into from
Nov 20, 2023
Merged
30 changes: 21 additions & 9 deletions src/autolabel/dataset/dataset.py
@@ -7,9 +7,7 @@
 import logging
 from autolabel.utils import print_table
 from rich.console import Console
-import json
-import pickle
-from autolabel.tasks import TaskFactory
+from autolabel.tasks import TaskFactory, BaseTask
 from autolabel.schema import TaskType

 logger = logging.getLogger(__name__)
@@ -118,9 +116,15 @@ def process_labels(

         if self.config.task_type() == TaskType.ATTRIBUTE_EXTRACTION:
             for attr in self.config.attributes():
-                self.df[self.generate_label_name("label", attr["name"])] = [
-                    x.label.get(attr["name"], "") for x in llm_labels
-                ]
+                attribute_labels = []
+                for x in llm_labels:
+                    if x.successfully_labeled:
+                        attribute_labels.append(x.label.get(attr["name"], ""))
+                    else:
+                        attribute_labels.append(BaseTask.NULL_LABEL_TOKEN)
+                self.df[
+                    self.generate_label_name("label", attr["name"])
+                ] = attribute_labels

         # Add the LLM errors to the dataframe
         self.df[self.generate_label_name("error")] = [x.error for x in llm_labels]
@@ -152,9 +156,17 @@ def process_labels(
             ]
             if self.config.task_type() == TaskType.ATTRIBUTE_EXTRACTION:
                 for attr in self.config.attributes():
-                    self.df[self.generate_label_name("confidence", attr["name"])] = [
-                        x.confidence_score.get(attr["name"], 0.0) for x in llm_labels
-                    ]
+                    attr_confidence_scores = []
+                    for x in llm_labels:
+                        if x.successfully_labeled:
+                            attr_confidence_scores.append(
+                                x.confidence_score.get(attr["name"], 0.0)
+                            )
+                        else:
+                            attr_confidence_scores.append(0.0)
+                    self.df[
+                        self.generate_label_name("confidence", attr["name"])
+                    ] = attr_confidence_scores

         # Add the LLM explanations to the dataframe if chain of thought is set in config
         if self.config.chain_of_thought():
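Taken together, the two hunks make the attribute-extraction columns tolerant of rows the LLM failed to label: instead of reading x.label and x.confidence_score unconditionally, the label column falls back to BaseTask.NULL_LABEL_TOKEN and the confidence column to 0.0 whenever successfully_labeled is False. The standalone sketch below illustrates the same pattern outside the library; FakeAnnotation is a hypothetical stand-in for autolabel's LLMAnnotation, the NULL_LABEL_TOKEN value is a placeholder for the real constant on BaseTask, and the column names are illustrative rather than what generate_label_name actually produces.

# Self-contained sketch of the fallback behavior (assumed stand-ins, not autolabel's own classes).
from dataclasses import dataclass, field
from typing import Dict, List, Tuple

import pandas as pd

NULL_LABEL_TOKEN = "NO_LABEL"  # placeholder for BaseTask.NULL_LABEL_TOKEN


@dataclass
class FakeAnnotation:
    """Hypothetical stand-in for autolabel's LLMAnnotation."""
    successfully_labeled: bool
    label: Dict[str, str] = field(default_factory=dict)
    confidence_score: Dict[str, float] = field(default_factory=dict)


def attribute_columns(
    llm_labels: List[FakeAnnotation], attr_name: str
) -> Tuple[List[str], List[float]]:
    """Build per-attribute label/confidence columns with the PR's fallback logic."""
    labels, scores = [], []
    for x in llm_labels:
        if x.successfully_labeled:
            labels.append(x.label.get(attr_name, ""))
            scores.append(x.confidence_score.get(attr_name, 0.0))
        else:
            # Rows the LLM failed to label get the null token and zero confidence.
            labels.append(NULL_LABEL_TOKEN)
            scores.append(0.0)
    return labels, scores


llm_labels = [
    FakeAnnotation(True, {"color": "red"}, {"color": 0.92}),
    FakeAnnotation(False),  # e.g. the LLM returned an unparseable response
]
labels, scores = attribute_columns(llm_labels, "color")
df = pd.DataFrame({"color_label": labels, "color_confidence": scores})
print(df)
#   color_label  color_confidence
# 0         red              0.92
# 1    NO_LABEL              0.00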