Fix handling of NaN values

91d68179 · finn · 639a1aad · 91d68179
Commit 91d68179 authored 1 year ago by finn
--- a/metrics/slot_accuracy/slot_accuracy.py
+++ b/metrics/slot_accuracy/slot_accuracy.py
@@ -55,9 +55,17 @@ def slot_accuracy(domain_knowledge, annotation):
    Returns:
    float: Slot accuracy score.
    """
+
+    if pd.isna(domain_knowledge) or pd.isna(annotation):
+        # Handle the nan values 
+        return 0
+        
+    domain_knowledge = domain_knowledge.replace('null', 'None')
+    annotation = annotation.replace('null', 'None')
+
    domain_knowledge = ast.literal_eval(domain_knowledge)
    annotation = ast.literal_eval(annotation)
-
+        
    count = {"True": 0, "False": 0, "Total": 0}
    
    for detail in domain_knowledge["Details"]:
@@ -88,6 +96,7 @@ def main(input_file, output_file):
    input_file (str): Path to the input CSV file.
    output_file (str): Path to the output CSV file.
    """
+    
    df = pd.read_csv(input_file, sep=',', quoting=csv.QUOTE_NONE, escapechar='/', index_col=False)
    df['slot_accuracy_original'] = df.apply(lambda row: slot_accuracy(row['Domain Knowledge'], row['original_annotation']), axis=1)
    df['slot_accuracy_generated'] = df.apply(lambda row: slot_accuracy(row['Domain Knowledge'], row['generated_annotation']), axis=1)