From 1f9f8ab3f2ea6c8876820ff181f8e1e32652a2bb Mon Sep 17 00:00:00 2001
From: Arjun Bansal
Date: Fri, 1 Sep 2023 15:57:29 -0700
Subject: [PATCH] add metrics

---
 data/compute_metrics.py | 74 +++++++++++++++++++++++++++++++++++++++++
 data/json2l.py          | 17 ++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 data/compute_metrics.py
 create mode 100644 data/json2l.py

diff --git a/data/compute_metrics.py b/data/compute_metrics.py
new file mode 100644
index 0000000..c5a0c4d
--- /dev/null
+++ b/data/compute_metrics.py
@@ -0,0 +1,74 @@
+import json
+from collections import defaultdict
+
+# Paths to the ground truth and model output JSON files
+ground_truth_path = 'testset-v1.json'
+
+model_output_path = 'gpt-3.5-turbo-testset-v1.json'
+# Accuracy: 0.7107
+# Macro-Averaged Precision: 0.5879
+# Macro-Averaged Recall: 0.5736
+# Macro-Averaged F1 Score: 0.5755
+
+#model_output_path = 'pandalm-7b-testset-v1.json'
+# Accuracy: 0.6677
+# Macro-Averaged Precision: 0.5738
+# Macro-Averaged Recall: 0.5750
+# Macro-Averaged F1 Score: 0.5743
+
+def get_majority_vote(*votes):
+    vote_count = defaultdict(int)
+    for vote in votes:
+        vote_count[vote] += 1
+    return max(vote_count, key=vote_count.get)
+
+def compute_metrics(ground_truth_path, model_output_path):
+    with open(ground_truth_path, 'r') as gt_file, open(model_output_path, 'r') as mo_file:
+        ground_truths = json.load(gt_file)
+        model_outputs = json.load(mo_file)
+
+    confusion_matrix = defaultdict(lambda: defaultdict(int))
+
+    for gt, mo in zip(ground_truths, model_outputs):
+        majority_vote = get_majority_vote(gt['annotator1'], gt['annotator2'], gt['annotator3'])
+        if 'gpt' in model_output_path:
+            model_prediction = mo['gpt_result']
+        elif 'panda' in model_output_path:
+            model_prediction = mo['pandalm_result']
+        if model_prediction == "Tie" or model_prediction == "tie" or model_prediction =="garbage":
+            model_prediction = 0
+        else:
+            model_prediction = int(model_prediction)
+
+        confusion_matrix[majority_vote][model_prediction] += 1
+
+    # Accuracy calculation
+    possible_classes = [0, 1, 2]
+    total_true_positives = sum(confusion_matrix[i][i] for i in possible_classes)
+    total_instances = sum(sum(confusion_matrix[i].values()) for i in possible_classes)
+    accuracy = total_true_positives / total_instances
+
+    metrics = {}
+
+    for cls in possible_classes:
+        TP = confusion_matrix[cls][cls]
+        FP = sum(confusion_matrix[x][cls] for x in possible_classes) - TP
+        FN = sum(confusion_matrix[cls][x] for x in possible_classes) - TP
+        precision = TP / (TP + FP) if (TP + FP) != 0 else 0
+        recall = TP / (TP + FN) if (TP + FN) != 0 else 0
+        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
+
+        metrics[cls] = {'precision': precision, 'recall': recall, 'f1': f1}
+
+    macro_avg = {}
+    for metric in ['precision', 'recall', 'f1']:
+        macro_avg[metric] = sum(metrics[cls][metric] for cls in possible_classes) / 3
+
+    return accuracy, macro_avg
+
+
+accuracy, macro_avg = compute_metrics(ground_truth_path, model_output_path)
+print(f"Accuracy: {accuracy:.4f}")
+print(f"Macro-Averaged Precision: {macro_avg['precision']:.4f}")
+print(f"Macro-Averaged Recall: {macro_avg['recall']:.4f}")
+print(f"Macro-Averaged F1 Score: {macro_avg['f1']:.4f}")
diff --git a/data/json2l.py b/data/json2l.py
new file mode 100644
index 0000000..273fb99
--- /dev/null
+++ b/data/json2l.py
@@ -0,0 +1,17 @@
+import json
+
+def convert_json_to_jsonl(json_file_path, jsonl_file_path):
+    with open(json_file_path, 'r') as json_file:
+        data = json.load(json_file)
+
+    if not isinstance(data, list):
+        raise ValueError("The JSON file should contain a list of objects")
+
+    with open(jsonl_file_path, 'w') as jsonl_file:
+        for item in data:
+            jsonl_file.write(json.dumps(item) + '\n')
+
+    print(f"Converted {json_file_path} to {jsonl_file_path}")
+
+# Example usage:
+convert_json_to_jsonl('train.json', 'train.jsonl')