From aff3b5f4dde7064486867fa80705046bcec983f0 Mon Sep 17 00:00:00 2001 From: Jamie Ip Date: Wed, 11 Nov 2020 18:52:02 -0800 Subject: [PATCH 1/8] edited my created tests --- consensus_and_scoring/test/test_IAA_jamie.py | 30 +++++++++++++------- consensus_and_scoring/test/test_dep_jamie.py | 2 +- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/consensus_and_scoring/test/test_IAA_jamie.py b/consensus_and_scoring/test/test_IAA_jamie.py index a452417..5f21bc7 100644 --- a/consensus_and_scoring/test/test_IAA_jamie.py +++ b/consensus_and_scoring/test/test_IAA_jamie.py @@ -39,25 +39,33 @@ def test_user_highlighting_consensus(config, tmpdir): #N users on schema v1 and N users on schema v2--ensure output rows identical def test_diff_schemas(config, tmpdir): test_path = test_utils.make_test_directory(config, 'test_diff_schemas') - out_path = test_utils.make_test_directory(config, 'out_test_diff_schemas') + out_path = test_utils.make_test_directory(config, 'test_diff_schemas_out') #Covid_Evidence2020_03_21_copy is a copy with Q13 set to Ordinal, which should be detected as a new schema for x in [('jamietest_old', 'Covid_Evidence2020_03_21'), ('jamietest_new', 'Covid_Evidence2020_03_21_copy')]: dh = datahunt(out_folder=test_path, source_task_id = x[0]) dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': x[1], 'contributor_uuid':'A'}) - dh.add_row({'answer_label': 'T1.Q1.A3', 'namespace': x[1], 'contributor_uuid':'B'}) - dh.add_row({'answer_label': 'T1.Q3.A1', 'namespace': x[1], 'contributor_uuid':'C'}) - dh.add_row({'answer_label': 'T1.Q14.A1', 'namespace': x[1], 'contributor_uuid':'D'}) - dh.add_row({'answer_label': 'T1.Q14.A10', 'namespace': x[1], 'contributor_uuid':'E'}) - dh.add_row({'answer_label': 'T1.Q14.A10', 'namespace': x[1], 'contributor_uuid':'F'}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': x[1], 'contributor_uuid':'A'}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': x[1], 'contributor_uuid':'B'}) + dh.add_row({'answer_label': 'T1.Q1.A3', 'namespace': x[1], 'contributor_uuid':'A'}) + dh.add_row({'answer_label': 'T1.Q2.A1', 'namespace': x[1], 'contributor_uuid':'A'}) + dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': x[1], 'contributor_uuid':'A'}) + dh.add_row({'answer_label': 'T1.Q2.A8', 'namespace': x[1], 'contributor_uuid':'A'}) + dh.add_row({'answer_label': 'T1.Q2.A7', 'namespace': x[1], 'contributor_uuid':'B'}) + dh.add_row({'answer_label': 'T1.Q2.A8', 'namespace': x[1], 'contributor_uuid':'B'}) + dh.add_row({'answer_label': 'T1.Q3.A1', 'namespace': x[1], 'contributor_uuid':'A'}) fin_path = dh.export() data_path = config['data_dir'] schema_path = config['persistent_test_dir']+'/schemas' iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory = out_path) for root, dir, files in os.walk(iaa_out): - out_df_old = pd.read_csv(os.path.join(iaa_out, files[0]), encoding='utf-8') - out_df_new = pd.read_csv(os.path.join(iaa_out, files[1]), encoding='utf-8') - out_df_new = out_df_new.drop(['schema_sha256', 'namespace'], axis=1) - out_df_old = out_df_old.drop(['schema_sha256', 'namespace'], axis=1) + out_df_old = pd.read_csv(os.path.join(iaa_out, 'DataHunt_jamietest_old.IAA-Default-Tags.csv'), encoding='utf-8') + out_df_new = pd.read_csv(os.path.join(iaa_out, 'DataHunt_jamietest_new.IAA-Default-Tags.csv'), encoding='utf-8') - assert out_df_old.equals(out_df_new) + assert out_df_old.equals(out_df_new) == False + + schema_columns = ['article_sha256', 'article_id', 'schema_sha256', 'namespace'] + 
out_df_old = out_df_old.drop(schema_columns, axis=1) + out_df_new = out_df_new.drop(schema_columns, axis=1) + + assert out_df_old.equals(out_df_new) == True diff --git a/consensus_and_scoring/test/test_dep_jamie.py b/consensus_and_scoring/test/test_dep_jamie.py index 16852e4..c4f6402 100644 --- a/consensus_and_scoring/test/test_dep_jamie.py +++ b/consensus_and_scoring/test/test_dep_jamie.py @@ -60,7 +60,7 @@ def test_bad_parent(config): iaa_files_path = test_utils.make_test_directory(config, 'dep_bad_dad') out_path = test_utils.make_test_directory(config, 'dep_bad_dad_out') - parents = {1:[2], 2:[3,4,5,7,8], 5:[6], 9:[10,11]} + parents = {1:[2], 2:[3,4,5], 5:[6], 9:[10,11]} childNumAnswers = {2:9, 3:1, 4:6, 5:5, 6:3, 7:1, 8:5, 10:5, 11:5} for parent in parents: iaa = IAA_task(out_folder=iaa_files_path, source_task_id='gru' + str(parent)) From 8cc3eef3086625a111d399d41e2402e1b5f69e7e Mon Sep 17 00:00:00 2001 From: Jamie Ip Date: Wed, 11 Nov 2020 21:58:41 -0800 Subject: [PATCH 2/8] laid groundwork for agreemnt scores --- consensus_and_scoring/AgreementScoring.py | 12 ++++++ consensus_and_scoring/Dependency.py | 3 ++ .../test/test_agreement_score.py | 37 +++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 consensus_and_scoring/AgreementScoring.py create mode 100644 consensus_and_scoring/test/test_agreement_score.py diff --git a/consensus_and_scoring/AgreementScoring.py b/consensus_and_scoring/AgreementScoring.py new file mode 100644 index 0000000..59d5e3d --- /dev/null +++ b/consensus_and_scoring/AgreementScoring.py @@ -0,0 +1,12 @@ +import pandas as pd +import numpy as np + +def AgreementScore(iaaData, schemaPath): + print("AGREEMENT SCORING TIME!!!") + print("OLD AGREEMENT SCORES:") + print(iaaData['agreement_score']) + #TODO: AGREEMENT SCORE CHANGES HERE + #iaaData['agreement_score'] = np.zeros(3) + print("NEW AGREEMENT SCORES:") + print(iaaData['agreement_score']) + return iaaData diff --git a/consensus_and_scoring/Dependency.py b/consensus_and_scoring/Dependency.py index 4d82197..87e6524 100644 --- a/consensus_and_scoring/Dependency.py +++ b/consensus_and_scoring/Dependency.py @@ -4,6 +4,7 @@ import os import json from dataV3 import * +from AgreementScoring import * def eval_dependency(directory, iaa_dir, schema_dir, out_dir): print("DEPENDENCY STARTING") @@ -132,6 +133,8 @@ def handleDependencies(schemaPath, iaaPath, out_dir): indices = merge_indices(row_indices, indices).tolist() iaaData.at[row, 'highlighted_indices'] = json.dumps(indices) + iaaData = AgreementScore(iaaData, schemaPath) + print('exporting to csv') path, name = get_path(iaaPath) outputpath = os.path.join(out_dir, 'Dep_'+name) diff --git a/consensus_and_scoring/test/test_agreement_score.py b/consensus_and_scoring/test/test_agreement_score.py new file mode 100644 index 0000000..c61f6e5 --- /dev/null +++ b/consensus_and_scoring/test/test_agreement_score.py @@ -0,0 +1,37 @@ +import sys +import os +import pandas as pd + +import test_utils +from filegen_utils import * +from Dependency import * +import conftest + +#REFERENCE: in Evidence, parents = {1.1:[2], 1.2:[2], 2.1:[4], 2.5:[4,5], 2.8:[3], 5.1:[6], 5.2:[6], 5.3:[6], 9.1:[10,11], 9.2:[10,11]} +def test_dep_sample(config): + iaa_files_path = test_utils.make_test_directory(config, 'dep_sample') + out_path = test_utils.make_test_directory(config, 'out_dep_sample') + # source_task_id generated by smashing keyboard + iaa = IAA_task(out_folder=iaa_files_path, source_task_id='kjncsa87nxao21899102j1j2') + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", 
"question_Number": 1, "agreed_Answer": 1, "agreement_score": .75}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 1, "agreed_Answer": 1, "agreement_score": .75}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 1, "agreed_Answer": 1, "agreement_score": .75}) + fin_path = iaa.export() + data_path = config['data_dir'] + schema_path = data_path + '/schemas' + dh_path = None #doesn't get used by dependency but is still an argument + + eval_dependency(dh_path, iaa_files_path, schema_path, out_path) + + for root, dir, files in os.walk(out_path): + for file in files: + #should be only 1 file for this case, so just run it on the only one + # if there's more than 1 then you can get fancy + out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8') + #9 answer choices to a checklist question + # assert len(out_df) == 2 + # q_three = out_df[out_df['question_Number']==2] + # hl = q_three['highlighted_indices'].iloc[0] + # assert len(hl) >18 + # assert '10' in hl + # assert '29' in hl From 3e4a43d8be7f46c35b8771d2acc1a6bb1e3e04ea Mon Sep 17 00:00:00 2001 From: Jamie Ip Date: Thu, 12 Nov 2020 13:54:34 -0800 Subject: [PATCH 3/8] implemented agremeent scoring by parent weights --- consensus_and_scoring/AgreementScoring.py | 42 +++++++++++++++++-- .../test/test_agreement_score.py | 34 ++++++++++++--- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/consensus_and_scoring/AgreementScoring.py b/consensus_and_scoring/AgreementScoring.py index 59d5e3d..55fa407 100644 --- a/consensus_and_scoring/AgreementScoring.py +++ b/consensus_and_scoring/AgreementScoring.py @@ -1,12 +1,48 @@ import pandas as pd import numpy as np +import re +from dataV3 import create_dependencies_dict def AgreementScore(iaaData, schemaPath): print("AGREEMENT SCORING TIME!!!") print("OLD AGREEMENT SCORES:") - print(iaaData['agreement_score']) + print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) #TODO: AGREEMENT SCORE CHANGES HERE - #iaaData['agreement_score'] = np.zeros(3) + schemData = pd.read_csv(schemaPath, encoding = 'utf-8') + dependencies = create_dependencies_dict(schemData) + iaaQuestions = iaaData['question_Number'].tolist() + for child in dependencies.keys(): + if child not in iaaQuestions: + continue + parents = dependencies[child].keys() + #TODO: clean this up + temp = [] + for parent in parents: + answers = dependencies[child][parent] + parentScores = iaaData[(iaaData['question_Number'] == parent)] + parentScores = parentScores[parentScores['agreed_Answer'].astype(int).isin(answers)] + temp.append(np.mean(parentScores['agreement_score'])) + avgParentScores = np.mean(temp) + iaaData['agreement_score'] = np.where(iaaData['question_Number'] == child, iaaData['agreement_score'] * avgParentScores, iaaData['agreement_score']) + #iaaData['agreement_score'] = np.zeros(3) print("NEW AGREEMENT SCORES:") - print(iaaData['agreement_score']) + print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) return iaaData + +# Creates a dictionary of Parent Question: Answer: Child Questions +# ex. {1: {1: [2], 2: [2]}, 2: {1: [4], 5: [4, 5], 8: [3]}, 5: {1: [6], 2: [6], 3: [6]}, 9: {1: [10, 11], 2: [10, 11]}} +# T1.Q1.A1 changes T1.Q2, etc. 
+# def create_parents_dict(schemadata): +# df = schemadata[schemadata['answer_next_questions'].notna()] +# parents = df['answer_label'].tolist() +# children = df['answer_next_questions'].tolist() +# dependencies = {} +# for i in range(len(parents)): +# parent_q = int(re.findall(r"Q(\d+)", parents[i])[0]) +# parent_a = int(re.findall(r"A(\d+)", parents[i])[0]) +# child_q = [int(q) for q in re.findall(r"Q(\d+)", children[i])] +# if parent_q not in dependencies: +# dependencies[parent_q] = {parent_a:child_q} +# else: +# dependencies[parent_q][parent_a] = child_q +# return dependencies diff --git a/consensus_and_scoring/test/test_agreement_score.py b/consensus_and_scoring/test/test_agreement_score.py index c61f6e5..0eb945e 100644 --- a/consensus_and_scoring/test/test_agreement_score.py +++ b/consensus_and_scoring/test/test_agreement_score.py @@ -9,13 +9,13 @@ #REFERENCE: in Evidence, parents = {1.1:[2], 1.2:[2], 2.1:[4], 2.5:[4,5], 2.8:[3], 5.1:[6], 5.2:[6], 5.3:[6], 9.1:[10,11], 9.2:[10,11]} def test_dep_sample(config): - iaa_files_path = test_utils.make_test_directory(config, 'dep_sample') - out_path = test_utils.make_test_directory(config, 'out_dep_sample') + iaa_files_path = test_utils.make_test_directory(config, 'agscore_sample') + out_path = test_utils.make_test_directory(config, 'agscore_sample_out') # source_task_id generated by smashing keyboard - iaa = IAA_task(out_folder=iaa_files_path, source_task_id='kjncsa87nxao21899102j1j2') - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 1, "agreed_Answer": 1, "agreement_score": .75}) - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 1, "agreed_Answer": 1, "agreement_score": .75}) - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 1, "agreed_Answer": 1, "agreement_score": .75}) + iaa = IAA_task(out_folder=iaa_files_path, source_task_id='agscore_test') + iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":1, "agreement_score":.5, 'highlighted_indices':test_utils.make_highlight_indices(10,30)}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 1, "agreement_score": .5}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 4, "agreed_Answer": 1, "agreement_score": .5}) fin_path = iaa.export() data_path = config['data_dir'] schema_path = data_path + '/schemas' @@ -35,3 +35,25 @@ def test_dep_sample(config): # assert len(hl) >18 # assert '10' in hl # assert '29' in hl + +def test_dep_sample2(config): + iaa_files_path = test_utils.make_test_directory(config, 'agscore_sample2') + out_path = test_utils.make_test_directory(config, 'agscore_sample2_out') + # source_task_id generated by smashing keyboard + iaa = IAA_task(out_folder=iaa_files_path, source_task_id='agscore_test') + iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":1, "agreement_score":.5}) + iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":2, "agreement_score":1}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 1, "agreement_score": 1}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 2, "agreement_score": .5}) + fin_path = iaa.export() + data_path = config['data_dir'] + schema_path = data_path + '/schemas' + dh_path = None #doesn't get used by dependency but is still an argument + + eval_dependency(dh_path, iaa_files_path, schema_path, 
out_path) + + for root, dir, files in os.walk(out_path): + for file in files: + #should be only 1 file for this case, so just run it on the only one + # if there's more than 1 then you can get fancy + out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8') From ceea4f342b185f5219c488d2a5ee867bc82b86f2 Mon Sep 17 00:00:00 2001 From: Jamie Ip Date: Thu, 19 Nov 2020 00:22:47 -0800 Subject: [PATCH 4/8] working on highlighting agreement scoring --- consensus_and_scoring/AgreementScoring.py | 44 +++++++++++++------- consensus_and_scoring/ChecklistCoding.py | 6 ++- consensus_and_scoring/test/test_IAA_basic.py | 37 ++++++++++++++++ 3 files changed, 72 insertions(+), 15 deletions(-) create mode 100644 consensus_and_scoring/test/test_IAA_basic.py diff --git a/consensus_and_scoring/AgreementScoring.py b/consensus_and_scoring/AgreementScoring.py index 55fa407..afbb864 100644 --- a/consensus_and_scoring/AgreementScoring.py +++ b/consensus_and_scoring/AgreementScoring.py @@ -2,6 +2,21 @@ import numpy as np import re from dataV3 import create_dependencies_dict +from nltk import agreement + +def highlightAgreementScore(starts, ends): + print("HIGHLIGHT AGREEMENT SCORING TIME!!!") + return 666 + +coder1 = [1,0,2,0,1,1,2,0,1,1] +coder2 = [1,1,0,0,1,1,2,1,1,0] +coder3 = [1,2,2,1,2,1,2,1,1,0] +formatted_codes = [[1,i,coder1[i]] for i in range(len(coder1))] + [[2,i,coder2[i]] for i in range(len(coder2))] + [[3,i,coder3[i]] for i in range(len(coder3))] +ratingtask = agreement.AnnotationTask(data=formatted_codes) + +print('Fleiss\'s Kappa:',ratingtask.multi_kappa()) +print('Krippendorff\'s alpha:',ratingtask.alpha()) +print('Scott\'s pi:',ratingtask.pi()) def AgreementScore(iaaData, schemaPath): print("AGREEMENT SCORING TIME!!!") @@ -32,17 +47,18 @@ def AgreementScore(iaaData, schemaPath): # Creates a dictionary of Parent Question: Answer: Child Questions # ex. {1: {1: [2], 2: [2]}, 2: {1: [4], 5: [4, 5], 8: [3]}, 5: {1: [6], 2: [6], 3: [6]}, 9: {1: [10, 11], 2: [10, 11]}} # T1.Q1.A1 changes T1.Q2, etc. 
-# def create_parents_dict(schemadata): -# df = schemadata[schemadata['answer_next_questions'].notna()] -# parents = df['answer_label'].tolist() -# children = df['answer_next_questions'].tolist() -# dependencies = {} -# for i in range(len(parents)): -# parent_q = int(re.findall(r"Q(\d+)", parents[i])[0]) -# parent_a = int(re.findall(r"A(\d+)", parents[i])[0]) -# child_q = [int(q) for q in re.findall(r"Q(\d+)", children[i])] -# if parent_q not in dependencies: -# dependencies[parent_q] = {parent_a:child_q} -# else: -# dependencies[parent_q][parent_a] = child_q -# return dependencies +# I wrote this function and it works but didn't actually end up using it since create_dependencies_dict was better +def create_parents_dict(schemadata): + df = schemadata[schemadata['answer_next_questions'].notna()] + parents = df['answer_label'].tolist() + children = df['answer_next_questions'].tolist() + dict = {} + for i in range(len(parents)): + parent_q = int(re.findall(r"Q(\d+)", parents[i])[0]) + parent_a = int(re.findall(r"A(\d+)", parents[i])[0]) + child_q = [int(q) for q in re.findall(r"Q(\d+)", children[i])] + if parent_q not in dict: + dict[parent_q] = {parent_a:child_q} + else: + dict[parent_q][parent_a] = child_q + return dict diff --git a/consensus_and_scoring/ChecklistCoding.py b/consensus_and_scoring/ChecklistCoding.py index bf69c65..8c6e7a1 100644 --- a/consensus_and_scoring/ChecklistCoding.py +++ b/consensus_and_scoring/ChecklistCoding.py @@ -1,4 +1,5 @@ from CodingScoring import * +from AgreementScoring import highlightAgreementScore #from repScores import * def scoreChecklist(answers,numUsers, num_choices): @@ -50,8 +51,11 @@ def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF,sour weightScaledNumUsers, userWeightDict, sourceText, useRep=useRep, threshold_func = threshold_func) firstSecondDiff = 1 - codingScore + + print("STARTS:",starts,"ENDS:",ends) + hlAgreeFactor = highlightAgreementScore(starts, ends) + #out.append(hlAgreeFactor) out.append([winner,units,uScore,iScore, codingScore, numUsers, selectedText, firstSecondDiff, 'checklist', num_choices]) #do_rep_calculation_nominal(users, answers, out[0], units, starts, ends, length, repDF,last30, checkListScale=(1/num_choices)) return out - diff --git a/consensus_and_scoring/test/test_IAA_basic.py b/consensus_and_scoring/test/test_IAA_basic.py new file mode 100644 index 0000000..da3eb8a --- /dev/null +++ b/consensus_and_scoring/test/test_IAA_basic.py @@ -0,0 +1,37 @@ +import sys +import os +import pandas as pd + +import test_utils +from filegen_utils import * +from IAA import * + +sys.path.append('../../') + +def test_iaa_constructor(config, tmpdir): + test_path = test_utils.make_test_directory(config, 'test_basic_b') + out_path = test_utils.make_test_directory(config, 'test_basic_b_out') + #source_task_id generated by smashing keyboard + dh = datahunt(out_folder=test_path, source_task_id = 'oogabooga') + + # dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'start_pos':1, 'end_pos':4}) + # dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'start_pos':2, 'end_pos':4}) + # dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'start_pos':1, 'end_pos':4}) + # dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'start_pos':1, 'end_pos':4}) + + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 
'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) + dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) + + fin_path = dh.export() + data_path = config['data_dir'] + schema_path = data_path+'/schemas' + + iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory = out_path) + print(iaa_out) + for root, dir, files in os.walk(iaa_out): + for file in files: + #should be only 1 file for this case, so just run it on the only one + # if there's more than 1 then you can get fancy + out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8') From 263847ef6500e02f594d7a6cf5aa00fbf6eeac68 Mon Sep 17 00:00:00 2001 From: Jamie Ip Date: Thu, 19 Nov 2020 16:09:38 -0800 Subject: [PATCH 5/8] finished highlight agreement scores for checklist questions --- consensus_and_scoring/AgreementScoring.py | 88 ++++++++++++++++--- consensus_and_scoring/ChecklistCoding.py | 27 ++++-- consensus_and_scoring/Dependency.py | 2 +- consensus_and_scoring/test/test_IAA_basic.py | 10 ++- .../test/test_agreement_score.py | 56 ++++-------- consensus_and_scoring/test/test_dep_basic.py | 52 +++++++++++ 6 files changed, 174 insertions(+), 61 deletions(-) create mode 100644 consensus_and_scoring/test/test_dep_basic.py diff --git a/consensus_and_scoring/AgreementScoring.py b/consensus_and_scoring/AgreementScoring.py index afbb864..73a595a 100644 --- a/consensus_and_scoring/AgreementScoring.py +++ b/consensus_and_scoring/AgreementScoring.py @@ -4,22 +4,47 @@ from dataV3 import create_dependencies_dict from nltk import agreement +#Takes in starts and ends of highlights for a specific question answer, returns factor to scale answer's agreement score by def highlightAgreementScore(starts, ends): + assert len(starts) == len(ends) + if len(starts) == 0: + return 0 + if len(starts) == 1: + return 1 + print("HIGHLIGHT AGREEMENT SCORING TIME!!!") - return 666 + first_start = min(starts) + last_end = max(ends) + 1 + coders = [] + #Creates a list of each annotator's highlights as a list where 0 is an unhighlighted index and 1 is a highlighted index + #e.g highlightAgreementScore([4, 3, 2], [6, 7, 5]) becomes [[0,0,1,1,1,0], [0,1,1,1,1,1], [1,1,1,1,0,0]] + for i in range(len(starts)): + highlights = np.zeros(last_end - first_start) + highlights[[x for x in range(starts[i] - first_start, ends[i] - first_start + 1)]] = 1 + print("Highlights " + str(i+1) + ": ", highlights) + coders.append(highlights) + + #Formats the codes properly as (coder,item,label) tuples + formatted_codes = [] + for annotator_num in range(len(coders)): + coder = coders[annotator_num] + formatted_codes += [[annotator_num+1, ind, coder[ind]] for ind in range(len(coder))] + ratingtask = agreement.AnnotationTask(data=formatted_codes) -coder1 = [1,0,2,0,1,1,2,0,1,1] -coder2 = [1,1,0,0,1,1,2,1,1,0] -coder3 = [1,2,2,1,2,1,2,1,1,0] -formatted_codes = [[1,i,coder1[i]] for i in range(len(coder1))] + [[2,i,coder2[i]] for i in range(len(coder2))] + [[3,i,coder3[i]] for i in range(len(coder3))] -ratingtask = agreement.AnnotationTask(data=formatted_codes) + 
avgAg = ratingtask.avg_Ao() + print('AVERAGE PAIRWISE AGREEMENT: ',avgAg) + # alpha = ratingtask.alpha() + # print('Krippendorff\'s alpha:',alpha) + # if alpha != 1: #other metrics error if alpha is 1 + # print('Fleiss\'s Kappa:',ratingtask.multi_kappa()) + # print('Scott\'s pi:',ratingtask.pi()) + return avgAg -print('Fleiss\'s Kappa:',ratingtask.multi_kappa()) -print('Krippendorff\'s alpha:',ratingtask.alpha()) -print('Scott\'s pi:',ratingtask.pi()) +highlightAgreementScore([2, 2, 2, 2, 2], [15, 15, 15, 15, 15]) +#Parent Agrement Scoring def AgreementScore(iaaData, schemaPath): - print("AGREEMENT SCORING TIME!!!") + print("PARENT AGREEMENT SCORING TIME!!!") print("OLD AGREEMENT SCORES:") print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) #TODO: AGREEMENT SCORE CHANGES HERE @@ -44,6 +69,49 @@ def AgreementScore(iaaData, schemaPath): print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) return iaaData +#Agreement scoring but scores of parents of parents don't affect children +def AgreementScoreReverse(iaaData, schemaPath): + print("PARENT AGREEMENT SCORING TIME!!!") + print("OLD AGREEMENT SCORES:") + print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) + #TODO: AGREEMENT SCORE CHANGES HERE + schemData = pd.read_csv(schemaPath, encoding = 'utf-8') + dependencies = create_dependencies_dict(schemData) + iaaQuestions = iaaData['question_Number'].tolist() + reversed_keys = list(dependencies.keys())[::-1] + for child in reversed_keys: + if child not in iaaQuestions: + continue + parents = dependencies[child].keys() + #TODO: clean this up + temp = [] + for parent in parents: + answers = dependencies[child][parent] + parentScores = iaaData[(iaaData['question_Number'] == parent)] + parentScores = parentScores[parentScores['agreed_Answer'].astype(int).isin(answers)] + temp.append(np.mean(parentScores['agreement_score'])) + avgParentScores = np.mean(temp) + iaaData['agreement_score'] = np.where(iaaData['question_Number'] == child, iaaData['agreement_score'] * avgParentScores, iaaData['agreement_score']) + #iaaData['agreement_score'] = np.zeros(3) + print("NEW AGREEMENT SCORES:") + print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) + return iaaData + +#Just for testing out the differences betwene metrics +def metricTest(): + coder1 = [1,0,2,0,1,1,2,0,1,1] + coder2 = [1,1,0,0,1,1,2,1,1,0] + coder3 = [1,2,2,1,2,1,2,1,1,0] + formatted_codes = [[1,i,coder1[i]] for i in range(len(coder1))] + [[2,i,coder2[i]] for i in range(len(coder2))] + [[3,i,coder3[i]] for i in range(len(coder3))] + print('RUNNING METRIC TEST') + ratingtask = agreement.AnnotationTask(data=formatted_codes) + print('Average pairwise agreement: ',ratingtask.avg_Ao()) + print('Cohen\'s Kappa:',ratingtask.kappa()) + print('Fleiss\'s Kappa:',ratingtask.multi_kappa()) + print('Krippendorff\'s alpha:',ratingtask.alpha()) + print('Scott\'s pi:',ratingtask.pi()) +#metricTest() + # Creates a dictionary of Parent Question: Answer: Child Questions # ex. {1: {1: [2], 2: [2]}, 2: {1: [4], 5: [4, 5], 8: [3]}, 5: {1: [6], 2: [6], 3: [6]}, 9: {1: [10, 11], 2: [10, 11]}} # T1.Q1.A1 changes T1.Q2, etc. 
diff --git a/consensus_and_scoring/ChecklistCoding.py b/consensus_and_scoring/ChecklistCoding.py index 8c6e7a1..8ecdd58 100644 --- a/consensus_and_scoring/ChecklistCoding.py +++ b/consensus_and_scoring/ChecklistCoding.py @@ -2,7 +2,7 @@ from AgreementScoring import highlightAgreementScore #from repScores import * -def scoreChecklist(answers,numUsers, num_choices): +def scoreChecklist(answers,numUsers, num_choices, starts, ends): out = [] #print('answers', answers, num_choices) length = num_choices+1 @@ -11,9 +11,26 @@ def scoreChecklist(answers,numUsers, num_choices): scores = np.zeros(length) for a in answers: scores[a] = scores[a]+1 + + starts_i = {} + ends_i = {} + for i in range(len(answers)): + a = answers[i] + if a not in starts_i: + starts_i[a] = [starts[i]] + ends_i[a] = [ends[i]] + else: + starts_i[a] += [starts[i]] + ends_i[a] += [ends[i]] + print(starts_i, ends_i) + for i in range(len(scores)): #print('scores', scores, numUsers) - out.append(scores[i]/numUsers) + hlAgreeFactor = 1 + if i in starts_i: + hlAgreeFactor = highlightAgreementScore(starts_i[i], ends_i[i]) + out.append(scores[i]/numUsers * hlAgreeFactor) + return out def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF,sourceText, hlUsers, hlAns, @@ -22,7 +39,7 @@ def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF,sour repScaledAnswers, repScaledUsers = repScaleAnsUsers(answers, users, repDF, useRep=useRep) #assert len(starts) == len(users), 'starts, users mismatched' #TODO: scale numUsers when repScaled gets scaled up - percArray = scoreChecklist(repScaledAnswers, numUsers, num_choices) + percArray = scoreChecklist(repScaledAnswers, numUsers, num_choices, starts, ends) out = [] for i in range(1,len(percArray)): codingScore = percArray[i] @@ -51,10 +68,6 @@ def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF,sour weightScaledNumUsers, userWeightDict, sourceText, useRep=useRep, threshold_func = threshold_func) firstSecondDiff = 1 - codingScore - - print("STARTS:",starts,"ENDS:",ends) - hlAgreeFactor = highlightAgreementScore(starts, ends) - #out.append(hlAgreeFactor) out.append([winner,units,uScore,iScore, codingScore, numUsers, selectedText, firstSecondDiff, 'checklist', num_choices]) #do_rep_calculation_nominal(users, answers, out[0], units, starts, ends, length, repDF,last30, checkListScale=(1/num_choices)) diff --git a/consensus_and_scoring/Dependency.py b/consensus_and_scoring/Dependency.py index 87e6524..8d66f2d 100644 --- a/consensus_and_scoring/Dependency.py +++ b/consensus_and_scoring/Dependency.py @@ -15,7 +15,7 @@ def eval_dependency(directory, iaa_dir, schema_dir, out_dir): # minimal check here; everything in the schema directory should be a schema csv if file.endswith('.csv'): file_path = os.path.join(dirpath, file) - print("found schema " + file_path) + #print("found schema " + file_path) schema.append(file_path) print("looking for IAA", iaa_dir) for dirpath, dirnames, files in os.walk(iaa_dir): diff --git a/consensus_and_scoring/test/test_IAA_basic.py b/consensus_and_scoring/test/test_IAA_basic.py index da3eb8a..509ed46 100644 --- a/consensus_and_scoring/test/test_IAA_basic.py +++ b/consensus_and_scoring/test/test_IAA_basic.py @@ -19,10 +19,11 @@ def test_iaa_constructor(config, tmpdir): # dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'start_pos':1, 'end_pos':4}) # dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 
'start_pos':1, 'end_pos':4}) - dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) - dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) - dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) - dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':4}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) fin_path = dh.export() data_path = config['data_dir'] @@ -35,3 +36,4 @@ def test_iaa_constructor(config, tmpdir): #should be only 1 file for this case, so just run it on the only one # if there's more than 1 then you can get fancy out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8') + print(out_df[['question_Number', 'agreed_Answer', 'agreement_score']]) diff --git a/consensus_and_scoring/test/test_agreement_score.py b/consensus_and_scoring/test/test_agreement_score.py index 0eb945e..e0f0a02 100644 --- a/consensus_and_scoring/test/test_agreement_score.py +++ b/consensus_and_scoring/test/test_agreement_score.py @@ -4,53 +4,31 @@ import test_utils from filegen_utils import * +from IAA import * from Dependency import * import conftest #REFERENCE: in Evidence, parents = {1.1:[2], 1.2:[2], 2.1:[4], 2.5:[4,5], 2.8:[3], 5.1:[6], 5.2:[6], 5.3:[6], 9.1:[10,11], 9.2:[10,11]} -def test_dep_sample(config): - iaa_files_path = test_utils.make_test_directory(config, 'agscore_sample') - out_path = test_utils.make_test_directory(config, 'agscore_sample_out') - # source_task_id generated by smashing keyboard - iaa = IAA_task(out_folder=iaa_files_path, source_task_id='agscore_test') - iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":1, "agreement_score":.5, 'highlighted_indices':test_utils.make_highlight_indices(10,30)}) - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 1, "agreement_score": .5}) - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 4, "agreed_Answer": 1, "agreement_score": .5}) - fin_path = iaa.export() - data_path = config['data_dir'] - schema_path = data_path + '/schemas' - dh_path = None #doesn't get used by dependency but is still an argument - - eval_dependency(dh_path, iaa_files_path, schema_path, out_path) +def test_sample(config): + test_path = test_utils.make_test_directory(config, 'test_agscore') + 
iaa_files_path = test_utils.make_test_directory(config, 'test_agscore_iaa') + out_path = test_utils.make_test_directory(config, 'test_agscore_out') + #source_task_id generated by smashing keyboard + dh = datahunt(out_folder=test_path, source_task_id = 'oogabooga') - for root, dir, files in os.walk(out_path): - for file in files: - #should be only 1 file for this case, so just run it on the only one - # if there's more than 1 then you can get fancy - out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8') - #9 answer choices to a checklist question - # assert len(out_df) == 2 - # q_three = out_df[out_df['question_Number']==2] - # hl = q_three['highlighted_indices'].iloc[0] - # assert len(hl) >18 - # assert '10' in hl - # assert '29' in hl + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) -def test_dep_sample2(config): - iaa_files_path = test_utils.make_test_directory(config, 'agscore_sample2') - out_path = test_utils.make_test_directory(config, 'agscore_sample2_out') - # source_task_id generated by smashing keyboard - iaa = IAA_task(out_folder=iaa_files_path, source_task_id='agscore_test') - iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":1, "agreement_score":.5}) - iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":2, "agreement_score":1}) - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 1, "agreement_score": 1}) - iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 2, "agreement_score": .5}) - fin_path = iaa.export() + fin_path = dh.export() data_path = config['data_dir'] - schema_path = data_path + '/schemas' - dh_path = None #doesn't get used by dependency but is still an argument + schema_path = data_path+'/schemas' + + iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory = iaa_files_path) - eval_dependency(dh_path, iaa_files_path, schema_path, out_path) + eval_dependency(test_path, iaa_files_path, schema_path, out_path) for root, dir, files in os.walk(out_path): for file in files: diff --git a/consensus_and_scoring/test/test_dep_basic.py b/consensus_and_scoring/test/test_dep_basic.py new file mode 100644 index 0000000..0d55958 --- /dev/null +++ b/consensus_and_scoring/test/test_dep_basic.py @@ -0,0 +1,52 @@ +import sys +import os +import pandas as pd + +import test_utils +from filegen_utils import * +from Dependency import * +import conftest + +#REFERENCE: in Evidence, parents = {1.1:[2], 1.2:[2], 2.1:[4], 2.5:[4,5], 2.8:[3], 5.1:[6], 5.2:[6], 5.3:[6], 9.1:[10,11], 9.2:[10,11]} 
+def test_dep_sample(config): + iaa_files_path = test_utils.make_test_directory(config, 'dep_basic') + out_path = test_utils.make_test_directory(config, 'dep_basic_out') + # source_task_id generated by smashing keyboard + iaa = IAA_task(out_folder=iaa_files_path, source_task_id='boogaboga') + iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":1, "agreement_score":.5, 'highlighted_indices':test_utils.make_highlight_indices(10,30)}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 1, "agreement_score": .5}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 4, "agreed_Answer": 1, "agreement_score": .5}) + fin_path = iaa.export() + data_path = config['data_dir'] + schema_path = data_path + '/schemas' + dh_path = None #doesn't get used by dependency but is still an argument + + eval_dependency(dh_path, iaa_files_path, schema_path, out_path) + + for root, dir, files in os.walk(out_path): + for file in files: + #should be only 1 file for this case, so just run it on the only one + # if there's more than 1 then you can get fancy + out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8') + +def test_dep_sample2(config): + iaa_files_path = test_utils.make_test_directory(config, 'dep_basic2') + out_path = test_utils.make_test_directory(config, 'dep_basic2_out') + # source_task_id generated by smashing keyboard + iaa = IAA_task(out_folder=iaa_files_path, source_task_id='boogabogas') + iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":1, "agreement_score":.5}) + iaa.add_row({"namespace":"Covid_Evidence2020_03_21", "question_Number":1, "agreed_Answer":2, "agreement_score":1}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 1, "agreement_score": 1}) + iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 2, "agreed_Answer": 2, "agreement_score": .5}) + fin_path = iaa.export() + data_path = config['data_dir'] + schema_path = data_path + '/schemas' + dh_path = None #doesn't get used by dependency but is still an argument + + eval_dependency(dh_path, iaa_files_path, schema_path, out_path) + + for root, dir, files in os.walk(out_path): + for file in files: + #should be only 1 file for this case, so just run it on the only one + # if there's more than 1 then you can get fancy + out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8') From dcd5e4cd371721d878b97a721d8e3409836e7c6e Mon Sep 17 00:00:00 2001 From: Jamie Date: Thu, 10 Dec 2020 15:29:35 -0800 Subject: [PATCH 6/8] extended highlight weighting to work for code questions, added weights and use arguments to both weighting functions --- consensus_and_scoring/AgreementScoring.py | 115 +++++------------- consensus_and_scoring/ChecklistCoding.py | 23 +--- consensus_and_scoring/Dependency.py | 2 +- consensus_and_scoring/IAA.py | 37 ++++++ consensus_and_scoring/test/test_IAA_basic.py | 30 +++-- .../test/test_agreement_score.py | 13 +- 6 files changed, 99 insertions(+), 121 deletions(-) diff --git a/consensus_and_scoring/AgreementScoring.py b/consensus_and_scoring/AgreementScoring.py index 73a595a..17c8fb4 100644 --- a/consensus_and_scoring/AgreementScoring.py +++ b/consensus_and_scoring/AgreementScoring.py @@ -4,15 +4,23 @@ from dataV3 import create_dependencies_dict from nltk import agreement -#Takes in starts and ends of highlights for a specific question answer, returns factor to scale answer's agreement score by -def 
highlightAgreementScore(starts, ends): - assert len(starts) == len(ends) - if len(starts) == 0: - return 0 - if len(starts) == 1: +#Changing Agreement Scores based on Highlights +#To enable, set use=True +#To dimnish the value it scales by, set weight to a lower value +#e.g. if score = 0.5 and weight = 0.5, it scales agscore by 0.75 instead of 0.5 +def highlightAgreementScore(starts, ends, weight=1, use=True): + if not use: + return 1 + if (not isinstance(starts, list) or not isinstance(ends, list)): + print("INVALID HIGHLIGHTS") + return 1 + if len(starts) != len(ends): + print("INVALID HIGHLIGHTS") + return 1 + if len(starts) <= 1: return 1 - print("HIGHLIGHT AGREEMENT SCORING TIME!!!") + # print("HIGHLIGHT AGREEMENT SCORING TIME!!!") first_start = min(starts) last_end = max(ends) + 1 coders = [] @@ -21,7 +29,7 @@ def highlightAgreementScore(starts, ends): for i in range(len(starts)): highlights = np.zeros(last_end - first_start) highlights[[x for x in range(starts[i] - first_start, ends[i] - first_start + 1)]] = 1 - print("Highlights " + str(i+1) + ": ", highlights) + #print("Highlights for Annotator " + str(i+1) + ": ", highlights) coders.append(highlights) #Formats the codes properly as (coder,item,label) tuples @@ -31,23 +39,21 @@ def highlightAgreementScore(starts, ends): formatted_codes += [[annotator_num+1, ind, coder[ind]] for ind in range(len(coder))] ratingtask = agreement.AnnotationTask(data=formatted_codes) + #Return the average agreement score of all highlights avgAg = ratingtask.avg_Ao() - print('AVERAGE PAIRWISE AGREEMENT: ',avgAg) - # alpha = ratingtask.alpha() - # print('Krippendorff\'s alpha:',alpha) - # if alpha != 1: #other metrics error if alpha is 1 - # print('Fleiss\'s Kappa:',ratingtask.multi_kappa()) - # print('Scott\'s pi:',ratingtask.pi()) - return avgAg - -highlightAgreementScore([2, 2, 2, 2, 2], [15, 15, 15, 15, 15]) + weighted_avgAg = 1 - ((1 - avgAg) * weight) + print('Average Pairwise Agreement: ' + str(avgAg) + ', Weighted: ' + str(weighted_avgAg)) + return weighted_avgAg -#Parent Agrement Scoring -def AgreementScore(iaaData, schemaPath): +#Changing Agreement Scores based on Parent Agreement Scores +#To enable, set use=True +#To dimnish the value it scales by, set weight to a lower value +def parentAgreementScore(iaaData, schemaPath, weight=1, use=True): + if not use: + return iaaData print("PARENT AGREEMENT SCORING TIME!!!") print("OLD AGREEMENT SCORES:") print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) - #TODO: AGREEMENT SCORE CHANGES HERE schemData = pd.read_csv(schemaPath, encoding = 'utf-8') dependencies = create_dependencies_dict(schemData) iaaQuestions = iaaData['question_Number'].tolist() @@ -55,7 +61,7 @@ def AgreementScore(iaaData, schemaPath): if child not in iaaQuestions: continue parents = dependencies[child].keys() - #TODO: clean this up + #TODO: clean this bit up? 
temp = [] for parent in parents: answers = dependencies[child][parent] @@ -63,70 +69,9 @@ def AgreementScore(iaaData, schemaPath): parentScores = parentScores[parentScores['agreed_Answer'].astype(int).isin(answers)] temp.append(np.mean(parentScores['agreement_score'])) avgParentScores = np.mean(temp) - iaaData['agreement_score'] = np.where(iaaData['question_Number'] == child, iaaData['agreement_score'] * avgParentScores, iaaData['agreement_score']) - #iaaData['agreement_score'] = np.zeros(3) + weighted_avgParentScores = 1 - ((1 - avgParentScores) * weight) + iaaData['agreement_score'] = np.where(iaaData['question_Number'] == child, + iaaData['agreement_score'] * weighted_avgParentScores, iaaData['agreement_score']) print("NEW AGREEMENT SCORES:") print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) return iaaData - -#Agreement scoring but scores of parents of parents don't affect children -def AgreementScoreReverse(iaaData, schemaPath): - print("PARENT AGREEMENT SCORING TIME!!!") - print("OLD AGREEMENT SCORES:") - print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) - #TODO: AGREEMENT SCORE CHANGES HERE - schemData = pd.read_csv(schemaPath, encoding = 'utf-8') - dependencies = create_dependencies_dict(schemData) - iaaQuestions = iaaData['question_Number'].tolist() - reversed_keys = list(dependencies.keys())[::-1] - for child in reversed_keys: - if child not in iaaQuestions: - continue - parents = dependencies[child].keys() - #TODO: clean this up - temp = [] - for parent in parents: - answers = dependencies[child][parent] - parentScores = iaaData[(iaaData['question_Number'] == parent)] - parentScores = parentScores[parentScores['agreed_Answer'].astype(int).isin(answers)] - temp.append(np.mean(parentScores['agreement_score'])) - avgParentScores = np.mean(temp) - iaaData['agreement_score'] = np.where(iaaData['question_Number'] == child, iaaData['agreement_score'] * avgParentScores, iaaData['agreement_score']) - #iaaData['agreement_score'] = np.zeros(3) - print("NEW AGREEMENT SCORES:") - print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) - return iaaData - -#Just for testing out the differences betwene metrics -def metricTest(): - coder1 = [1,0,2,0,1,1,2,0,1,1] - coder2 = [1,1,0,0,1,1,2,1,1,0] - coder3 = [1,2,2,1,2,1,2,1,1,0] - formatted_codes = [[1,i,coder1[i]] for i in range(len(coder1))] + [[2,i,coder2[i]] for i in range(len(coder2))] + [[3,i,coder3[i]] for i in range(len(coder3))] - print('RUNNING METRIC TEST') - ratingtask = agreement.AnnotationTask(data=formatted_codes) - print('Average pairwise agreement: ',ratingtask.avg_Ao()) - print('Cohen\'s Kappa:',ratingtask.kappa()) - print('Fleiss\'s Kappa:',ratingtask.multi_kappa()) - print('Krippendorff\'s alpha:',ratingtask.alpha()) - print('Scott\'s pi:',ratingtask.pi()) -#metricTest() - -# Creates a dictionary of Parent Question: Answer: Child Questions -# ex. {1: {1: [2], 2: [2]}, 2: {1: [4], 5: [4, 5], 8: [3]}, 5: {1: [6], 2: [6], 3: [6]}, 9: {1: [10, 11], 2: [10, 11]}} -# T1.Q1.A1 changes T1.Q2, etc. 
-# I wrote this function and it works but didn't actually end up using it since create_dependencies_dict was better -def create_parents_dict(schemadata): - df = schemadata[schemadata['answer_next_questions'].notna()] - parents = df['answer_label'].tolist() - children = df['answer_next_questions'].tolist() - dict = {} - for i in range(len(parents)): - parent_q = int(re.findall(r"Q(\d+)", parents[i])[0]) - parent_a = int(re.findall(r"A(\d+)", parents[i])[0]) - child_q = [int(q) for q in re.findall(r"Q(\d+)", children[i])] - if parent_q not in dict: - dict[parent_q] = {parent_a:child_q} - else: - dict[parent_q][parent_a] = child_q - return dict diff --git a/consensus_and_scoring/ChecklistCoding.py b/consensus_and_scoring/ChecklistCoding.py index 8ecdd58..ebe7b72 100644 --- a/consensus_and_scoring/ChecklistCoding.py +++ b/consensus_and_scoring/ChecklistCoding.py @@ -2,7 +2,7 @@ from AgreementScoring import highlightAgreementScore #from repScores import * -def scoreChecklist(answers,numUsers, num_choices, starts, ends): +def scoreChecklist(answers,numUsers, num_choices): out = [] #print('answers', answers, num_choices) length = num_choices+1 @@ -11,26 +11,9 @@ def scoreChecklist(answers,numUsers, num_choices, starts, ends): scores = np.zeros(length) for a in answers: scores[a] = scores[a]+1 - - starts_i = {} - ends_i = {} - for i in range(len(answers)): - a = answers[i] - if a not in starts_i: - starts_i[a] = [starts[i]] - ends_i[a] = [ends[i]] - else: - starts_i[a] += [starts[i]] - ends_i[a] += [ends[i]] - print(starts_i, ends_i) - for i in range(len(scores)): #print('scores', scores, numUsers) - hlAgreeFactor = 1 - if i in starts_i: - hlAgreeFactor = highlightAgreementScore(starts_i[i], ends_i[i]) - out.append(scores[i]/numUsers * hlAgreeFactor) - + out.append(scores[i]/numUsers) return out def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF,sourceText, hlUsers, hlAns, @@ -39,7 +22,7 @@ def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF,sour repScaledAnswers, repScaledUsers = repScaleAnsUsers(answers, users, repDF, useRep=useRep) #assert len(starts) == len(users), 'starts, users mismatched' #TODO: scale numUsers when repScaled gets scaled up - percArray = scoreChecklist(repScaledAnswers, numUsers, num_choices, starts, ends) + percArray = scoreChecklist(repScaledAnswers, numUsers, num_choices) out = [] for i in range(1,len(percArray)): codingScore = percArray[i] diff --git a/consensus_and_scoring/Dependency.py b/consensus_and_scoring/Dependency.py index 8d66f2d..5880bbe 100644 --- a/consensus_and_scoring/Dependency.py +++ b/consensus_and_scoring/Dependency.py @@ -133,7 +133,7 @@ def handleDependencies(schemaPath, iaaPath, out_dir): indices = merge_indices(row_indices, indices).tolist() iaaData.at[row, 'highlighted_indices'] = json.dumps(indices) - iaaData = AgreementScore(iaaData, schemaPath) + iaaData = parentAgreementScore(iaaData, schemaPath) print('exporting to csv') path, name = get_path(iaaPath) diff --git a/consensus_and_scoring/IAA.py b/consensus_and_scoring/IAA.py index 8e06fb7..71f9fb0 100644 --- a/consensus_and_scoring/IAA.py +++ b/consensus_and_scoring/IAA.py @@ -252,6 +252,43 @@ def score(article, ques, data, config_path, text_file, schemaFile, repDF = None, elif question_type == 'checklist': out = evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF, sourceText, hlUsers, hlAns, num_choices = num_choices, useRep=useRep, threshold_func = threshold_func) + + #Only change agreement score by highlights if highlights 
exist + if (isinstance(starts, list) and len(answers) == len(starts) and len(starts) == len(ends)): + starts_i = {} + ends_i = {} + #For this question, map all answers to their starting and ending highlights + #e.g. starts_i = {1: [5, 5]} means Answer 1 for this question has two users start highlights on index 5 + for i in range(len(answers)): + a = answers[i] + try: + if a not in starts_i: + starts_i[a] = [starts[i]] + ends_i[a] = [ends[i]] + except: + print("ERROR", a, i) + else: + starts_i[a] += [starts[i]] + ends_i[a] += [ends[i]] + print("Question", ques, "{Answer:Highlight_Starts}:", starts_i, "{Answer:Highlight_Ends}:", ends_i) + #Change each answer's agreement score based on the answer's highlighting agreement + if question_type == 'checklist': + for stuff in out: + ans_num = stuff[0] + old_ag_score = stuff[4] + if ans_num in starts_i: + hlAgreeFactor = highlightAgreementScore(starts_i[ans_num], ends_i[ans_num]) + print("Agreement Score transformed from", old_ag_score, "to", old_ag_score * hlAgreeFactor,"\n") + stuff[4] = old_ag_score * hlAgreeFactor + else: + ans_num = out[0] + old_ag_score = out[4] + if ans_num in starts_i: + hlAgreeFactor = highlightAgreementScore(starts_i[ans_num], ends_i[ans_num]) + print("Agreement Score transformed from", old_ag_score, "to", old_ag_score * hlAgreeFactor,"\n") + temp_out = list(out) + temp_out[4] = old_ag_score * hlAgreeFactor + out = tuple(temp_out) return out diff --git a/consensus_and_scoring/test/test_IAA_basic.py b/consensus_and_scoring/test/test_IAA_basic.py index 509ed46..7585ceb 100644 --- a/consensus_and_scoring/test/test_IAA_basic.py +++ b/consensus_and_scoring/test/test_IAA_basic.py @@ -14,16 +14,26 @@ def test_iaa_constructor(config, tmpdir): #source_task_id generated by smashing keyboard dh = datahunt(out_folder=test_path, source_task_id = 'oogabooga') - # dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'start_pos':1, 'end_pos':4}) - # dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'start_pos':2, 'end_pos':4}) - # dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'start_pos':1, 'end_pos':4}) - # dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'start_pos':1, 'end_pos':4}) - - dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 
'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + + + dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A4', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':5, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A5', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A5', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'E', 'highlight_count':10, 'start_pos':1, 'end_pos':9, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q2.A5', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'F', 'highlight_count':10, 'start_pos':1, 'end_pos':8, 'article_text_length': 100}) + + dh.add_row({'answer_label': 'T1.Q4.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q4.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':8, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q4.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':9, 'article_text_length': 100}) + + dh.add_row({'answer_label': 'T1.Q5.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q5.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q5.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100}) fin_path = dh.export() data_path = config['data_dir'] diff --git a/consensus_and_scoring/test/test_agreement_score.py b/consensus_and_scoring/test/test_agreement_score.py index e0f0a02..d544b4c 100644 --- a/consensus_and_scoring/test/test_agreement_score.py +++ b/consensus_and_scoring/test/test_agreement_score.py @@ -16,11 +16,13 @@ def test_sample(config): #source_task_id generated by smashing keyboard dh = datahunt(out_folder=test_path, source_task_id = 'oogabooga') - dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 
'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) - dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':3, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + dh.add_row({'answer_label': 'T1.Q1.A3', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'E', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) + + dh.add_row({'answer_label': 'T1.Q2.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100}) fin_path = dh.export() data_path = config['data_dir'] @@ -35,3 +37,4 @@ def test_sample(config): #should be only 1 file for this case, so just run it on the only one # if there's more than 1 then you can get fancy out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8') + print(out_df[['question_Number', 'agreed_Answer', 'agreement_score']]) From d25225d1b1b5b04674dd4952d218f2f6a89262bc Mon Sep 17 00:00:00 2001 From: Jamie Date: Thu, 10 Dec 2020 15:42:37 -0800 Subject: [PATCH 7/8] cleaned up code a bit, added comments --- consensus_and_scoring/AgreementScoring.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/consensus_and_scoring/AgreementScoring.py b/consensus_and_scoring/AgreementScoring.py index 17c8fb4..71f7750 100644 --- a/consensus_and_scoring/AgreementScoring.py +++ b/consensus_and_scoring/AgreementScoring.py @@ -32,14 +32,14 @@ def highlightAgreementScore(starts, ends, weight=1, use=True): #print("Highlights for Annotator " + str(i+1) + ": ", highlights) coders.append(highlights) - #Formats the codes properly as (coder,item,label) tuples + #Formats the codes properly as (coder,item,label) tuples (required by avg_Ao) formatted_codes = [] for annotator_num in range(len(coders)): coder = coders[annotator_num] formatted_codes += [[annotator_num+1, ind, coder[ind]] for ind in range(len(coder))] ratingtask = agreement.AnnotationTask(data=formatted_codes) - #Return the average agreement score of all highlights + #Return the weighted average agreement score of all highlights avgAg = ratingtask.avg_Ao() weighted_avgAg = 1 - ((1 - avgAg) * weight) print('Average Pairwise Agreement: ' + str(avgAg) + ', Weighted: ' + str(weighted_avgAg)) @@ -54,14 +54,21 @@ def parentAgreementScore(iaaData, schemaPath, weight=1, use=True): 
print("PARENT AGREEMENT SCORING TIME!!!") print("OLD AGREEMENT SCORES:") print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']]) + + #Get a dictionary of children and parents schemData = pd.read_csv(schemaPath, encoding = 'utf-8') dependencies = create_dependencies_dict(schemData) iaaQuestions = iaaData['question_Number'].tolist() + + #For each child, if present in the iaaData, calculate a new agreement score for child in dependencies.keys(): if child not in iaaQuestions: continue parents = dependencies[child].keys() + #TODO: clean this bit up? + #Children can have multiple parent questions that each can have multiple parent answers + #For each parent question, assign each parent answer score to parentScores, then append the mean score to temp temp = [] for parent in parents: answers = dependencies[child][parent] From 0d40af14b4edfb43b0c403cf67877b99adc6dfea Mon Sep 17 00:00:00 2001 From: Jamie Date: Thu, 10 Dec 2020 15:49:16 -0800 Subject: [PATCH 8/8] minor change --- consensus_and_scoring/IAA.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/consensus_and_scoring/IAA.py b/consensus_and_scoring/IAA.py index 71f9fb0..56faa85 100644 --- a/consensus_and_scoring/IAA.py +++ b/consensus_and_scoring/IAA.py @@ -261,12 +261,9 @@ def score(article, ques, data, config_path, text_file, schemaFile, repDF = None, #e.g. starts_i = {1: [5, 5]} means Answer 1 for this question has two users start highlights on index 5 for i in range(len(answers)): a = answers[i] - try: - if a not in starts_i: - starts_i[a] = [starts[i]] - ends_i[a] = [ends[i]] - except: - print("ERROR", a, i) + if a not in starts_i: + starts_i[a] = [starts[i]] + ends_i[a] = [ends[i]] else: starts_i[a] += [starts[i]] ends_i[a] += [ends[i]]