84 changes: 84 additions & 0 deletions consensus_and_scoring/AgreementScoring.py
@@ -0,0 +1,84 @@
import pandas as pd
import numpy as np
import re
from dataV3 import create_dependencies_dict
from nltk import agreement

#Changing Agreement Scores based on Highlights
#To enable, set use=True
#To diminish how strongly it scales the score, set weight to a lower value
#e.g. if score = 0.5 and weight = 0.5, it scales agscore by 0.75 instead of 0.5
def highlightAgreementScore(starts, ends, weight=1, use=True):
    if not use:
        return 1
    if not isinstance(starts, list) or not isinstance(ends, list):
        print("INVALID HIGHLIGHTS")
        return 1
    if len(starts) != len(ends):
        print("INVALID HIGHLIGHTS")
        return 1
    if len(starts) <= 1:
        return 1

    # print("HIGHLIGHT AGREEMENT SCORING TIME!!!")
    first_start = min(starts)
    last_end = max(ends) + 1
    coders = []
    #Build each annotator's highlights as a 0/1 vector, where 0 is an unhighlighted index and 1 is a highlighted index
    #e.g. highlightAgreementScore([4, 3, 2], [6, 7, 5]) becomes [[0,0,1,1,1,0], [0,1,1,1,1,1], [1,1,1,1,0,0]]
    for i in range(len(starts)):
        highlights = np.zeros(last_end - first_start)
        highlights[starts[i] - first_start : ends[i] - first_start + 1] = 1
        #print("Highlights for Annotator " + str(i+1) + ": ", highlights)
        coders.append(highlights)

    #Format the codes as (coder, item, label) tuples, as required by AnnotationTask
    formatted_codes = []
    for annotator_num in range(len(coders)):
        coder = coders[annotator_num]
        formatted_codes += [[annotator_num + 1, ind, coder[ind]] for ind in range(len(coder))]
    ratingtask = agreement.AnnotationTask(data=formatted_codes)

    #Return the weighted average pairwise agreement over all highlights
    avgAg = ratingtask.avg_Ao()
    weighted_avgAg = 1 - ((1 - avgAg) * weight)
    print('Average Pairwise Agreement: ' + str(avgAg) + ', Weighted: ' + str(weighted_avgAg))
    return weighted_avgAg
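
#Quick sanity check of the weighting arithmetic above (illustrative values only, not part of the pipeline):
#  avgAg = 0.5, weight = 1.0  ->  1 - ((1 - 0.5) * 1.0) = 0.50
#  avgAg = 0.5, weight = 0.5  ->  1 - ((1 - 0.5) * 0.5) = 0.75
#A weight below 1 shrinks the penalty term (1 - avgAg) rather than scaling the score directly,
#so the weighted score always lands between avgAg and 1.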

#Changing Agreement Scores based on Parent Agreement Scores
#To enable, set use=True
#To diminish how strongly it scales the score, set weight to a lower value
def parentAgreementScore(iaaData, schemaPath, weight=1, use=True):
    if not use:
        return iaaData
    print("PARENT AGREEMENT SCORING TIME!!!")
    print("OLD AGREEMENT SCORES:")
    print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']])

    #Get a dictionary mapping child questions to their parents
    schemData = pd.read_csv(schemaPath, encoding='utf-8')
    dependencies = create_dependencies_dict(schemData)
    iaaQuestions = iaaData['question_Number'].tolist()

    #For each child present in the iaaData, calculate a new agreement score
    for child in dependencies.keys():
        if child not in iaaQuestions:
            continue
        parents = dependencies[child].keys()

        #TODO: clean this bit up?
        #Children can have multiple parent questions, each of which can have multiple parent answers
        #For each parent question, gather its parent answers' scores, then append their mean to temp
        temp = []
        for parent in parents:
            answers = dependencies[child][parent]
            parentScores = iaaData[iaaData['question_Number'] == parent]
            parentScores = parentScores[parentScores['agreed_Answer'].astype(int).isin(answers)]
            temp.append(np.mean(parentScores['agreement_score']))
        avgParentScores = np.mean(temp)
        weighted_avgParentScores = 1 - ((1 - avgParentScores) * weight)
        iaaData['agreement_score'] = np.where(iaaData['question_Number'] == child,
                                              iaaData['agreement_score'] * weighted_avgParentScores,
                                              iaaData['agreement_score'])
    print("NEW AGREEMENT SCORES:")
    print(iaaData[['question_Number', 'agreed_Answer', 'agreement_score']])
    return iaaData
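
#Worked example of the parent scaling (hypothetical numbers, for illustration only):
#if child question 2 depends on parent question 1 answers [1, 2], and those answers'
#agreement scores in iaaData are 0.8 and 0.6, then temp = [0.7] and avgParentScores = 0.7,
#so with weight=1 every Q2 row's agreement_score is multiplied by 0.7.
#With weight=0.5 the multiplier would instead be 1 - ((1 - 0.7) * 0.5) = 0.85.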
2 changes: 1 addition & 1 deletion consensus_and_scoring/ChecklistCoding.py
@@ -1,4 +1,5 @@
from CodingScoring import *
from AgreementScoring import highlightAgreementScore
#from repScores import *

def scoreChecklist(answers,numUsers, num_choices):
@@ -54,4 +55,3 @@ def evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF, sourceText, ...
#do_rep_calculation_nominal(users, answers, out[0], units, starts, ends, length, repDF,last30, checkListScale=(1/num_choices))

return out

5 changes: 4 additions & 1 deletion consensus_and_scoring/Dependency.py
@@ -4,6 +4,7 @@
import os
import json
from dataV3 import *
from AgreementScoring import *

def eval_dependency(directory, iaa_dir, schema_dir, out_dir):
print("DEPENDENCY STARTING")
@@ -14,7 +15,7 @@ def eval_dependency(directory, iaa_dir, schema_dir, out_dir):
            # minimal check here; everything in the schema directory should be a schema csv
            if file.endswith('.csv'):
                file_path = os.path.join(dirpath, file)
                print("found schema " + file_path)
                #print("found schema " + file_path)
                schema.append(file_path)
    print("looking for IAA", iaa_dir)
    for dirpath, dirnames, files in os.walk(iaa_dir):
@@ -132,6 +133,8 @@ def handleDependencies(schemaPath, iaaPath, out_dir):
        indices = merge_indices(row_indices, indices).tolist()
        iaaData.at[row, 'highlighted_indices'] = json.dumps(indices)

    iaaData = parentAgreementScore(iaaData, schemaPath)

    print('exporting to csv')
    path, name = get_path(iaaPath)
    outputpath = os.path.join(out_dir, 'Dep_'+name)
34 changes: 34 additions & 0 deletions consensus_and_scoring/IAA.py
@@ -252,6 +252,40 @@ def score(article, ques, data, config_path, text_file, schemaFile, repDF = None, ...
    elif question_type == 'checklist':
        out = evaluateChecklist(answers, users, starts, ends, numUsers, length, repDF, sourceText, hlUsers, hlAns,
                                num_choices = num_choices, useRep=useRep, threshold_func = threshold_func)

    #Only adjust agreement scores by highlights if highlights exist
    if isinstance(starts, list) and len(answers) == len(starts) and len(starts) == len(ends):
        starts_i = {}
        ends_i = {}
        #For this question, map each answer to its users' start and end highlight indices
        #e.g. starts_i = {1: [5, 5]} means two users started highlights for Answer 1 at index 5
        for i in range(len(answers)):
            a = answers[i]
            if a not in starts_i:
                starts_i[a] = [starts[i]]
                ends_i[a] = [ends[i]]
            else:
                starts_i[a] += [starts[i]]
                ends_i[a] += [ends[i]]
        print("Question", ques, "{Answer:Highlight_Starts}:", starts_i, "{Answer:Highlight_Ends}:", ends_i)
        #Scale each answer's agreement score by that answer's highlighting agreement
        if question_type == 'checklist':
            for stuff in out:
                ans_num = stuff[0]
                old_ag_score = stuff[4]
                if ans_num in starts_i:
                    hlAgreeFactor = highlightAgreementScore(starts_i[ans_num], ends_i[ans_num])
                    print("Agreement Score transformed from", old_ag_score, "to", old_ag_score * hlAgreeFactor, "\n")
                    stuff[4] = old_ag_score * hlAgreeFactor
        else:
            ans_num = out[0]
            old_ag_score = out[4]
            if ans_num in starts_i:
                hlAgreeFactor = highlightAgreementScore(starts_i[ans_num], ends_i[ans_num])
                print("Agreement Score transformed from", old_ag_score, "to", old_ag_score * hlAgreeFactor, "\n")
                temp_out = list(out)
                temp_out[4] = old_ag_score * hlAgreeFactor
                out = tuple(temp_out)
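    #Note on the assumed shape of `out` (inferred from the indexing above, not documented here):
    #checklist questions yield a list of mutable per-answer rows with the answer number at index 0
    #and the agreement score at index 4, while other question types yield a single immutable tuple
    #with the same positions, hence the list()/tuple() round-trip before reassigning the score.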
    return out


49 changes: 49 additions & 0 deletions consensus_and_scoring/test/test_IAA_basic.py
@@ -0,0 +1,49 @@
import sys
import os
import pandas as pd

import test_utils
from filegen_utils import *
from IAA import *

sys.path.append('../../')

def test_iaa_constructor(config, tmpdir):
    test_path = test_utils.make_test_directory(config, 'test_basic_b')
    out_path = test_utils.make_test_directory(config, 'test_basic_b_out')
    #source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id = 'oogabooga')

    dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})

    dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q2.A4', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':5, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q2.A5', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q2.A5', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'E', 'highlight_count':10, 'start_pos':1, 'end_pos':9, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q2.A5', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'F', 'highlight_count':10, 'start_pos':1, 'end_pos':8, 'article_text_length': 100})

    dh.add_row({'answer_label': 'T1.Q4.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q4.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':8, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q4.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':9, 'article_text_length': 100})

    dh.add_row({'answer_label': 'T1.Q5.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q5.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q5.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100})

    fin_path = dh.export()
    data_path = config['data_dir']
    schema_path = data_path+'/schemas'

    iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory = out_path)
    print(iaa_out)
    for root, dir, files in os.walk(iaa_out):
        for file in files:
            #should be only 1 file for this case, so just run it on the only one
            #if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
            print(out_df[['question_Number', 'agreed_Answer', 'agreement_score']])
30 changes: 19 additions & 11 deletions consensus_and_scoring/test/test_IAA_jamie.py
@@ -39,25 +39,33 @@ def test_user_highlighting_consensus(config, tmpdir):
#N users on schema v1 and N users on schema v2--outputs should match once schema-identifying columns are dropped
def test_diff_schemas(config, tmpdir):
    test_path = test_utils.make_test_directory(config, 'test_diff_schemas')
    out_path = test_utils.make_test_directory(config, 'out_test_diff_schemas')
    out_path = test_utils.make_test_directory(config, 'test_diff_schemas_out')
    #Covid_Evidence2020_03_21_copy is a copy with Q13 set to Ordinal, which should be detected as a new schema
    for x in [('jamietest_old', 'Covid_Evidence2020_03_21'), ('jamietest_new', 'Covid_Evidence2020_03_21_copy')]:
        dh = datahunt(out_folder=test_path, source_task_id = x[0])
        dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': x[1], 'contributor_uuid':'A'})
        dh.add_row({'answer_label': 'T1.Q1.A3', 'namespace': x[1], 'contributor_uuid':'B'})
        dh.add_row({'answer_label': 'T1.Q3.A1', 'namespace': x[1], 'contributor_uuid':'C'})
        dh.add_row({'answer_label': 'T1.Q14.A1', 'namespace': x[1], 'contributor_uuid':'D'})
        dh.add_row({'answer_label': 'T1.Q14.A10', 'namespace': x[1], 'contributor_uuid':'E'})
        dh.add_row({'answer_label': 'T1.Q14.A10', 'namespace': x[1], 'contributor_uuid':'F'})
        dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': x[1], 'contributor_uuid':'A'})
        dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': x[1], 'contributor_uuid':'B'})
        dh.add_row({'answer_label': 'T1.Q1.A3', 'namespace': x[1], 'contributor_uuid':'A'})
        dh.add_row({'answer_label': 'T1.Q2.A1', 'namespace': x[1], 'contributor_uuid':'A'})
        dh.add_row({'answer_label': 'T1.Q2.A2', 'namespace': x[1], 'contributor_uuid':'A'})
        dh.add_row({'answer_label': 'T1.Q2.A8', 'namespace': x[1], 'contributor_uuid':'A'})
        dh.add_row({'answer_label': 'T1.Q2.A7', 'namespace': x[1], 'contributor_uuid':'B'})
        dh.add_row({'answer_label': 'T1.Q2.A8', 'namespace': x[1], 'contributor_uuid':'B'})
        dh.add_row({'answer_label': 'T1.Q3.A1', 'namespace': x[1], 'contributor_uuid':'A'})
        fin_path = dh.export()
    data_path = config['data_dir']
    schema_path = config['persistent_test_dir']+'/schemas'

    iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory = out_path)
    for root, dir, files in os.walk(iaa_out):
        out_df_old = pd.read_csv(os.path.join(iaa_out, files[0]), encoding='utf-8')
        out_df_new = pd.read_csv(os.path.join(iaa_out, files[1]), encoding='utf-8')
        out_df_new = out_df_new.drop(['schema_sha256', 'namespace'], axis=1)
        out_df_old = out_df_old.drop(['schema_sha256', 'namespace'], axis=1)
    out_df_old = pd.read_csv(os.path.join(iaa_out, 'DataHunt_jamietest_old.IAA-Default-Tags.csv'), encoding='utf-8')
    out_df_new = pd.read_csv(os.path.join(iaa_out, 'DataHunt_jamietest_new.IAA-Default-Tags.csv'), encoding='utf-8')

    assert out_df_old.equals(out_df_new)
    #With the schema metadata still attached, the two outputs must differ
    assert not out_df_old.equals(out_df_new)

    schema_columns = ['article_sha256', 'article_id', 'schema_sha256', 'namespace']
    out_df_old = out_df_old.drop(schema_columns, axis=1)
    out_df_new = out_df_new.drop(schema_columns, axis=1)

    assert out_df_old.equals(out_df_new)
40 changes: 40 additions & 0 deletions consensus_and_scoring/test/test_agreement_score.py
@@ -0,0 +1,40 @@
import sys
import os
import pandas as pd

import test_utils
from filegen_utils import *
from IAA import *
from Dependency import *
import conftest

#REFERENCE: in Evidence, parents = {1.1:[2], 1.2:[2], 2.1:[4], 2.5:[4,5], 2.8:[3], 5.1:[6], 5.2:[6], 5.3:[6], 9.1:[10,11], 9.2:[10,11]}
def test_sample(config):
    test_path = test_utils.make_test_directory(config, 'test_agscore')
    iaa_files_path = test_utils.make_test_directory(config, 'test_agscore_iaa')
    out_path = test_utils.make_test_directory(config, 'test_agscore_out')
    #source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id = 'oogabooga')

    dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'B', 'highlight_count':10, 'start_pos':1, 'end_pos':5, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'C', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A2', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'D', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})
    dh.add_row({'answer_label': 'T1.Q1.A3', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'E', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})

    dh.add_row({'answer_label': 'T1.Q2.A1', 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid':'A', 'highlight_count':10, 'start_pos':1, 'end_pos':10, 'article_text_length': 100})

    fin_path = dh.export()
    data_path = config['data_dir']
    schema_path = data_path+'/schemas'

    iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory = iaa_files_path)

    eval_dependency(test_path, iaa_files_path, schema_path, out_path)

    for root, dir, files in os.walk(out_path):
        for file in files:
            #should be only 1 file for this case, so just run it on the only one
            #if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8')
            print(out_df[['question_Number', 'agreed_Answer', 'agreement_score']])