Skip to content

Commit 4266126

Browse files
authored
Merge pull request #4 from Lab42-Team/dev
First version of Ontogen
2 parents d36ed50 + 5dca748 commit 4266126

File tree

6 files changed

+90
-83
lines changed

6 files changed

+90
-83
lines changed

f_measure_identidier.py

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import json
22

33

4-
def f_measure(json_path, json_path3, json_path4, owl_path):
4+
def f_measure(json_path, json_path4, owl_path):
55
with open(json_path, 'r', encoding='utf-8') as f:
66
text = json.load(f)
77
k = 0
@@ -26,6 +26,7 @@ def f_measure(json_path, json_path3, json_path4, owl_path):
2626
if text[i][obj_json] == "LITERAL":
2727
count = count + 1
2828
literal = literal + 1
29+
compare = subject + categorical
2930
count1 = 0
3031
count2 = 0
3132
flag_object = 0
@@ -35,6 +36,7 @@ def f_measure(json_path, json_path3, json_path4, owl_path):
3536
"<owl:DatatypeProperty") != -1:
3637
count1 = count1 + 1
3738
with open(owl_path, 'r', encoding='utf-8') as f:
39+
count_obj = 0
3840
for line in f:
3941
if line.find("<owl:NamedIndividual") != -1:
4042
flag_object = 1
@@ -43,39 +45,57 @@ def f_measure(json_path, json_path3, json_path4, owl_path):
4345
if line.find("</owl:NamedIndividual") != -1 and count2 != 3:
4446
break
4547
if line.find("</owl:NamedIndividual") != -1 and count2 == 3:
48+
count_obj = count_obj + 1
4649
count2 = 0
4750
flag_object = 0
51+
compare1 = count2 - 3
4852
count2 = count2 - 2 + categorical
4953
precision = count2 / count
5054
recall = count2 / count1
5155
f1 = (2 * precision * recall) / (precision + recall)
56+
print()
5257
print(json_path)
58+
print()
5359
print("Precision: ", precision)
5460
print("Recall: ", recall)
5561
print("F1: ", f1)
62+
print()
5663

5764
count_individual = 0
5865
with open(owl_path, 'r', encoding='utf-8') as f:
5966
for line in f:
6067
if line.find("<owl:NamedIndividual") != -1:
6168
count_individual = count_individual + 1
62-
count_individual1 = count2 + categorical
63-
precision1 = count_individual / count_individual1
64-
print("Precision: ", precision1)
65-
with open(json_path3, 'r', encoding='utf-8') as f:
69+
with open(json_path4, 'r', encoding='utf-8') as f, open(json_path, 'r', encoding='utf-8') as fs:
6670
text = json.load(f)
71+
text1 = json.load(fs)
6772
i = 0
6873
categorical_individual = 0
6974
subject_individual = 0
70-
for str_json in text:
75+
entity_name = set()
76+
for str_json in text1:
7177
for obj_json in str_json:
72-
if text[i][obj_json] == "CATEGORICAL":
73-
categorical_individual = categorical_individual + 1
74-
if text[i][obj_json] == "SUBJECT":
78+
length_set = len(entity_name)
79+
if text[0][obj_json] == "CATEGORICAL":
80+
entity_name.add(text1[i][obj_json])
81+
if length_set == len(entity_name):
82+
continue
83+
else:
84+
categorical_individual = categorical_individual + 1
85+
if text[0][obj_json] == "SUBJECT":
7586
subject_individual = subject_individual + 1
87+
7688
i = i + 1
77-
count_individual2 = (subject_individual + categorical_individual) * k
78-
recall1 = count_individual2 / count_individual1
89+
count_individual1 = (subject_individual + categorical_individual)
90+
if compare == compare1:
91+
count_individual1 = count_individual
92+
precision1 = count_individual / count_individual1
93+
print("Precision: ", precision1)
94+
count_individual2 = categorical_individual + subject_individual
95+
recall1 = count_individual1 / count_individual2
7996
f11 = (2 * precision1 * recall1) / (precision1 + recall1)
97+
8098
print("Recall: ", recall1)
8199
print("F1: ", f11)
100+
print()
101+
return precision, recall, f1, precision1, recall1, f11

input.csv

Lines changed: 0 additions & 4 deletions
This file was deleted.

main.py

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -168,18 +168,17 @@ def folder_owl(name):
168168
path_in = name
169169
path = [path_in]
170170
# path = ['fj:\\test']
171+
precision, recall, f11, precision1, recall1, f111 = 0, 0, 0, 0, 0, 0
172+
counter = 0
171173
for el in path:
172174
if os.path.exists(el):
173-
print('Такой путь существует: ', el)
174175
for dirs, folder, files in os.walk(el):
175176
for awhile in files:
176177
if check_path_ent(awhile) == 1 and awhile[len(awhile) - 5:len(awhile)] != '.json':
177178
continue
178179
else:
179180
cl = dirs + '/json'
180-
if os.path.exists(cl):
181-
print('Такой путь существует: ', cl)
182-
else:
181+
if not os.path.exists(cl):
183182
os.mkdir(cl)
184183
csv_path = dirs + '/' + awhile
185184
if awhile[len(awhile) - 5:len(awhile)] != '.json':
@@ -196,14 +195,10 @@ def folder_owl(name):
196195
open_json_file(json_path, rows)
197196
shutil.copyfile(json_path, cl + '/' + json_path)
198197
oa = cl + '/owl'
199-
if os.path.exists(oa):
200-
print('Такой путь существует: ', cl)
201-
else:
198+
if not os.path.exists(oa):
202199
os.mkdir(oa)
203200
cl = cl + '/jsondocs'
204-
if os.path.exists(cl):
205-
print('Такой путь существует: ', cl)
206-
else:
201+
if not os.path.exists(cl):
207202
os.mkdir(cl)
208203
json_path1 = json_path[0:len(json_path) - 5] + '1' + '.json'
209204
json_path2 = json_path[0:len(json_path) - 5] + '2' + '.json'
@@ -229,8 +224,16 @@ def folder_owl(name):
229224
dictionary3)
230225
with open(owl_path, "w", encoding='utf-8') as my_file:
231226
my_file.write(new_string)
232-
f_measure_identidier.f_measure(json_path, json_path3, json_path4, owl_path)
233-
statistics_writer.write_statistic(owl_path)
227+
count1, count2, count3, count4, count5, count6 = f_measure_identidier.f_measure(json_path, json_path4, owl_path)
228+
precision += count1
229+
recall += count2
230+
f11 = f11 + count3
231+
precision1 += count4
232+
recall1 += count5
233+
f111 += count6
234+
counter += 1
235+
statistics_writer.write_statistic(owl_path, count1,
236+
count2, count3, count4, count5, count6)
234237
shutil.copyfile(json_path1, cl + '/' + json_path1)
235238
shutil.copyfile(json_path2, cl + '/' + json_path2)
236239
shutil.copyfile(json_path3, cl + '/' + json_path3)
@@ -243,8 +246,8 @@ def folder_owl(name):
243246
os.remove(json_path4)
244247
os.remove(owl_path)
245248
new_file, new_file1 = unifer_crietor.unifier(path_in)
246-
print()
247-
statistic = statistics_writer.write_statistic(new_file)
249+
statistic = statistics_writer.write_statistic(new_file, precision,
250+
recall, f11, precision1, recall1, f111, counter)
248251
shutil.copyfile(new_file, path_in + '/json' + '/' + new_file)
249252
shutil.copyfile(statistic, path_in + '/json' + '/' + statistic)
250253
os.remove(statistic)

ontology_creator.py

Lines changed: 14 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -236,18 +236,11 @@ def create_ontology(json_path, json_path1, json_path4, dictionary3):
236236
j = 0
237237
entity_name = set()
238238
subject_name = ""
239-
list1 = []
240239
for str_json in text:
241240
i = i + 1
242241
for obj_json in str_json:
243-
j = 0
244-
while j < len(text1):
245-
if text1[j][obj_json] == "SUBJECT":
246-
subject_name = obj_json
247-
list1.append((text[i][obj_json].replace(" ", "")).replace("_", ""))
248-
list1.sort()
249-
j += 1
250-
i = -1
242+
if text1[0][obj_json] == "SUBJECT":
243+
subject_name = obj_json
251244
for str_json in text:
252245
i = i + 1
253246
for obj_json in str_json:
@@ -300,14 +293,15 @@ def create_ontology(json_path, json_path1, json_path4, dictionary3):
300293
ont_data_prop.extend(lst)
301294
i = -1
302295
entity_name = set()
296+
k = 1
303297
for str_json in text:
304298
i = i + 1
305299
for obj_json in str_json:
306300
length_set = len(entity_name)
307301
entity_name.add((text[i][obj_json].replace(" ", "")).replace("_", ""))
308302
if (length_set == len(entity_name)) and (
309-
text1[0][obj_json] == "CATEGORICAL"):
310-
text[i][obj_json] = text[i][obj_json] + '(' + str(1) + ')'
303+
text1[0][obj_json] == "CATEGORICAL" or text1[0][obj_json] == "SUBJECT"):
304+
text[i][obj_json] = text[i][obj_json] + str(k)
311305
entity_name = set()
312306
i = -1
313307
j = 0
@@ -329,30 +323,25 @@ def create_ontology(json_path, json_path1, json_path4, dictionary3):
329323
entity_name = set()
330324
j = 0
331325
dictionary2 = {}
332-
k = 1
326+
k = 0
333327
for str_json in text:
334328
i = i + 1
335329
length_set = len(entity_name)
336-
entity_name.add((list1[i].replace(" ", "")).replace("_", ""))
330+
entity_name.add((text[i][subject_name].replace(" ", "")).replace("_", ""))
337331
if length_set == len(entity_name):
338-
list1[i] = list1[i] + str(k)
339-
subject_value = list1[i]
340-
dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
341-
k = k + 1
332+
continue
342333
else:
343-
k = 1
344-
subject_value = list1[i]
334+
subject_value = text[i][subject_name]
345335
dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
346336
for obj_json in str_json:
347337
length_set = len(entity_name)
348338
if text1[0][obj_json] == "SUBJECT":
349-
entity_name.add((list1[i].replace(" ", "")).replace("_", ""))
339+
entity_name.add((text[i][obj_json].replace(" ", "")).replace("_", ""))
350340
if length_set == len(entity_name):
351-
continue
352-
else:
353-
subject_name = obj_json
354-
subject_value = list1[i]
355-
dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
341+
k = k + 1
342+
subject_name = obj_json
343+
subject_value = text[i][obj_json] + str(k)
344+
dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
356345
if text1[0][obj_json] == "CATEGORICAL":
357346
categorical_obj_name = obj_json
358347
categorical_obj_value = text[i][obj_json]

statistics_writer.py

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
def write_statistic(owl_path):
1+
def write_statistic(owl_path, precision, recall, f11, precision1,
2+
recall1, f111, counter = None):
23
text = "statistic.txt"
34
with open(owl_path, 'r', encoding='utf-8') as f:
45
count_individual = 0
@@ -15,12 +16,38 @@ def write_statistic(owl_path):
1516
if line.find("<owl:DatatypeProperty") != -1:
1617
count_datatype_prop = count_datatype_prop + 1
1718
with open(text, 'a', encoding='utf-8') as f:
19+
print(owl_path)
1820
print("Number of classes: ", count_class)
1921
print("Number of object properties: ", count_object_prop)
2022
print("Number of datatype properties: ", count_datatype_prop)
2123
print("Number of Named individuals: ", count_individual)
24+
print()
25+
f.write(owl_path + "\n")
2226
f.write("Number of classes: " + str(count_class) + "\n")
2327
f.write("Number of object properties: " + str(count_object_prop) + "\n")
2428
f.write("Number of datatype properties: " + str(count_datatype_prop) + "\n")
2529
f.write("Number of Named individuals: " + str(count_individual) + "\n" + "\n")
30+
if owl_path == "new_file1.owl":
31+
print()
32+
print("Total:")
33+
print("Precision ", precision/counter)
34+
print("Recall ", recall / counter)
35+
print("F1 ", f11 / counter)
36+
print()
37+
print("Precision ", precision1 / counter)
38+
print("Recall ", recall1 / counter)
39+
print("F1 ", f111 / counter)
40+
f.write("Total:" + "\n" + "Precision " + str(precision/counter) + "\n" + "Recall " +
41+
str(recall / counter) + "\n" + "F1 " + str(f11 / counter) + "\n" + "Precision " +
42+
str(precision1 / counter) + "\n" + "Recall " + str(recall1 / counter) + "\n" + "F1 " +
43+
str(f111 / counter) + "\n")
44+
else:
45+
f.write("Accuracy for ontology schema:" + "\n")
46+
f.write("Precision " + str(precision) + "\n")
47+
f.write("Recall " + str(recall) + "\n")
48+
f.write("F1 " + str(f11) + "\n")
49+
f.write("Accuracy for named individuals:" + "\n")
50+
f.write("Precision " + str(precision1) + "\n")
51+
f.write("Recall " + str(recall1) + "\n")
52+
f.write("F1 " + str(f111) + "\n")
2653
return text

test.owl

Lines changed: 0 additions & 28 deletions
This file was deleted.

0 commit comments

Comments
 (0)