Merge pull request #4 from Lab42-Team/dev

LedZeppe1in · web-flow · commit 4266126eae8c · 2021-10-14T15:05:07.000+08:00
First version of Ontogen
diff --git a/f_measure_identidier.py b/f_measure_identidier.py
@@ -1,7 +1,7 @@
 import json
 
 
-def f_measure(json_path, json_path3, json_path4, owl_path):
+def f_measure(json_path, json_path4, owl_path):
     with open(json_path, 'r', encoding='utf-8') as f:
         text = json.load(f)
         k = 0
@@ -26,6 +26,7 @@ def f_measure(json_path, json_path3, json_path4, owl_path):
                 if text[i][obj_json] == "LITERAL":
                     count = count + 1
                     literal = literal + 1
+    compare = subject + categorical
     count1 = 0
     count2 = 0
     flag_object = 0
@@ -35,6 +36,7 @@ def f_measure(json_path, json_path3, json_path4, owl_path):
                     "<owl:DatatypeProperty") != -1:
                 count1 = count1 + 1
     with open(owl_path, 'r', encoding='utf-8') as f:
+        count_obj = 0
         for line in f:
             if line.find("<owl:NamedIndividual") != -1:
                 flag_object = 1
@@ -43,39 +45,57 @@ def f_measure(json_path, json_path3, json_path4, owl_path):
             if line.find("</owl:NamedIndividual") != -1 and count2 != 3:
                 break
             if line.find("</owl:NamedIndividual") != -1 and count2 == 3:
+                count_obj = count_obj + 1
                 count2 = 0
                 flag_object = 0
+    compare1 = count2 - 3
     count2 = count2 - 2 + categorical
     precision = count2 / count
     recall = count2 / count1
     f1 = (2 * precision * recall) / (precision + recall)
+    print()
     print(json_path)
+    print()
     print("Precision: ", precision)
     print("Recall: ", recall)
     print("F1: ", f1)
+    print()
 
     count_individual = 0
     with open(owl_path, 'r', encoding='utf-8') as f:
         for line in f:
             if line.find("<owl:NamedIndividual") != -1:
                 count_individual = count_individual + 1
-    count_individual1 = count2 + categorical
-    precision1 = count_individual / count_individual1
-    print("Precision: ", precision1)
-    with open(json_path3, 'r', encoding='utf-8') as f:
+    with open(json_path4, 'r', encoding='utf-8') as f, open(json_path, 'r', encoding='utf-8') as fs:
         text = json.load(f)
+        text1 = json.load(fs)
         i = 0
         categorical_individual = 0
         subject_individual = 0
-        for str_json in text:
+        entity_name = set()
+        for str_json in text1:
             for obj_json in str_json:
-                if text[i][obj_json] == "CATEGORICAL":
-                    categorical_individual = categorical_individual + 1
-                if text[i][obj_json] == "SUBJECT":
+                length_set = len(entity_name)
+                if text[0][obj_json] == "CATEGORICAL":
+                    entity_name.add(text1[i][obj_json])
+                    if length_set == len(entity_name):
+                        continue
+                    else:
+                        categorical_individual = categorical_individual + 1
+                if text[0][obj_json] == "SUBJECT":
                     subject_individual = subject_individual + 1
+
             i = i + 1
-    count_individual2 = (subject_individual + categorical_individual) * k
-    recall1 = count_individual2 / count_individual1
+    count_individual1 = (subject_individual + categorical_individual)
+    if compare == compare1:
+        count_individual1 = count_individual
+    precision1 = count_individual / count_individual1
+    print("Precision: ", precision1)
+    count_individual2 = categorical_individual + subject_individual
+    recall1 = count_individual1 / count_individual2
     f11 = (2 * precision1 * recall1) / (precision1 + recall1)
+
     print("Recall: ", recall1)
     print("F1: ", f11)
+    print()
+    return precision, recall, f1, precision1, recall1, f11
diff --git a/input.csv b/input.csv
diff --git a/main.py b/main.py
@@ -168,18 +168,17 @@ def folder_owl(name):
     path_in = name
     path = [path_in]
     # path = ['fj:\\test']
+    precision, recall, f11, precision1, recall1, f111 = 0, 0, 0, 0, 0, 0
+    counter = 0
     for el in path:
         if os.path.exists(el):
-            print('Такой путь существует: ', el)
             for dirs, folder, files in os.walk(el):
                 for awhile in files:
                     if check_path_ent(awhile) == 1 and awhile[len(awhile) - 5:len(awhile)] != '.json':
                         continue
                     else:
                         cl = dirs + '/json'
-                        if os.path.exists(cl):
-                            print('Такой путь существует: ', cl)
-                        else:
+                        if not os.path.exists(cl):
                             os.mkdir(cl)
                         csv_path = dirs + '/' + awhile
                         if awhile[len(awhile) - 5:len(awhile)] != '.json':
@@ -196,14 +195,10 @@ def folder_owl(name):
                             open_json_file(json_path, rows)
                             shutil.copyfile(json_path, cl + '/' + json_path)
                             oa = cl + '/owl'
-                            if os.path.exists(oa):
-                                print('Такой путь существует: ', cl)
-                            else:
+                            if not os.path.exists(oa):
                                 os.mkdir(oa)
                             cl = cl + '/jsondocs'
-                            if os.path.exists(cl):
-                                print('Такой путь существует: ', cl)
-                            else:
+                            if not os.path.exists(cl):
                                 os.mkdir(cl)
                             json_path1 = json_path[0:len(json_path) - 5] + '1' + '.json'
                             json_path2 = json_path[0:len(json_path) - 5] + '2' + '.json'
@@ -229,8 +224,16 @@ def folder_owl(name):
                                                                           dictionary3)
                             with open(owl_path, "w", encoding='utf-8') as my_file:
                                 my_file.write(new_string)
-                            f_measure_identidier.f_measure(json_path, json_path3, json_path4, owl_path)
-                            statistics_writer.write_statistic(owl_path)
+                            count1, count2, count3, count4, count5, count6 = f_measure_identidier.f_measure(json_path, json_path4, owl_path)
+                            precision += count1
+                            recall += count2
+                            f11 = f11 + count3
+                            precision1 += count4
+                            recall1 += count5
+                            f111 += count6
+                            counter += 1
+                            statistics_writer.write_statistic(owl_path, count1,
+                                                              count2, count3, count4, count5, count6)
                             shutil.copyfile(json_path1, cl + '/' + json_path1)
                             shutil.copyfile(json_path2, cl + '/' + json_path2)
                             shutil.copyfile(json_path3, cl + '/' + json_path3)
@@ -243,8 +246,8 @@ def folder_owl(name):
                             os.remove(json_path4)
                             os.remove(owl_path)
             new_file, new_file1 = unifer_crietor.unifier(path_in)
-            print()
-            statistic = statistics_writer.write_statistic(new_file)
+            statistic = statistics_writer.write_statistic(new_file, precision,
+                                                          recall, f11, precision1, recall1, f111, counter)
             shutil.copyfile(new_file, path_in + '/json' + '/' + new_file)
             shutil.copyfile(statistic, path_in + '/json' + '/' + statistic)
             os.remove(statistic)
diff --git a/ontology_creator.py b/ontology_creator.py
@@ -236,18 +236,11 @@ def create_ontology(json_path, json_path1, json_path4, dictionary3):
     j = 0
     entity_name = set()
     subject_name = ""
-    list1 = []
     for str_json in text:
         i = i + 1
         for obj_json in str_json:
-            j = 0
-            while j < len(text1):
-                if text1[j][obj_json] == "SUBJECT":
-                    subject_name = obj_json
-                    list1.append((text[i][obj_json].replace(" ", "")).replace("_", ""))
-                    list1.sort()
-                j += 1
-    i = -1
+            if text1[0][obj_json] == "SUBJECT":
+                subject_name = obj_json
     for str_json in text:
         i = i + 1
         for obj_json in str_json:
@@ -300,14 +293,15 @@ def create_ontology(json_path, json_path1, json_path4, dictionary3):
                     ont_data_prop.extend(lst)
     i = -1
     entity_name = set()
+    k = 1
     for str_json in text:
         i = i + 1
         for obj_json in str_json:
             length_set = len(entity_name)
             entity_name.add((text[i][obj_json].replace(" ", "")).replace("_", ""))
             if (length_set == len(entity_name)) and (
-                    text1[0][obj_json] == "CATEGORICAL"):
-                text[i][obj_json] = text[i][obj_json] + '(' + str(1) + ')'
+                    text1[0][obj_json] == "CATEGORICAL" or text1[0][obj_json] == "SUBJECT"):
+                text[i][obj_json] = text[i][obj_json] + str(k)
     entity_name = set()
     i = -1
     j = 0
@@ -329,30 +323,25 @@ def create_ontology(json_path, json_path1, json_path4, dictionary3):
     entity_name = set()
     j = 0
     dictionary2 = {}
-    k = 1
+    k = 0
     for str_json in text:
         i = i + 1
         length_set = len(entity_name)
-        entity_name.add((list1[i].replace(" ", "")).replace("_", ""))
+        entity_name.add((text[i][subject_name].replace(" ", "")).replace("_", ""))
         if length_set == len(entity_name):
-            list1[i] = list1[i] + str(k)
-            subject_value = list1[i]
-            dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
-            k = k + 1
+            continue
         else:
-            k = 1
-            subject_value = list1[i]
+            subject_value = text[i][subject_name]
             dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
         for obj_json in str_json:
             length_set = len(entity_name)
             if text1[0][obj_json] == "SUBJECT":
-                entity_name.add((list1[i].replace(" ", "")).replace("_", ""))
+                entity_name.add((text[i][obj_json].replace(" ", "")).replace("_", ""))
                 if length_set == len(entity_name):
-                    continue
-                else:
-                    subject_name = obj_json
-                    subject_value = list1[i]
-                    dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
+                    k = k + 1
+                subject_name = obj_json
+                subject_value = text[i][obj_json] + str(k)
+                dictionary3 = create_sub_atr(subject_name.title(), subject_value, dictionary3)
             if text1[0][obj_json] == "CATEGORICAL":
                 categorical_obj_name = obj_json
                 categorical_obj_value = text[i][obj_json]
diff --git a/statistics_writer.py b/statistics_writer.py
@@ -1,4 +1,5 @@
-def write_statistic(owl_path):
+def write_statistic(owl_path, precision, recall, f11, precision1,
+                    recall1, f111, counter = None):
     text = "statistic.txt"
     with open(owl_path, 'r', encoding='utf-8') as f:
         count_individual = 0
@@ -15,12 +16,38 @@ def write_statistic(owl_path):
             if line.find("<owl:DatatypeProperty") != -1:
                 count_datatype_prop = count_datatype_prop + 1
     with open(text, 'a', encoding='utf-8') as f:
+        print(owl_path)
         print("Number of classes: ", count_class)
         print("Number of object properties: ", count_object_prop)
         print("Number of datatype properties: ", count_datatype_prop)
         print("Number of Named individuals: ", count_individual)
+        print()
+        f.write(owl_path + "\n")
         f.write("Number of classes: " + str(count_class) + "\n")
         f.write("Number of object properties: " + str(count_object_prop) + "\n")
         f.write("Number of datatype properties: " + str(count_datatype_prop) + "\n")
         f.write("Number of Named individuals: " + str(count_individual) + "\n" + "\n")
+        if owl_path == "new_file1.owl":
+            print()
+            print("Total:")
+            print("Precision ", precision/counter)
+            print("Recall ", recall / counter)
+            print("F1 ", f11 / counter)
+            print()
+            print("Precision ", precision1 / counter)
+            print("Recall ", recall1 / counter)
+            print("F1 ", f111 / counter)
+            f.write("Total:" + "\n" + "Precision " + str(precision/counter) + "\n" + "Recall " +
+                    str(recall / counter) + "\n" + "F1 " + str(f11 / counter) + "\n" + "Precision " +
+                    str(precision1 / counter) + "\n" + "Recall " + str(recall1 / counter) + "\n" + "F1 " +
+                    str(f111 / counter) + "\n")
+        else:
+            f.write("Accuracy for ontology schema:" + "\n")
+            f.write("Precision " + str(precision) + "\n")
+            f.write("Recall " + str(recall) + "\n")
+            f.write("F1 " + str(f11) + "\n")
+            f.write("Accuracy for named individuals:" + "\n")
+            f.write("Precision " + str(precision1) + "\n")
+            f.write("Recall " + str(recall1) + "\n")
+            f.write("F1 " + str(f111) + "\n")
     return text
diff --git a/test.owl b/test.owl