diff --git a/docling_eval/dataset_builders/cvat_dataset_builder.py b/docling_eval/dataset_builders/cvat_dataset_builder.py index 0ca8d51c..d7d4d08c 100644 --- a/docling_eval/dataset_builders/cvat_dataset_builder.py +++ b/docling_eval/dataset_builders/cvat_dataset_builder.py @@ -379,7 +379,188 @@ def parse_annotation(self, image_annot: Dict) -> Tuple: merges, group, ) + + def _validate_annotations_consistency(self, + boxes, + lines, + reading_order, + to_captions, + to_footnotes, + to_values, + merges, + groups) -> bool: + """Validate annotations consistency.""" + + def _validate_to_captions(): + """validate to captions. + + - Make sure the first bbox is a FloatingItem and the subsequent + are of type caption. + - Make sure that the to_caption only starts at the beginning of + a group, if a group is present. + """ + for to_caption in to_captions: + + boxids = to_caption["boxids"] + if len(boxids)!=2: + _log.error(f"to_caption is longer than 2: {len(to_caption['boxids'])}") + return False + + box_source = boxes[boxids[0]] + box_target = boxes[boxids[1]] + + label_source = DocItemLabel(box_source["@label"]) + label_target = DocItemLabel(box_target["@label"]) + + if label_source not in [ + DocItemLabel.PICTURE, + DocItemLabel.TABLE, + DocItemLabel.CODE, + DocItemLabel.FORM, + ]: + _log.error(f"to_caption label-source: {label_source}") + return False + + if label_target!=DocItemLabel.CAPTION: + _log.error(f"to_caption label-target: {label_target}") + return False + + return True + + def _validate_to_footnotes(): + """validate to footnotes. + + - Make sure the first bbox is a FloatingItem and the subsequent + are of type footnote. + - Make sure that the to_footnote only starts at the beginning of + a group, if a group is present. + """ + for to_footnote in to_footnotes: + + boxids = to_footnote["boxids"] + if len(boxids)!=2: + _log.error(f"to_footnote is longer than 2: {len(to_footnote['boxids'])}") + return False + + box_source = boxes[boxids[0]] + box_target = boxes[boxids[1]] + + label_source = DocItemLabel(box_source["@label"]) + label_target = DocItemLabel(box_target["@label"]) + + if label_source not in [ + DocItemLabel.PICTURE, + DocItemLabel.TABLE, + DocItemLabel.CODE, + DocItemLabel.FORM, + ]: + _log.error(f"to_footnote label-source: {label_source}") + return False + + if label_target!=DocItemLabel.FOOTNOTE: + _log.error(f"to_footnote label-target: {label_target}") + return False + + return True + + def _validate_captions(): + """validate captions. + + Make sure each caption either is the start of a to_caption or + has a merge node. + """ + caption_ids = [] + for to_caption in to_captions: + boxids = to_caption["boxids"] + if len(boxids)!=2: + _log.error(f"to_caption is longer than 2: {len(to_caption['boxids'])}") + return False + + box_target = boxes[boxids[1]] + label_target = DocItemLabel(box_target["@label"]) + + if label_target==DocItemLabel.CAPTION: + caption_ids.append(boxids[1]) + + for i,box in enumerate(boxes): + label = DocItemLabel(box_source["@label"]) + + caption_is_linked = False + if label == DocItemLabel.CAPTION: + if i not in caption_ids: + caption_is_linked = True + + if not caption_is_linked: + _log.error(f"caption {box} is not linked to anything") + return False + + return True + + def _validate_list_items(): + """validate list items. + + Make sure every list-item has a group or merge point. + """ + for i,box in enumerate(boxes): + label = DocItemLabel(box_source["@label"]) + + listitem_is_grouped = True + if label == DocItemLabel.LIST_ITEM: + for group in groups: + if i in group["boxids"]: + listitem_is_grouped = True + + listitem_is_merged = True + if label == DocItemLabel.LIST_ITEM: + for merge in merges: + if i in merge["boxids"]: + listitem_is_merged = True + + if (not listitem_is_linked) or (not listitem_is_merged): + _log.error(f"listitem {box} is not grouped or merged.") + return False + + return True + + def _validate_reading_order(): + """validate reading-order. + + Make sure that every bbox has a reading-order point or is + entirely overlapped with a bbox that has a reading-order. + """ + return True + + def _validate_merges(): + """validate merges. + + Make sure that all bbox in a merge are of same type + """ + return True + + def _validate_group(): + """validate groups. + Make sure that all bbox in a group are of same type + """ + return True + + def _validate_to_values_links(): + """validate to_value links. + + Make sure that all to_value links are between + """ + return True + + + return ( + _validate_to_captions() and + _validate_to_footnotes() and + _validate_reading_order() and + _validate_merges() and + _validate_group() and + __validate_list_items() + ) + def create_prov( self, box: Dict, @@ -895,6 +1076,17 @@ def create_true_document( _log.error(f"Incorrect annotation for {basename}: no reading order found") return None + if self._validate_annotations_consistency(boxes=boxes, + lines=lines, + reading_order=reading_order, + to_captions=to_captions, + to_footnotes=to_footnotes, + to_values=to_values, + merges=merges, + group=group): + _log.error(f"Inconsistent annotation for {basename}: skipping") + return None + # Original Groundtruth and Prediction files orig_file = desc.document_file