From d6c775d159a1adfd8e4938c5935921675951546f Mon Sep 17 00:00:00 2001 From: Vaclav Haisman Date: Thu, 29 Jun 2017 22:11:05 +0200 Subject: [PATCH 1/2] Fix parsing of entities with `itemscope` and unanchored `itemprop`. This patch makes the parser accept input where `itemscope` is on a top-level item but there is also an `itemprop` on the same tag that is not anchored by a higher-level `itemscope`. --- lib/HTML/Microdata.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/HTML/Microdata.pm b/lib/HTML/Microdata.pm index e6bc826..430f28d 100644 --- a/lib/HTML/Microdata.pm +++ b/lib/HTML/Microdata.pm @@ -62,7 +62,7 @@ sub _parse { $items->{ $scope->id } = $item; - unless ($scope->attr('itemprop')) { + unless (scalar @{$scope->findnodes('./ancestor::*[@itemscope]')}) { # This is top level item push @{ $self->{items} }, $item; } @@ -87,6 +87,7 @@ sub _parse { for my $prop (@$props) { my $value = $self->extract_value($prop, items => $items); my $scope = $prop->findnodes('./ancestor::*[@itemscope]')->[-1]; + next if ! defined $scope; for my $name (split /\s+/, $prop->attr('itemprop')) { $items->{ $scope->id }->{properties}->add($name => $value); } From de993f4554dfbe7a45ba01f822cbfe6f98a8f669 Mon Sep 17 00:00:00 2001 From: Vaclav Haisman Date: Sat, 1 Jul 2017 01:48:17 +0200 Subject: [PATCH 2/2] Partially revert the previous change to top-level item identification. --- lib/HTML/Microdata.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/HTML/Microdata.pm b/lib/HTML/Microdata.pm index 430f28d..fe5f98d 100644 --- a/lib/HTML/Microdata.pm +++ b/lib/HTML/Microdata.pm @@ -62,7 +62,8 @@ sub _parse { $items->{ $scope->id } = $item; - unless (scalar @{$scope->findnodes('./ancestor::*[@itemscope]')}) { + if (!scalar @{$scope->findnodes('./ancestor::*[@itemscope]')} + || !$scope->attr('itemprop')) { # This is top level item push @{ $self->{items} }, $item; }