From 6dc3a70dbf8d5a35539fd3bee1c0e38edd3161d7 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:40:41 +0530 Subject: [PATCH 01/11] matching template till the end in ptwiki.py --- articlequality/feature_lists/ptwiki.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/articlequality/feature_lists/ptwiki.py b/articlequality/feature_lists/ptwiki.py index 6983cca..b208cd3 100644 --- a/articlequality/feature_lists/ptwiki.py +++ b/articlequality/feature_lists/ptwiki.py @@ -15,7 +15,7 @@ # Templates infobox_templates = wikitext.revision.template_names_matching( - r"(Info|Infobox)", name="ptwiki.revision.infobox_templates") + r"(Info|Infobox)$", name="ptwiki.revision.infobox_templates") CN_TEMPLATES = [ r"Carece[ _]de[ _]fontes", r"Carece[ _]de[ _]fontes2", @@ -23,7 +23,7 @@ r"Carece[ _]de[ _]fontes/bloco2" ] cn_templates = wikitext.revision.template_names_matching( - "|".join(CN_TEMPLATES), name="ptwiki.revision.cn_templates") + "$|".join(CN_TEMPLATES)+"$", name="ptwiki.revision.cn_templates") MAIN_TEMPLATES = [ r"Artigo[ _]principal", r"Ver[ _]artigo[ _]principal", @@ -36,7 +36,7 @@ r"AP", r"Details", r"Ver[ _]artigo" ] main_article_templates = wikitext.revision.template_names_matching( - "|".join(MAIN_TEMPLATES), name="ptwiki.main_article_templates") + "$|".join(MAIN_TEMPLATES)+"$", name="ptwiki.main_article_templates") CITE_TEMPLATES = [ r"Cite", r"Citar", @@ -50,9 +50,9 @@ r"Harvp" ] cite_templates = wikitext.revision.template_names_matching( - "|".join(CITE_TEMPLATES), name="ptwiki.revision.cite_templates") + "$|".join(CITE_TEMPLATES)+"$", name="ptwiki.revision.cite_templates") shortened_footnote_templates = wikitext.revision.template_names_matching( - r"sfn", name="ptwiki.revision.shortened_footnote_templates") + r"sfn$", name="ptwiki.revision.shortened_footnote_templates") all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags all_cite_templates = cite_templates + shortened_footnote_templates 
proportion_of_templated_references = \ @@ -71,12 +71,19 @@ r"(File|Ficheiro|Arquivo|Imagem?)\s*\:", name="ptwiki.revision.image_links") +IMG_TEMPLATES = [ + r"Scalable[ _]image", + r"Panorama", + r"Imagem[ _]vertical", + r"Panorama", + r"Panorama 2" +] + image_templates = wikitext.revision.template_names_matching( - r"(Scalable[ _]image|Panorama|Imagem[ _]vertical|Panorama|Panorama 2)", - name='ptwiki.revision.image_template') + "$|".join(IMG_TEMPLATES)+"$", name='ptwiki.revision.image_template') side_by_side_image_templates = wikitext.revision.template_names_matching( - r"Imagem[ _]dupla", + r"Imagem[ _]dupla$", name='ptwiki.revision.side_by_side_image_templates') From 8f5bde65ab7e12fae113a05b63f271d7e8ae3745 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:41:54 +0530 Subject: [PATCH 02/11] matching template till the end in enwiki.py --- articlequality/feature_lists/enwiki.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/articlequality/feature_lists/enwiki.py b/articlequality/feature_lists/enwiki.py index f536621..4412ae1 100644 --- a/articlequality/feature_lists/enwiki.py +++ b/articlequality/feature_lists/enwiki.py @@ -15,18 +15,18 @@ # Templates infobox_templates = wikitext.revision.template_names_matching( - r"infobox", name="enwiki.revision.infobox_templates") + r"infobox$", name="enwiki.revision.infobox_templates") CN_TEMPLATES = [ r"Citation[_ ]needed", r"Cn", r"Fact" ] cn_templates = wikitext.revision.template_names_matching( - "|".join(CN_TEMPLATES), name="enwiki.revision.cn_templates") + "$|".join(CN_TEMPLATES)+"$", name="enwiki.revision.cn_templates") who_templates = wikitext.revision.template_names_matching( - "Who", name="enwiki.revision.who_templates") + "Who$", name="enwiki.revision.who_templates") main_article_templates = wikitext.revision.template_names_matching( - "Main", name="enwiki.main_article_templates") + "Main$", name="enwiki.main_article_templates") CITE_TEMPLATES = [ r"Cite", 
r"Harvard[_ ]citation[_ ]no[_ ]brackets", r"harvnb", @@ -39,7 +39,7 @@ r"Harvp" ] cite_templates = wikitext.revision.template_names_matching( - "|".join(CITE_TEMPLATES), name="enwiki.revision.cite_templates") + "$|".join(CITE_TEMPLATES)+"$", name="enwiki.revision.cite_templates") SFN_TEMPLATES = [ r"Shortened footnote template", r"sfn", r"Sfnp", @@ -47,7 +47,7 @@ r"Sfnmp" ] shortened_footnote_templates = wikitext.revision.template_names_matching( - "|".join(SFN_TEMPLATES), + "$|".join(SFN_TEMPLATES)+"$", name="enwiki.revision.shortened_footnote_templates") all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags all_cite_templates = cite_templates + shortened_footnote_templates @@ -67,7 +67,7 @@ r"File|Image\:", name="enwiki.revision.image_links") image_templates = wikitext.revision.template_names_matching( - r"((Wide|Tall|scalable) image)|Panorama|Panorama 2", + r"((Wide|Tall|scalable) image)$|Panorama$|Panorama 2$", name='enwiki.revision.image_template') From 80e7d1b1d3d580f53c7e476e32d8276c6a35e1d5 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:42:44 +0530 Subject: [PATCH 03/11] matching template regex till the end in euwiki.py --- articlequality/feature_lists/euwiki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/articlequality/feature_lists/euwiki.py b/articlequality/feature_lists/euwiki.py index 966a62d..ac2a149 100644 --- a/articlequality/feature_lists/euwiki.py +++ b/articlequality/feature_lists/euwiki.py @@ -17,7 +17,7 @@ r"[\w\s_]*infotaula[ _]automatikoa$", name="euwiki.revision.infobox_templates") cn_templates = wikitext.revision.template_names_matching( - r"erref[ _]behar", name="euwiki.revision.cn_templates") + r"erref[ _]behar$", name="euwiki.revision.cn_templates") # Links # Excluding category_links based on https://phabricator.wikimedia.org/T240467 From 86bd1716eaf7ad0c489dd44b64b7c99586b5c887 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:47:18 +0530 Subject: [PATCH 04/11] 
matching template regex till the end in fawiki.py --- articlequality/feature_lists/fawiki.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/articlequality/feature_lists/fawiki.py b/articlequality/feature_lists/fawiki.py index a525f0e..3b068a4 100644 --- a/articlequality/feature_lists/fawiki.py +++ b/articlequality/feature_lists/fawiki.py @@ -13,7 +13,7 @@ # Templates infobox_templates = wikitext.revision.template_names_matching( - r"infobox|جعبه", name="fawiki.revision.infobox_templates") + r"infobox$|جعبه$", name="fawiki.revision.infobox_templates") CN_TEMPLATES = [ r"Citation[_ ]needed", r"Cn", @@ -21,13 +21,13 @@ r"مدرک" ] cn_templates = wikitext.revision.template_names_matching( - "|".join(CN_TEMPLATES), name="fawiki.revision.cn_templates") + "$|".join(CN_TEMPLATES)+"$", name="fawiki.revision.cn_templates") who_templates = wikitext.revision.template_names_matching( "Who|چه کسی|چه‌کسی", name="fawiki.revision.who_templates") main_article_templates = wikitext.revision.template_names_matching( "Main|اصلی", name="fawiki.main_article_templates") cite_templates = wikitext.revision.template_names_matching( - r"cite|یادکرد", name="fawiki.revision.cite_templates") + r"cite$|یادکرد$", name="fawiki.revision.cite_templates") proportion_of_templated_references = \ cite_templates / max(wikitext.revision.ref_tags, 1) non_templated_references = max(wikitext.revision.ref_tags - cite_templates, 0) From bc4ffd3dd80fe7de0f444567e5db3a7794f0e639 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:48:27 +0530 Subject: [PATCH 05/11] matching template regex till the end in frwiki.py --- articlequality/feature_lists/frwiki.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/articlequality/feature_lists/frwiki.py b/articlequality/feature_lists/frwiki.py index eb4ce8a..f61dcab 100644 --- a/articlequality/feature_lists/frwiki.py +++ b/articlequality/feature_lists/frwiki.py @@ -39,37 +39,37 @@ r"Référence[ _]à[ 
_]confirmer", r"Référence[ _]nécessaire", r"Inédit"] lvl1_cn_templates = wikitext.revision.template_names_matching( - "|".join(LVL1_CN_TEMPLATES), + "$|".join(LVL1_CN_TEMPLATES)+"$", name="frwiki.revision.lvl1_cn_templates") LVL2_CN_TEMPLATES = [r"Référence[ _]insuffisante", r"Référence[ _]incomplète", r"Détournement[ _]de[ _]sources", r"Section[ _]à[ _]sourcer"] lvl2_cn_templates = wikitext.revision.template_names_matching( - "|".join(LVL2_CN_TEMPLATES), + "$|".join(LVL2_CN_TEMPLATES)+"$", name="frwiki.revision.lvl2_cn_templates") LVL3_CN_TEMPLATES = [r"Sources[ _]à[ _]lier", r"Sources[ _]obsolètes", r"Référence[ _]obsolète", r"À[ _]sourcer", r"Sources[ _]secondaires", r"BPV[ _]à[ _]sourcer"] lvl3_cn_templates = wikitext.revision.template_names_matching( - "|".join(LVL3_CN_TEMPLATES), + "$|".join(LVL3_CN_TEMPLATES)+"$", name="frwiki.revision.lvl3_cn_templates") LVL4_CN_TEMPLATES = [r"À[ _]prouver", r"Faut[ _]sourcer"] lvl4_cn_templates = wikitext.revision.template_names_matching( - "|".join(LVL4_CN_TEMPLATES), + "$|".join(LVL4_CN_TEMPLATES)+"$", name="frwiki.revision.lvl4_cn_templates") LVL5_CN_TEMPLATES = [r"À[ _]vérifier", r"Vérifiabilité"] lvl5_cn_templates = wikitext.revision.template_names_matching( - "|".join(LVL5_CN_TEMPLATES), + "$|".join(LVL5_CN_TEMPLATES)+"$", name="frwiki.revision.lvl5_cn_templates") main_article_templates = wikitext.revision.template_names_matching( - r"Article[ _](principal|détaillé)", + r"Article[ _](principal$|détaillé)$", name="frwiki.main_article_templates") date_templates = wikitext.revision.template_names_matching( - r"date", + r"date$", name="frwiki.revision.date_templates") # Links From 13440e8f8aa30fb7497b3b1d037861eb6e49c8f3 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:51:06 +0530 Subject: [PATCH 06/11] minor fix in frwiki.py --- articlequality/feature_lists/frwiki.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/articlequality/feature_lists/frwiki.py 
b/articlequality/feature_lists/frwiki.py index f61dcab..bf29066 100644 --- a/articlequality/feature_lists/frwiki.py +++ b/articlequality/feature_lists/frwiki.py @@ -20,7 +20,7 @@ r"Ouvrage" ] cite_templates = wikitext.revision.template_names_matching( - "|".join(CITE_TEMPLATES), + "$|".join(CITE_TEMPLATES)+"$", name="frwiki.revision.cite_templates") proportion_of_templated_references = \ cite_templates / max(wikitext.revision.ref_tags, 1) @@ -30,7 +30,7 @@ name="frwiki.revision.non_cite_templates" ) infobox_templates = wikitext.revision.template_names_matching( - r"^infobox", + r"^infobox$", name="frwiki.revision.infobox_templates") # Copied (2015-10-29) from: From c6b867bd119ee8a71d1915ba74a4308a6855ed24 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:55:37 +0530 Subject: [PATCH 07/11] matching template regex till the end in glwiki.py --- articlequality/feature_lists/glwiki.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/articlequality/feature_lists/glwiki.py b/articlequality/feature_lists/glwiki.py index 057b292..ca77387 100644 --- a/articlequality/feature_lists/glwiki.py +++ b/articlequality/feature_lists/glwiki.py @@ -15,10 +15,12 @@ # Infoboxes: # https://gl.wikipedia.org/wiki/Categor%C3%ADa:Caixas_de_informaci%C3%B3n # They don't have a common naming scheme. 
+ + cn_templates = wikitext.revision.template_names_matching( - r"cómpre[ _]referencia|cando|quen|clarificar|" + - r"sen[ _]referencias|cómpre[ _]páxina|" + - r"verificar[ _]credibilidade", name="glwiki.revision.cn_templates") + r"cómpre[ _]referencia$|cando$|quen$|clarificar$|" + + r"sen[ _]referencias$|cómpre[ _]páxina$|" + + r"verificar[ _]credibilidade$", name="glwiki.revision.cn_templates") # Links category_links = wikitext.revision.wikilink_titles_matching( From eabc505e838e8ed0ffd4098921b812ed7d9d8b41 Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:57:06 +0530 Subject: [PATCH 08/11] matching template regex till the end in ruwiki.py --- articlequality/feature_lists/ruwiki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/articlequality/feature_lists/ruwiki.py b/articlequality/feature_lists/ruwiki.py index b107187..1ea6a2c 100644 --- a/articlequality/feature_lists/ruwiki.py +++ b/articlequality/feature_lists/ruwiki.py @@ -10,7 +10,7 @@ from . import wikipedia cn_templates = wikitext.revision.template_names_matching( - r"Нет[ _]АИ", + r"Нет[ _]АИ$", name="ruwiki.revision.cn_templates") # Links From 4653ef6ac529ab2ebeeb7c44e6a1f29086c5b3de Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:57:28 +0530 Subject: [PATCH 09/11] matching template regex till the end in svwiki.py --- articlequality/feature_lists/svwiki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/articlequality/feature_lists/svwiki.py b/articlequality/feature_lists/svwiki.py index 873963f..5a97472 100644 --- a/articlequality/feature_lists/svwiki.py +++ b/articlequality/feature_lists/svwiki.py @@ -10,7 +10,7 @@ from . 
import wikipedia cn_templates = wikitext.revision.template_names_matching( - r"Källa[ _]behövs|Kb", + r"Källa[ _]behövs$|Kb$", name="svwiki.revision.cn_templates") # Links From 19cc5f294f417e5625af3173aac576bcede7898d Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 00:58:13 +0530 Subject: [PATCH 10/11] matching template regex till the end in trwiki.py --- articlequality/feature_lists/trwiki.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/articlequality/feature_lists/trwiki.py b/articlequality/feature_lists/trwiki.py index 19780cb..942d600 100644 --- a/articlequality/feature_lists/trwiki.py +++ b/articlequality/feature_lists/trwiki.py @@ -9,7 +9,7 @@ from . import wikipedia cite_templates = wikitext.revision.template_names_matching( - r"Kaynak|.*[ _]kaynağı", + r"Kaynak$|.*[ _]kaynağı$", name="trwiki.revision.cite_templates") proportion_of_templated_references = \ cite_templates / max(wikitext.revision.ref_tags, 1) @@ -19,17 +19,17 @@ name="trwiki.revision.non_cite_templates" ) infobox_templates = wikitext.revision.template_names_matching( - r".*[ _]bilgi[ _]kutusu", + r".*[ _]bilgi[ _]kutusu$", name="trwiki.revision.infobox_templates") # Copied (2015-10-29) from: # https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Citez_vos_sources#R.C3.A9clamation_et_contestation_de_sources cn_templates = wikitext.revision.template_names_matching( - r"Kaynak[ _]belirt|Olgu|Fact|Delil", + r"Kaynak[ _]belirt$|Olgu$|Fact$|Delil$", name="trwiki.revision.lvl1_cn_templates") main_article_templates = wikitext.revision.template_names_matching( - r"Ana|Anamadde", + r"Ana$|Anamadde$", name="trwiki.main_article_templates") # Links From ab2352d82e6fa56a2ad5805c0866a70533d10cde Mon Sep 17 00:00:00 2001 From: Sahethi Date: Sat, 22 Jan 2022 01:00:11 +0530 Subject: [PATCH 11/11] matching template regex till the end in ukwiki.py --- articlequality/feature_lists/ukwiki.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/articlequality/feature_lists/ukwiki.py b/articlequality/feature_lists/ukwiki.py index 804dacd..413a416 100644 --- a/articlequality/feature_lists/ukwiki.py +++ b/articlequality/feature_lists/ukwiki.py @@ -19,13 +19,13 @@ r"Fact" ] cn_templates = wikitext.revision.template_names_matching( - "|".join(CN_TEMPLATES), name="ukwiki.revision.cn_templates") + "$|".join(CN_TEMPLATES)+"$", name="ukwiki.revision.cn_templates") MAIN_TEMPLATES = [ r"Main", r"Докладніше" ] main_article_templates = wikitext.revision.template_names_matching( - "|".join(MAIN_TEMPLATES), name="ukwiki.revision.main_article_templates") + "$|".join(MAIN_TEMPLATES)+"$", name="ukwiki.revision.main_article_templates") CITE_TEMPLATES = [ r"Cite", r"Harvard[_ ]citation[_ ]no[_ ]brackets", r"harvnb", @@ -39,7 +39,7 @@ r"Citation" ] cite_templates = wikitext.revision.template_names_matching( - "|".join(CITE_TEMPLATES), name="ukwiki.revision.cite_templates") + "$|".join(CITE_TEMPLATES)+"$", name="ukwiki.revision.cite_templates") shortened_footnote_templates = wikitext.revision.template_names_matching( "sfn", name="ukwiki.revision.shortened_footnote_templates") all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags @@ -72,7 +72,7 @@ "|".join(IMAGE_LINKS), name="ukwiki.revision.image_links") image_templates = wikitext.revision.template_names_matching( - r"((Wide|Tall|scalable) image)|Panorama|Panorama 2", + r"((Wide|Tall|scalable) image)$|Panorama$|Panorama 2$", name="ukwiki.revision.image_template")