Skip to content

Commit d0fff94

Browse files
authored
Merge pull request #91 from mpacer/pretty_articles
Pretty articles
2 parents a2f2b75 + 1c1349f commit d0fff94

File tree

7 files changed

+91
-35
lines changed

7 files changed

+91
-35
lines changed

allofplos/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ def get_corpus_dir():
3030
# import after creating global variables that they may rely upon
3131
# (e.g., corpusdir)
3232

33-
from .article_class import Article
33+
from .article import Article
3434
from .corpus import Corpus

allofplos/article_class.py renamed to allofplos/article.py

Lines changed: 59 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99

1010
from . import get_corpus_dir
1111
from .transformations import (filename_to_doi, _get_base_page, LANDING_PAGE_SUFFIX,
12-
URL_SUFFIX, plos_page_dict, doi_url)
12+
URL_SUFFIX, plos_page_dict, doi_url, doi_to_url, doi_to_path)
1313
from .plos_regex import validate_doi
1414
from .elements import (parse_article_date, get_contrib_info,
1515
Journal, License, match_contribs_to_dicts)
16+
from .utils import dedent
1617

1718

18-
class Article():
19+
class Article:
1920
"""The primary object of a PLOS article, initialized by a valid PLOS DOI.
2021
2122
"""
@@ -46,6 +47,62 @@ def __eq__(self, other):
4647
dir_eq = self.directory == other.directory
4748
return doi_eq and dir_eq
4849

50+
def __str__(self, exclude_refs=True):
    """Return the article's local XML file as a pretty-printed string.

    Meant for reading the XML of a local article when printing the object;
    the result should not be used for hashing.

    :param exclude_refs: when true, drop the first ``<back>`` element
        (which includes the references list) before serializing, to make
        the printed output easier to read
    :returns: pretty-printed XML of the (possibly trimmed) article tree
    :rtype: {str}
    """
    # NOTE(review): `et` is presumably lxml.etree (remove_blank_text, xpath
    # and pretty_print are lxml features) -- confirm against file imports.
    xml_parser = et.XMLParser(remove_blank_text=True)
    article_tree = et.parse(self.filename, xml_parser)

    if exclude_refs:
        back_nodes = article_tree.xpath('./back')
        # Only remove when a <back> element was actually found.
        if back_nodes:
            article_tree.getroot().remove(back_nodes[0])

    return et.tostring(
        article_tree,
        method='xml',
        encoding='unicode',
        pretty_print=True,
    )
69+
70+
def __repr__(self):
    """Return the two-line summary shown when the object is echoed in a shell.

    The first line carries the DOI and the second the article title.

    :returns: DOI and title
    :rtype: {str}
    """
    summary_lines = [
        "DOI: {0}".format(self.doi),
        "Title: {0}".format(self.title),
    ]
    return "\n".join(summary_lines)
79+
80+
81+
def _repr_html_(self):
    """Return an HTML fragment for rich display (e.g. in a Jupyter notebook).

    The fragment shows the article title as a link to ``self.page`` and the
    DOI as a link to ``self.doi_link()``.

    :returns: HTML fragment with the linked title and DOI
    :rtype: {str}
    """
    # Local import keeps this method self-contained.
    from html import escape

    titlestyle = 'display:inline-flex;'
    titletextstyle = 'margin-left:.5em;'

    # Interpolated values are escaped so characters such as '&', '<' or '"'
    # cannot break the markup or the href attributes.
    # NOTE(review): assumes title/DOI are plain text, not intended markup --
    # confirm against the `title` property.
    titlelink = ('<span style="{titlestyle}"><a href="{url}">'
                 '<em>{title}</em></a></span>').format(
                     url=escape(str(self.page)),
                     title=escape(str(self.title)),
                     titlestyle=titlestyle+titletextstyle,
                     )

    doilink = '<span><a href="{url}"><code>{doi}</code></a></span>'.format(
        url=escape(str(self.doi_link())),
        doi=escape(str(self.doi)),
        )

    # Markup fixes: '<br>' replaces the invalid '</br>', and the "DOI: "
    # label span is now closed instead of opening a second, unclosed span.
    out = dedent("""<div>
        <span style="{titlestyle}">Title: {titlelink}</span><br>
        <span>DOI: </span>{doilink}
        </div>
        """).format(doilink=doilink, titlelink=titlelink, titlestyle=titlestyle)

    return out
104+
105+
49106
def reset_memoized_attrs(self):
50107
"""Reset attributes to None when instantiating a new article object.
51108
@@ -111,34 +168,6 @@ def doi(self, d):
111168
self.reset_memoized_attrs()
112169
self._doi = d
113170

114-
def __str__(self, exclude_refs=True):
115-
"""Output when you print an article object on the command line.
116-
117-
For parsing and viewing the XML of a local article. Should not be used for hashing
118-
Excludes <back> element (including references list) for easier viewing
119-
:param exclude_refs: remove references from the article tree (eases print viewing)
120-
"""
121-
parser = et.XMLParser(remove_blank_text=True)
122-
tree = et.parse(self.filename, parser)
123-
if exclude_refs:
124-
root = tree.getroot()
125-
back = tree.xpath('./back')
126-
root.remove(back[0])
127-
local_xml = et.tostring(tree,
128-
method='xml',
129-
encoding='unicode',
130-
pretty_print=True)
131-
return local_xml
132-
133-
def __repr__(self):
134-
"""Value of an article object when you call it directly on the command line.
135-
136-
Shows the DOI and title of the article
137-
:returns: DOI and title
138-
:rtype: {str}
139-
"""
140-
out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title)
141-
return out
142171

143172
def doi_link(self):
144173
"""The link of the DOI, which redirects to the journal URL."""

allofplos/corpus/plos_corpus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
from ..plos_regex import validate_doi
3939
from ..transformations import (BASE_URL_API, filename_to_doi, doi_to_path, doi_to_url)
40-
from ..article_class import Article
40+
from ..article import Article
4141
from .gdrive import (download_file_from_google_drive, get_zip_metadata, unzip_articles,
4242
ZIP_ID, LOCAL_ZIP, LOCAL_TEST_ZIP, TEST_ZIP_ID, min_files_for_valid_corpus)
4343

allofplos/makedb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from .corpus import Corpus
2121
from .transformations import filename_to_doi, convert_country
2222
from . import starterdir
23-
from .article_class import Article
23+
from .article import Article
2424

2525
journal_title_dict = {
2626
'PLOS ONE': 'PLOS ONE',

allofplos/samples/corpus_analysis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from ..plos_corpus import (listdir_nohidden, uncorrected_proofs_text_list,
2323
download_updated_xml, get_all_solr_dois,
2424
download_check_and_move)
25-
from ..article_class import Article
25+
from ..article import Article
2626

2727
counter = collections.Counter
2828
pmcdir = "pmc_articles"

allofplos/tests/test_corpus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from . import TESTDATADIR
22
from .. import Corpus, starterdir
3-
from ..article_class import Article
3+
from ..article import Article
44
from ..corpus import listdir_nohidden
55

66
import random

allofplos/utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import textwrap
2+
3+
def dedent(text):
    """Dedent ``text`` like textwrap.dedent, but ignore an unindented first line.

    This allows strings written as::

        '''foo
        is a bar
        '''

    to be dedented even though the first line carries no indentation: the
    first line is kept verbatim and only the remaining lines are dedented.
    Text that starts with a blank line, or that has a single line, is passed
    to textwrap.dedent unchanged.

    Taken from ipython_genutils (text.py).

    :param text: the string to dedent
    :returns: the dedented string
    """
    head, newline, tail = text.partition('\n')

    # No newline means a single line; an empty head means the text starts
    # with a blank line. In both cases the first line is not special.
    if not newline or not head:
        return textwrap.dedent(text)

    # Keep the first line as is and dedent everything after it.
    return head + newline + textwrap.dedent(tail)

0 commit comments

Comments
 (0)