|
9 | 9 |
|
10 | 10 | from . import get_corpus_dir
|
11 | 11 | from .transformations import (filename_to_doi, _get_base_page, LANDING_PAGE_SUFFIX,
|
12 |
| - URL_SUFFIX, plos_page_dict, doi_url) |
| 12 | + URL_SUFFIX, plos_page_dict, doi_url, doi_to_url, doi_to_path) |
13 | 13 | from .plos_regex import validate_doi
|
14 | 14 | from .elements import (parse_article_date, get_contrib_info,
|
15 | 15 | Journal, License, match_contribs_to_dicts)
|
| 16 | +from .utils import dedent |
16 | 17 |
|
17 | 18 |
|
18 |
| -class Article(): |
| 19 | +class Article: |
19 | 20 | """The primary object of a PLOS article, initialized by a valid PLOS DOI.
|
20 | 21 |
|
21 | 22 | """
|
@@ -46,6 +47,62 @@ def __eq__(self, other):
|
46 | 47 | dir_eq = self.directory == other.directory
|
47 | 48 | return doi_eq and dir_eq
|
48 | 49 |
|
| 50 | + def __str__(self, exclude_refs=True): |
| 51 | + """Output when you print an article object on the command line. |
| 52 | +
|
| 53 | + For parsing and viewing the XML of a local article. Should not be used for hashing. |
| 54 | + By default, excludes the <back> element (including the references list) for easier viewing. |
| 55 | + :param exclude_refs: if True, remove the <back> element (and its references) from the printed tree |
| 56 | + """ |
| 57 | + parser = et.XMLParser(remove_blank_text=True) |
| 58 | + tree = et.parse(self.filename, parser) |
| 59 | + if exclude_refs: |
| 60 | + root = tree.getroot() |
| 61 | + back = tree.xpath('./back') |
| 62 | + if back: |
| 63 | + root.remove(back[0]) |
| 64 | + local_xml = et.tostring(tree, |
| 65 | + method='xml', |
| 66 | + encoding='unicode', |
| 67 | + pretty_print=True) |
| 68 | + return local_xml |
| 69 | + |
| 70 | + def __repr__(self): |
| 71 | + """Value of an article object when you call it directly on the command line. |
| 72 | +
|
| 73 | + Shows the DOI and title of the article |
| 74 | + :returns: DOI and title |
| 75 | + :rtype: {str} |
| 76 | + """ |
| 77 | + out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title) |
| 78 | + return out |
| 79 | + |
| 80 | + |
| 81 | + def _repr_html_(self): |
| 82 | + """Return an HTML snippet (linked title and linked DOI) for rich display in Jupyter notebooks.""" |
| 83 | + |
| 84 | + titlestyle = 'display:inline-flex;' |
| 85 | + titletextstyle = 'margin-left:.5em;' |
| 86 | + titlelink = ('<span style="{titlestyle}"><a href="{url}">' |
| 87 | + '<em>{title}</em></a></span>').format( |
| 88 | + url=self.page, |
| 89 | + title=self.title, |
| 90 | + titlestyle=titlestyle+titletextstyle, |
| 91 | + ) |
| 92 | + |
| 93 | + doilink = '<span><a href="{url}"><code>{doi}</code></a></span>'.format( |
| 94 | + url=self.doi_link(), |
| 95 | + doi=self.doi, |
| 96 | + ) |
| 97 | + out = dedent("""<div> |
| 98 | + <span style="{titlestyle}">Title: {titlelink}</span><br> |
| 99 | + <span>DOI: {doilink}</span> |
| 100 | + </div> |
| 101 | + """).format(doilink=doilink, titlelink=titlelink, titlestyle=titlestyle) |
| 102 | + |
| 103 | + return out |
| 104 | + |
| 105 | + |
49 | 106 | def reset_memoized_attrs(self):
|
50 | 107 | """Reset attributes to None when instantiating a new article object.
|
51 | 108 |
|
@@ -111,34 +168,6 @@ def doi(self, d):
|
111 | 168 | self.reset_memoized_attrs()
|
112 | 169 | self._doi = d
|
113 | 170 |
|
114 |
| - def __str__(self, exclude_refs=True): |
115 |
| - """Output when you print an article object on the command line. |
116 |
| -
|
117 |
| - For parsing and viewing the XML of a local article. Should not be used for hashing |
118 |
| - Excludes <back> element (including references list) for easier viewing |
119 |
| - :param exclude_refs: remove references from the article tree (eases print viewing) |
120 |
| - """ |
121 |
| - parser = et.XMLParser(remove_blank_text=True) |
122 |
| - tree = et.parse(self.filename, parser) |
123 |
| - if exclude_refs: |
124 |
| - root = tree.getroot() |
125 |
| - back = tree.xpath('./back') |
126 |
| - root.remove(back[0]) |
127 |
| - local_xml = et.tostring(tree, |
128 |
| - method='xml', |
129 |
| - encoding='unicode', |
130 |
| - pretty_print=True) |
131 |
| - return local_xml |
132 |
| - |
133 |
| - def __repr__(self): |
134 |
| - """Value of an article object when you call it directly on the command line. |
135 |
| -
|
136 |
| - Shows the DOI and title of the article |
137 |
| - :returns: DOI and title |
138 |
| - :rtype: {str} |
139 |
| - """ |
140 |
| - out = "DOI: {0}\nTitle: {1}".format(self.doi, self.title) |
141 |
| - return out |
142 | 171 |
|
143 | 172 | def doi_link(self):
|
144 | 173 | """The link of the DOI, which redirects to the journal URL."""
|
|
0 commit comments