@@ -3,6 +3,8 @@
 import sublime
 import traceback

+from ...vendor.charset_normalizer import from_bytes as charset_from_bytes
+
 from ...latextools.latextools_plugin import LaTeXToolsPlugin
 from ...latextools.utils import bibcache
 from ...latextools.utils.logging import logger
@@ -53,69 +55,87 @@ def get_entries(self, *bib_files):
                 pass

             try:
-                with open(bibfname, "r", encoding="utf-8", errors="ignore", newline="\n") as bibf:
-                    bib_entries = []
-                    entry = {}
-                    for line in bibf.readlines():
-                        line = line.strip()
-                        # Let's get rid of irrelevant lines first
-                        if line == "" or line[0] == "%":
-                            continue
-                        if line.lower()[0:8] == "@comment":
-                            continue
-                        if line.lower()[0:7] == "@string":
-                            continue
-                        if line.lower()[0:9] == "@preamble":
-                            continue
-                        if line[0] == "@":
-                            if "keyword" in entry:
-                                bib_entries.append(entry)
-                                entry = {}
-
-                            kp_match = kp.search(line)
-                            if kp_match:
-                                entry["keyword"] = kp_match.group(1)
-                            else:
-                                logger.error(f"Cannot process this @ line: {line}")
-                                logger.error(
-                                    "Previous keyword (if any): " + entry.get("keyword", ""),
-                                )
-                            continue
-
-                        # Now test for title, author, etc.
-                        # Note: we capture only the first line, but that's OK for our purposes
-                        multip_match = multip.search(line)
-                        if multip_match:
-                            key = multip_match.group(1).lower()
-                            value = codecs.decode(multip_match.group(2), "latex")
-
-                            if key == "title":
-                                value = (
-                                    value.replace("{\\textquoteright}", "")
-                                    .replace("{", "")
-                                    .replace("}", "")
-                                )
-                            entry[key] = value
-
-                    # at the end, we have a single record
-                    if "keyword" in entry:
-                        bib_entries.append(entry)
-
-                    logger.info(f"Loaded {len(bib_entries)} bibitems")
-
-                    try:
-                        fmt_entries = bib_cache.set(bib_entries)
-                        entries.extend(fmt_entries)
-                    except Exception:
-                        traceback.print_exc()
-                        logger.warning("Using bibliography without caching it")
-                        entries.extend(bib_entries)
-
+                with open(bibfname, "rb") as bibf:
+                    content = bibf.read()
             except OSError:
                 msg = f'Cannot open bibliography file "{bibfname}"!'
                 logger.error(msg)
                 sublime.status_message(msg)
-                continue
+            else:
+                bib_entries = []
+                entry = {}
+
+                # detect encoding
+                charset_match = charset_from_bytes(content).best()
+                if not charset_match:
+                    msg = f'Cannot determine encoding of file "{bibfname}"!'
+                    logger.error(msg)
+                    sublime.status_message(msg)
+                    continue
+                encoding = charset_match.encoding
+                if charset_match.bom and encoding == "utf_8":
+                    content = content[len(codecs.BOM_UTF8):]
+
+                # decode bytes
+                text = content.decode(encoding=encoding)
+                text = text.replace("\r\n", "\n").replace("\r", "\n")
+
+                # parse text
+                for line in text.splitlines():
+                    line = line.strip()
+                    # Let's get rid of irrelevant lines first
+                    if line == "" or line[0] == "%":
+                        continue
+                    if line.lower()[0:8] == "@comment":
+                        continue
+                    if line.lower()[0:7] == "@string":
+                        continue
+                    if line.lower()[0:9] == "@preamble":
+                        continue
+                    if line[0] == "@":
+                        if "keyword" in entry:
+                            bib_entries.append(entry)
+                            entry = {}
+
+                        kp_match = kp.search(line)
+                        if kp_match:
+                            entry["keyword"] = kp_match.group(1)
+                        else:
+                            logger.error(f"Cannot process this @ line: {line}")
+                            logger.error(
+                                "Previous keyword (if any): " + entry.get("keyword", ""),
+                            )
+                        continue
+
+                    # Now test for title, author, etc.
+                    # Note: we capture only the first line, but that's OK for our purposes
+                    multip_match = multip.search(line)
+                    if multip_match:
+                        key = multip_match.group(1).lower()
+                        value = codecs.decode(multip_match.group(2), "latex")
+
+                        if key == "title":
+                            value = (
+                                value.replace("{\\textquoteright}", "")
+                                .replace("{", "")
+                                .replace("}", "")
+                            )
+                        entry[key] = value
+
+                # at the end, we have a single record
+                if "keyword" in entry:
+                    bib_entries.append(entry)
+
+                logger.info(f"Loaded {len(bib_entries)} bibitems")
+
+                try:
+                    fmt_entries = bib_cache.set(bib_entries)
+                    entries.extend(fmt_entries)
+                except Exception:
+                    traceback.print_exc()
+                    logger.warning("Using bibliography without caching it")
+                    entries.extend(bib_entries)
+

         logger.info(f"Found {len(entries)} total bib entries")

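
For reference, the encoding-detection step added above can be exercised on its own. The sketch below follows the same detect, strip BOM, decode, normalize-newlines sequence as the diff; it is only an illustration, using the public charset_normalizer package rather than the vendored copy imported by the commit, and the read_bib_text helper name is hypothetical.

# Standalone sketch of the encoding-detection step (assumptions: public
# charset_normalizer package instead of the vendored copy; read_bib_text
# is a hypothetical helper, not part of the plugin).
import codecs

from charset_normalizer import from_bytes


def read_bib_text(path):
    """Read a .bib file as text, guessing its encoding first."""
    with open(path, "rb") as f:
        raw = f.read()

    match = from_bytes(raw).best()
    if match is None:
        raise ValueError(f"cannot determine the encoding of {path!r}")

    encoding = match.encoding
    if match.bom and encoding == "utf_8":
        # drop the UTF-8 byte order mark so it does not end up in the text
        raw = raw[len(codecs.BOM_UTF8):]

    text = raw.decode(encoding)
    # normalize Windows and old-Mac line endings to "\n"
    return text.replace("\r\n", "\n").replace("\r", "\n")

Reading the file as bytes first lets the detector inspect the raw byte patterns; the previous approach of decoding with encoding="utf-8", errors="ignore" silently dropped undecodable bytes from non-UTF-8 bibliographies instead of falling back to their actual encoding.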