Skip to content

Commit 1aec1c7

Browse files
authored
Auto-detect bibfile encoding (#1664)
Fixes #335. This commit vendors the `charset-normalizer` package and uses it to detect the encoding of loaded bib files, extending support beyond plain UTF-8.
1 parent d0b1374 commit 1aec1c7

File tree

12 files changed

+4757
-95
lines changed

12 files changed

+4757
-95
lines changed

plugins/bibliography/new_bibliography.py

Lines changed: 54 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import sublime
44
import traceback
55

6+
from ...vendor.charset_normalizer import from_bytes as charset_from_bytes
7+
68
from ...latextools.latextools_plugin import LaTeXToolsPlugin
79
from ...latextools.utils import bibcache
810
from ...latextools.utils.logging import logger
@@ -122,46 +124,62 @@ def get_entries(self, *bib_files):
122124
pass
123125

124126
try:
125-
with open(bibfname, "r", encoding="utf-8", errors="ignore", newline="\n") as bibf:
126-
bib_entries = []
127-
128-
excluded_types = ("xdata", "comment", "string")
129-
excluded_fields = (
130-
"abstract",
131-
"annotation",
132-
"annote",
133-
"execute",
134-
"langidopts",
135-
"options",
136-
)
137-
138-
for key, entry in parser.parse(bibf.read()).items():
139-
if entry.entry_type in excluded_types:
140-
continue
141-
142-
# purge some unnecessary fields from the bib entry to save
143-
# some space and time reloading
144-
for k in excluded_fields:
145-
if k in entry:
146-
del entry[k]
147-
148-
bib_entries.append(EntryWrapper(entry))
149-
150-
logger.info(f"Loaded {len(bib_entries)} bibitems")
151-
152-
try:
153-
fmt_entries = bib_cache.set(bib_entries)
154-
entries.extend(fmt_entries)
155-
except Exception:
156-
traceback.print_exc()
157-
logger.warning("Using bibliography without caching it")
158-
entries.extend(bib_entries)
159-
127+
with open(bibfname, "rb") as bibf:
128+
content = bibf.read()
160129
except OSError:
161130
msg = f'Cannot open bibliography file "{bibfname}"!'
162131
logger.error(msg)
163132
sublime.status_message(msg)
164-
continue
133+
else:
134+
bib_entries = []
135+
136+
excluded_types = ("xdata", "comment", "string")
137+
excluded_fields = (
138+
"abstract",
139+
"annotation",
140+
"annote",
141+
"execute",
142+
"langidopts",
143+
"options",
144+
)
145+
146+
# detect encoding
147+
charset_match = charset_from_bytes(content).best()
148+
if not charset_match:
149+
msg = f'Cannot determine encoding of file "{bibfname}"!'
150+
logger.error(msg)
151+
sublime.status_message(msg)
152+
continue
153+
encoding = charset_match.encoding
154+
if charset_match.bom and encoding == "utf_8":
155+
content = content[len(codecs.BOM_UTF8):]
156+
157+
# decode bytes
158+
text = content.decode(encoding=encoding)
159+
text = text.replace("\r\n", "\n").replace("\r", "\n")
160+
161+
# parse text
162+
for key, entry in parser.parse(text).items():
163+
if entry.entry_type in excluded_types:
164+
continue
165+
166+
# purge some unnecessary fields from the bib entry to save
167+
# some space and time reloading
168+
for k in excluded_fields:
169+
if k in entry:
170+
del entry[k]
171+
172+
bib_entries.append(EntryWrapper(entry))
173+
174+
logger.info(f"Loaded {len(bib_entries)} bibitems")
175+
176+
try:
177+
fmt_entries = bib_cache.set(bib_entries)
178+
entries.extend(fmt_entries)
179+
except Exception:
180+
traceback.print_exc()
181+
logger.warning("Using bibliography without caching it")
182+
entries.extend(bib_entries)
165183

166184
logger.info(f"Found {len(entries)} total bib entries")
167185

plugins/bibliography/traditional_bibliography.py

Lines changed: 79 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import sublime
44
import traceback
55

6+
from ...vendor.charset_normalizer import from_bytes as charset_from_bytes
7+
68
from ...latextools.latextools_plugin import LaTeXToolsPlugin
79
from ...latextools.utils import bibcache
810
from ...latextools.utils.logging import logger
@@ -53,69 +55,87 @@ def get_entries(self, *bib_files):
5355
pass
5456

5557
try:
56-
with open(bibfname, "r", encoding="utf-8", errors="ignore", newline="\n") as bibf:
57-
bib_entries = []
58-
entry = {}
59-
for line in bibf.readlines():
60-
line = line.strip()
61-
# Let's get rid of irrelevant lines first
62-
if line == "" or line[0] == "%":
63-
continue
64-
if line.lower()[0:8] == "@comment":
65-
continue
66-
if line.lower()[0:7] == "@string":
67-
continue
68-
if line.lower()[0:9] == "@preamble":
69-
continue
70-
if line[0] == "@":
71-
if "keyword" in entry:
72-
bib_entries.append(entry)
73-
entry = {}
74-
75-
kp_match = kp.search(line)
76-
if kp_match:
77-
entry["keyword"] = kp_match.group(1)
78-
else:
79-
logger.error(f"Cannot process this @ line: {line}")
80-
logger.error(
81-
"Previous keyword (if any): " + entry.get("keyword", ""),
82-
)
83-
continue
84-
85-
# Now test for title, author, etc.
86-
# Note: we capture only the first line, but that's OK for our purposes
87-
multip_match = multip.search(line)
88-
if multip_match:
89-
key = multip_match.group(1).lower()
90-
value = codecs.decode(multip_match.group(2), "latex")
91-
92-
if key == "title":
93-
value = (
94-
value.replace("{\\textquoteright}", "")
95-
.replace("{", "")
96-
.replace("}", "")
97-
)
98-
entry[key] = value
99-
100-
# at the end, we have a single record
101-
if "keyword" in entry:
102-
bib_entries.append(entry)
103-
104-
logger.info(f"Loaded {len(bib_entries)} bibitems")
105-
106-
try:
107-
fmt_entries = bib_cache.set(bib_entries)
108-
entries.extend(fmt_entries)
109-
except Exception:
110-
traceback.print_exc()
111-
logger.warning("Using bibliography without caching it")
112-
entries.extend(bib_entries)
113-
58+
with open(bibfname, "rb") as bibf:
59+
content = bibf.read()
11460
except OSError:
11561
msg = f'Cannot open bibliography file "{bibfname}"!'
11662
logger.error(msg)
11763
sublime.status_message(msg)
118-
continue
64+
else:
65+
bib_entries = []
66+
entry = {}
67+
68+
# detect encoding
69+
charset_match = charset_from_bytes(content).best()
70+
if not charset_match:
71+
msg = f'Cannot determine encoding of file "{bibfname}"!'
72+
logger.error(msg)
73+
sublime.status_message(msg)
74+
continue
75+
encoding = charset_match.encoding
76+
if charset_match.bom and encoding == "utf_8":
77+
content = content[len(codecs.BOM_UTF8):]
78+
79+
# decode bytes
80+
text = content.decode(encoding=encoding)
81+
text = text.replace("\r\n", "\n").replace("\r", "\n")
82+
83+
# parse text
84+
for line in text.splitlines():
85+
line = line.strip()
86+
# Let's get rid of irrelevant lines first
87+
if line == "" or line[0] == "%":
88+
continue
89+
if line.lower()[0:8] == "@comment":
90+
continue
91+
if line.lower()[0:7] == "@string":
92+
continue
93+
if line.lower()[0:9] == "@preamble":
94+
continue
95+
if line[0] == "@":
96+
if "keyword" in entry:
97+
bib_entries.append(entry)
98+
entry = {}
99+
100+
kp_match = kp.search(line)
101+
if kp_match:
102+
entry["keyword"] = kp_match.group(1)
103+
else:
104+
logger.error(f"Cannot process this @ line: {line}")
105+
logger.error(
106+
"Previous keyword (if any): " + entry.get("keyword", ""),
107+
)
108+
continue
109+
110+
# Now test for title, author, etc.
111+
# Note: we capture only the first line, but that's OK for our purposes
112+
multip_match = multip.search(line)
113+
if multip_match:
114+
key = multip_match.group(1).lower()
115+
value = codecs.decode(multip_match.group(2), "latex")
116+
117+
if key == "title":
118+
value = (
119+
value.replace("{\\textquoteright}", "")
120+
.replace("{", "")
121+
.replace("}", "")
122+
)
123+
entry[key] = value
124+
125+
# at the end, we have a single record
126+
if "keyword" in entry:
127+
bib_entries.append(entry)
128+
129+
logger.info(f"Loaded {len(bib_entries)} bibitems")
130+
131+
try:
132+
fmt_entries = bib_cache.set(bib_entries)
133+
entries.extend(fmt_entries)
134+
except Exception:
135+
traceback.print_exc()
136+
logger.warning("Using bibliography without caching it")
137+
entries.extend(bib_entries)
138+
119139

120140
logger.info(f"Found {len(entries)} total bib entries")
121141

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
Charset-Normalizer
3+
~~~~~~~~~~~~~~
4+
The Real First Universal Charset Detector.
5+
A library that helps you read text from an unknown charset encoding.
6+
Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
7+
All IANA character set names for which the Python core library provides codecs are supported.
8+
9+
Basic usage:
10+
>>> from charset_normalizer import from_bytes
11+
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
12+
>>> best_guess = results.best()
13+
>>> str(best_guess)
14+
'Bсеки човек има право на образование. Oбразованието!'
15+
16+
Others methods and usages are available - see the full documentation
17+
at <https://github.com/Ousret/charset_normalizer>.
18+
:copyright: (c) 2021 by Ahmed TAHRI
19+
:license: MIT, see LICENSE for more details.
20+
"""
21+
22+
from __future__ import annotations
23+
24+
import logging
25+
26+
from .api import from_bytes, from_fp, from_path, is_binary
27+
from .legacy import detect
28+
from .models import CharsetMatch, CharsetMatches
29+
from .utils import set_logging_handler
30+
from .version import VERSION, __version__
31+
32+
__all__ = (
33+
"from_fp",
34+
"from_path",
35+
"from_bytes",
36+
"is_binary",
37+
"detect",
38+
"CharsetMatch",
39+
"CharsetMatches",
40+
"__version__",
41+
"VERSION",
42+
"set_logging_handler",
43+
)
44+
45+
# Attach a NullHandler to the top level logger by default
46+
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
47+
48+
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())

0 commit comments

Comments
 (0)