Skip to content

Commit fc18980

Browse files
committed
⚡️ refactor(exceptions): Rename exceptions for clarity and update error handling in detection methods
1 parent cfe3e1a commit fc18980

File tree

5 files changed

+93
-77
lines changed

5 files changed

+93
-77
lines changed

README.md

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,18 @@
1919
2020
> ### Memory note
2121
>
22-
> The lite model runs offline and is memory-friendly; the full model is larger and offers higher accuracy. Choose the model that best fits your constraints.
22+
> The lite model runs offline and is memory-friendly; the full model is larger and offers higher accuracy.
23+
>
24+
> Approximate memory usage (RSS after load):
25+
> - Lite: ~45–60 MB
26+
> - Full: ~170–210 MB
27+
> - Auto: tries full first, falls back to lite only on `MemoryError`.
28+
>
29+
> Notes:
30+
> - Measurements vary by Python version, OS, allocator, and import graph; treat these as practical ranges.
31+
> - Validate on your system if constrained; see `examples/memory_usage_check.py` (credit: script by github@JackyHe398).
32+
>
33+
> Choose the model that best fits your constraints.
2334
2435
## Installation 💻
2536

@@ -75,30 +86,35 @@ from fast_langdetect import LangDetectConfig, detect
7586

7687
cfg = LangDetectConfig(cache_dir="/custom/cache/path")
7788
print(detect("Hello", model='full', config=cfg))
89+
90+
# Set a default model via config and let calls omit model
91+
cfg_lite = LangDetectConfig(model="lite")
92+
print(detect("Hello", config=cfg_lite)) # uses lite by default
93+
print(detect("Bonjour", config=cfg_lite)) # uses lite by default
94+
print(detect("Hello", model='full', config=cfg_lite)) # per-call override to full
95+
7896
```
7997

8098
### Native API (Recommended)
8199

82100
```python
83-
from fast_langdetect import detect, LangDetector, LangDetectConfig, DetectError
101+
from fast_langdetect import detect, LangDetector, LangDetectConfig
84102

85-
# Simple detection (auto behavior)
86-
print(detect("Hello, world!", model='auto', k=1))
103+
# Simple detection (uses config default if not provided; defaults to 'auto')
104+
print(detect("Hello, world!", k=1))
87105
# Output: [{'lang': 'en', 'score': 0.98}]
88106

89107
# Using full model for better accuracy
90108
print(detect("Hello, world!", model='full', k=1))
91109
# Output: [{'lang': 'en', 'score': 0.99}]
92110

93111
# Custom configuration
94-
config = LangDetectConfig(cache_dir="/custom/cache/path") # Custom model cache directory
112+
config = LangDetectConfig(cache_dir="/custom/cache/path", model="auto") # Custom cache + default model
95113
detector = LangDetector(config)
96114

97-
try:
98-
result = detector.detect("Hello world", model='full', k=1)
99-
print(result) # [{'lang': 'en', 'score': 0.98}]
100-
except DetectError as e:
101-
print(f"Detection failed: {e}")
115+
# Omit model to use config.model; pass model to override
116+
result = detector.detect("Hello world", k=1)
117+
print(result) # [{'lang': 'en', 'score': 0.98}]
102118

103119
# Multiline text is handled automatically (newlines are replaced)
104120
multiline_text = "Hello, world!\nThis is a multiline text."
@@ -121,10 +137,16 @@ print(results)
121137

122138
#### Fallback Policy (Keep It Simple)
123139

124-
- Only MemoryError triggers fallback (in `model='auto'`): when loading the full model runs out of memory, it falls back to the lite model.
125-
- I/O/network/permission/path/integrity errors raise `DetectError` (with original exception) — no silent fallback.
140+
- Only `MemoryError` triggers fallback (in `model='auto'`): when loading the full model runs out of memory, it falls back to the lite model.
141+
- I/O/network/permission/path/integrity errors raise standard exceptions (e.g., `FileNotFoundError`, `PermissionError`) or library-specific errors where applicable — no silent fallback.
126142
- `model='lite'` and `model='full'` never fallback by design.
127143

144+
#### Errors
145+
146+
- Base error: `FastLangdetectError` (library-specific failures).
147+
- Model loading failures: `ModelLoadError`.
148+
- Standard Python exceptions (e.g., `ValueError`, `TypeError`, `FileNotFoundError`, `MemoryError`) propagate when they are not library-specific.
149+
128150
### Convenient `detect_language` Function
129151

130152
```python
@@ -177,7 +199,7 @@ print(detector.detect("Some very long text..."))
177199
### Cache Directory Behavior
178200

179201
- Default cache: if `cache_dir` is not set, models are stored under a system temp-based directory specified by `FTLANG_CACHE` or an internal default. This directory is created automatically when needed.
180-
- User-provided cache_dir: if you set `LangDetectConfig(cache_dir=...)` to a path that does not exist, the library raises `DetectError` instead of silently creating or using another location. Create the directory yourself if that’s intended.
202+
- User-provided cache_dir: if you set `LangDetectConfig(cache_dir=...)` to a path that does not exist, the library raises `FileNotFoundError` instead of silently creating or using another location. Create the directory yourself if that’s intended.
181203

182204
### Advanced Options (Optional)
183205

feature_test/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
detect_language,
77
LangDetector,
88
LangDetectConfig,
9-
DetectError,
109
)
1110

1211
# 多语言候选(使用 full 模型,返回前 5 个候选)
@@ -28,7 +27,7 @@
2827
)
2928
)
3029

31-
# 当离线或无网络时,使用 full 模型会抛出 DetectError;lite 模型离线可用
30+
# 当离线或无网络时,使用 full 模型可能抛出标准 I/O/网络异常或库内异常;lite 模型离线可用
3231
try:
3332
print(
3433
detect(
@@ -38,7 +37,7 @@
3837
config=LangDetectConfig(),
3938
)
4039
)
41-
except DetectError as e:
40+
except Exception as e:
4241
print(f"Detection failed: {e}")
4342

4443
# 使用自定义配置与实例化 Detector

src/fast_langdetect/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
# -*- coding: utf-8 -*-
22
# @Time : 2024/1/17 下午4:00
33

4-
from .infer import LangDetector, LangDetectConfig, DetectError # noqa: F401
5-
from .infer import detect
4+
from .infer import (
5+
LangDetector,
6+
LangDetectConfig,
7+
detect,
8+
FastLangdetectError,
9+
ModelLoadError,
10+
) # noqa: F401
611

712

813
def is_japanese(string):

src/fast_langdetect/infer.py

Lines changed: 48 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,13 @@
2727
_LOCAL_SMALL_MODEL_PATH = Path(__file__).parent / "resources" / "lid.176.ftz"
2828

2929

30-
class DetectError(Exception):
31-
"""Base exception for language detection errors."""
30+
class FastLangdetectError(Exception):
31+
"""Base exception for library-specific failures."""
32+
pass
33+
3234

35+
class ModelLoadError(FastLangdetectError):
36+
"""Raised when a FastText model fails to load."""
3337
pass
3438

3539

@@ -46,7 +50,8 @@ def download(url: str, save_path: Path, proxy: Optional[str] = None) -> None:
4650
:param proxy: Optional proxy URL
4751
4852
:raises:
49-
DetectError: If download fails
53+
FastLangdetectError: If download fails
54+
FileNotFoundError: If a user-provided cache directory does not exist
5055
"""
5156
if save_path.exists():
5257
logger.info(f"fast-langdetect: Model exists at {save_path}")
@@ -62,10 +67,12 @@ def download(url: str, save_path: Path, proxy: Optional[str] = None) -> None:
6267
try:
6368
parent_dir.mkdir(parents=True, exist_ok=True)
6469
except Exception as e:
65-
raise DetectError(f"fast-langdetect: Cannot create cache directory {parent_dir}: {e}") from e
70+
raise FastLangdetectError(
71+
f"fast-langdetect: Cannot create cache directory {parent_dir}: {e}"
72+
) from e
6673
else:
6774
# For user-specified cache_dir, do not fallback; raise
68-
raise DetectError(f"fast-langdetect: Cache directory not found: {parent_dir}")
75+
raise FileNotFoundError(f"fast-langdetect: Cache directory not found: {parent_dir}")
6976
try:
7077
download(
7178
url=url,
@@ -77,7 +84,8 @@ def download(url: str, save_path: Path, proxy: Optional[str] = None) -> None:
7784
timeout=7,
7885
)
7986
except Exception as e:
80-
raise DetectError(f"fast-langdetect: Download failed: {e}") from e
87+
# Download failures are library-specific
88+
raise FastLangdetectError(f"fast-langdetect: Download failed: {e}") from e
8189

8290

8391
class ModelLoader:
@@ -89,7 +97,8 @@ def __init__(self):
8997
def load_local(self, model_path: Path) -> Any:
9098
"""Load model from local file."""
9199
if not model_path.exists():
92-
raise DetectError(f"Model file not found: {model_path}")
100+
# Missing path is a standard I/O error
101+
raise FileNotFoundError(f"Model file not found: {model_path}")
93102

94103
if platform.system() == "Windows":
95104
return self._load_windows_compatible(model_path)
@@ -112,7 +121,7 @@ def _load_windows_compatible(self, model_path: Path) -> Any:
112121
113122
:param model_path: Path to the model file
114123
:return: Loaded FastText model
115-
:raises DetectError: If all loading strategies fail
124+
:raises ModelLoadError: If all loading strategies fail
116125
"""
117126
model_path_str = str(model_path.resolve())
118127

@@ -142,7 +151,7 @@ def _load_windows_compatible(self, model_path: Path) -> Any:
142151
shutil.copy2(model_path, tmp_path)
143152
return fasttext.load_model(tmp_path)
144153
except Exception as e:
145-
raise DetectError(f"Failed to load model using temporary file: {e}") from e
154+
raise ModelLoadError(f"Failed to load model using temporary file: {e}") from e
146155
finally:
147156
# Clean up temporary file
148157
if tmp_path and os.path.exists(tmp_path):
@@ -166,7 +175,7 @@ def _load_unix(self, model_path: Path) -> Any:
166175
# Let MemoryError propagate up to be handled by _get_model
167176
raise e
168177
except Exception as e:
169-
raise DetectError(f"fast-langdetect: Failed to load model: {e}") from e
178+
raise ModelLoadError(f"fast-langdetect: Failed to load model: {e}") from e
170179

171180

172181
class LangDetectConfig:
@@ -178,6 +187,7 @@ class LangDetectConfig:
178187
:param proxy: HTTP proxy for downloads
179188
:param normalize_input: Whether to normalize input text (e.g. lowercase for uppercase text)
180189
:param max_input_length: If set, truncate input to this many characters (always debug-log the change)
190+
:param model: Default model selection ('auto' | 'full' | 'lite') used when detect() is called without a model
181191
"""
182192

183193
def __init__(
@@ -187,13 +197,15 @@ def __init__(
187197
proxy: Optional[str] = None,
188198
normalize_input: bool = True,
189199
max_input_length: Optional[int] = 80,
200+
model: Literal["lite", "full", "auto"] = "auto",
190201
):
191202
self.cache_dir = cache_dir or CACHE_DIRECTORY
192203
self.custom_model_path = custom_model_path
193204
self.proxy = proxy
194205
self.normalize_input = normalize_input
195206
# Input handling
196207
self.max_input_length = max_input_length
208+
self.model: Literal["lite", "full", "auto"] = model
197209
if self.custom_model_path and not Path(self.custom_model_path).exists():
198210
raise FileNotFoundError(f"fast-langdetect: Target model file not found: {self.custom_model_path}")
199211

@@ -285,41 +297,17 @@ def _get_model(self, low_memory: bool = True, *, fallback_on_memory_error: bool
285297
self._models[cache_key] = model
286298
return model
287299
except MemoryError as e:
288-
if low_memory is not True and fallback_on_memory_error:
300+
if (not low_memory) and fallback_on_memory_error:
289301
logger.info("fast-langdetect: Falling back to low-memory model...")
290302
return self._get_model(low_memory=True, fallback_on_memory_error=False)
291-
raise DetectError("Failed to load model") from e
292-
293-
def detect(self, text: str) -> Dict[str, Union[str, float]]:
294-
"""
295-
Detect primary language of text.
296-
297-
:param text: Input text
298-
299-
:return: Dictionary with language and confidence score
300-
301-
:raises:
302-
DetectError: If detection fails
303-
"""
304-
# Default to smart behavior: try large, fallback on MemoryError
305-
model = self._get_model(low_memory=False, fallback_on_memory_error=True)
306-
text = self._preprocess_text(text)
307-
normalized_text = self._normalize_text(text, self.config.normalize_input)
308-
try:
309-
labels, scores = model.predict(normalized_text)
310-
return {
311-
"lang": labels[0].replace("__label__", ""),
312-
"score": min(float(scores[0]), 1.0),
313-
}
314-
except Exception as e:
315-
logger.error(f"fast-langdetect: Language detection error: {e}")
316-
raise DetectError("Language detection failed") from e
303+
# Preserve original MemoryError and traceback
304+
raise
317305

318306
def detect(
319307
self,
320308
text: str,
321309
*,
322-
model: Literal["lite", "full", "auto"] = "auto",
310+
model: Optional[Literal["lite", "full", "auto"]] = None,
323311
k: int = 1,
324312
threshold: float = 0.0,
325313
) -> List[Dict[str, Any]]:
@@ -330,34 +318,37 @@ def detect(
330318
:param model: 'lite' | 'full' | 'auto' (auto falls back on MemoryError)
331319
:param k: Number of top languages to return
332320
:param threshold: Minimum confidence threshold
333-
:raises DetectError: On detection failures
321+
:raises FastLangdetectError: For library-specific failures (e.g., invalid model)
322+
:raises Exception: Standard Python exceptions propagate, such as MemoryError, FileNotFoundError
334323
"""
335-
if model not in {"lite", "full", "auto"}:
336-
raise DetectError(f"Invalid model: {model}")
324+
# Determine model to use (config default if not provided)
325+
sel_model: Literal["lite", "full", "auto"]
326+
if model is None:
327+
sel_model = self.config.model
328+
else:
329+
if model not in {"lite", "full", "auto"}: # type: ignore[comparison-overlap]
330+
raise FastLangdetectError(f"Invalid model: {model}")
331+
sel_model = model
337332

338333
# Select model backend
339-
if model == "lite":
334+
if sel_model == "lite":
340335
ft_model = self._get_model(low_memory=True, fallback_on_memory_error=False)
341-
elif model == "full":
336+
elif sel_model == "full":
342337
ft_model = self._get_model(low_memory=False, fallback_on_memory_error=False)
343338
else:
344339
ft_model = self._get_model(low_memory=False, fallback_on_memory_error=True)
345340

346341
text = self._preprocess_text(text)
347342
normalized_text = self._normalize_text(text, self.config.normalize_input)
348-
try:
349-
labels, scores = ft_model.predict(normalized_text, k=k, threshold=threshold)
350-
results = [
351-
{
352-
"lang": label.replace("__label__", ""),
353-
"score": min(float(score), 1.0),
354-
}
355-
for label, score in zip(labels, scores)
356-
]
357-
return sorted(results, key=lambda x: x["score"], reverse=True)
358-
except Exception as e:
359-
logger.error(f"fast-langdetect: Detection error: {e}")
360-
raise DetectError("Language detection failed") from e
343+
labels, scores = ft_model.predict(normalized_text, k=k, threshold=threshold)
344+
results = [
345+
{
346+
"lang": label.replace("__label__", ""),
347+
"score": min(float(score), 1.0),
348+
}
349+
for label, score in zip(labels, scores)
350+
]
351+
return sorted(results, key=lambda x: x["score"], reverse=True)
361352

362353

363354
# Global instance for simple usage
@@ -367,7 +358,7 @@ def detect(
367358
def detect(
368359
text: str,
369360
*,
370-
model: Literal["lite", "full", "auto"] = "auto",
361+
model: Optional[Literal["lite", "full", "auto"]] = None,
371362
k: int = 1,
372363
threshold: float = 0.0,
373364
config: Optional[LangDetectConfig] = None,

tests/test_real_detection.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
detect,
66
LangDetector,
77
LangDetectConfig,
8-
DetectError,
98
)
109

1110
# Test samples with known languages
@@ -96,7 +95,7 @@ def test_not_found_model_without_fallback_on_io_error(self):
9695
cache_dir="/nonexistent/path",
9796
)
9897
detector = LangDetector(config)
99-
with pytest.raises(DetectError):
98+
with pytest.raises(FileNotFoundError):
10099
detector.detect("Hello world", model="full", k=1)
101100

102101
@pytest.mark.real

0 commit comments

Comments (0)