Skip to content

Commit 197ed22

Browse files
committed
🆕 feat(examples): add memory usage check script for fast-langdetect models
1 parent 34e852a commit 197ed22

File tree

1 file changed

+130
-0
lines changed

1 file changed

+130
-0
lines changed

examples/memory_usage_check.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Measure memory behavior when loading fast-langdetect models.
4+
5+
Credit: script prepared by github@JackyHe398 (adapted for examples/).
6+
7+
Examples
8+
9+
# Check lite model without limiting memory
10+
python examples/memory_usage_check.py --model lite
11+
12+
# Check full model with a 200 MB limit (should pass on many systems)
13+
python examples/memory_usage_check.py --model full --limit-mb 200
14+
15+
# Force fallback or failure by using a tight limit
16+
python examples/memory_usage_check.py --model full --limit-mb 100
17+
18+
Notes
19+
- RSS measurement uses ru_maxrss which is OS-dependent (kB on Linux, bytes on macOS).
20+
- Address space limits rely on resource.RLIMIT_AS (primarily effective on Unix-like systems).
21+
- For accurate results, run this script from a clean terminal session. Running inside IDEs/REPLs can inflate the
22+
process peak RSS before the script runs, making ru_maxrss appear very large with ~0 delta.
23+
"""
24+
25+
import argparse
26+
import os
27+
import sys
28+
import time
29+
import platform
30+
import resource
31+
from typing import Optional
32+
33+
try:
34+
from fast_langdetect import detect
35+
except Exception: # pragma: no cover
36+
# Support running from repo root without installation
37+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
38+
from fast_langdetect import detect # type: ignore
39+
40+
41+
def set_address_space_limit(limit_mb: Optional[int]) -> None:
    """Cap the process's virtual address space via ``resource.RLIMIT_AS``.

    Args:
        limit_mb: Limit in MB (1 MB = 1024 * 1024 bytes). ``None`` leaves the
            current limit untouched.

    Raises:
        ValueError: If ``limit_mb`` is not a positive integer. ``setrlimit``
            itself may also raise ``ValueError`` or ``OSError`` when the
            platform rejects the requested limit.
    """
    if limit_mb is None:
        return
    limit_mb = int(limit_mb)
    if limit_mb <= 0:
        # A zero or negative cap is never meaningful here; fail loudly instead
        # of handing a nonsensical value to setrlimit.
        raise ValueError(f"limit_mb must be a positive integer, got {limit_mb}")
    limit_bytes = limit_mb * 1024 * 1024
    # Soft and hard limits are set together, so the cap cannot be raised again
    # by an unprivileged process.
    resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
46+
47+
48+
def format_ru_maxrss_mb(val: int, system: Optional[str] = None) -> float:
    """Convert a raw ``ru_maxrss`` reading to MB based on OS semantics.

    - macOS (Darwin): ru_maxrss is in bytes
    - Linux and the BSDs (FreeBSD, OpenBSD, NetBSD): ru_maxrss is in kilobytes

    Args:
        val: Raw ``ru_maxrss`` value as returned by ``resource.getrusage``.
        system: Platform name in ``platform.system()`` form. Defaults to the
            current platform; pass it explicitly to convert a reading that was
            taken on a different OS.

    Returns:
        The value in MB (1 MB = 1024 * 1024 bytes).
    """
    if system is None:
        system = platform.system()
    if system == "Darwin":
        # Darwin is the outlier that reports bytes.
        return val / (1024.0 * 1024.0)
    # Linux, FreeBSD, OpenBSD, NetBSD, ...: kilobytes.
    return val / 1024.0
60+
61+
62+
def current_rss_mb() -> Optional[float]:
    """Report the process's current resident set size in MB, or None.

    Sources are tried in order:
      1) psutil, when it can be imported
      2) the ``VmRSS`` line of /proc/self/status (Linux only)

    Any failure in a source is swallowed on purpose so the caller can fall
    back to peak-only reporting.
    """
    bytes_per_mb = 1024.0 * 1024.0
    try:
        import psutil  # type: ignore

        return psutil.Process().memory_info().rss / bytes_per_mb
    except Exception:
        # psutil is missing or unusable; fall through to /proc.
        pass

    if platform.system() != "Linux":
        return None

    try:
        with open("/proc/self/status", "r") as status_file:
            for entry in status_file:
                if not entry.startswith("VmRSS:"):
                    continue
                # Example: VmRSS:   123456 kB
                fields = entry.split()
                if len(fields) >= 2:
                    return float(fields[1]) / 1024.0
    except Exception:
        pass
    return None
90+
91+
92+
def main() -> int:
    """Run one detection under an optional memory cap and report RSS figures.

    Parses the command line, applies the address-space limit if requested,
    calls ``detect`` once, and prints peak and current RSS before/after.

    Returns:
        Process exit code: 0 on success, 1 if the address-space limit could
        not be applied, 2 if detection raised ``MemoryError``.
    """
    parser = argparse.ArgumentParser(description="Check fast-langdetect memory usage and limits.")
    parser.add_argument("--model", choices=["lite", "full", "auto"], default="auto")
    parser.add_argument("--limit-mb", type=int, default=None, help="Set RLIMIT_AS in MB (Unix-like only)")
    parser.add_argument("--text", default="Hello world", help="Text to detect")
    parser.add_argument("--k", type=int, default=1, help="Top-k predictions")
    args = parser.parse_args()

    try:
        set_address_space_limit(args.limit_mb)
    except (ValueError, OSError) as exc:
        # setrlimit raises ValueError for limits it rejects (e.g. above the
        # hard limit) and OSError when the underlying system call fails;
        # report that cleanly instead of dumping a traceback.
        print(f"Could not apply address space limit of {args.limit_mb} MB: {exc}", file=sys.stderr)
        return 1

    print(f"Model: {args.model}")
    if args.limit_mb is not None:
        print(f"Address space limit: {args.limit_mb} MB")

    # Snapshot both the peak (ru_maxrss) and the current RSS around the call.
    peak_before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    curr_before = current_rss_mb()
    try:
        res = detect(args.text, model=args.model, k=args.k)
    except MemoryError:
        print("MemoryError: model load or inference exceeded limit.")
        return 2
    peak_after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    curr_after = current_rss_mb()

    peak_mb = format_ru_maxrss_mb(peak_after)
    # ru_maxrss never decreases; the clamp only guards against rounding.
    peak_used_mb = max(0.0, peak_mb - format_ru_maxrss_mb(peak_before))

    print(f"Result: {res}")
    print(f"Peak RSS (ru_maxrss): ~{peak_mb:.1f} MB")
    print(f"Approx. peak delta: ~{peak_used_mb:.1f} MB")
    if curr_before is not None and curr_after is not None:
        curr_delta = curr_after - curr_before
        print(
            f"Current RSS before: ~{curr_before:.1f} MB; "
            f"after: ~{curr_after:.1f} MB; "
            f"delta: ~{curr_delta:.1f} MB"
        )
    else:
        print("Current RSS: psutil or /proc not available; showing peak only.")
    return 0
127+
128+
129+
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())

0 commit comments

Comments
 (0)