Skip to content

Commit a417ff5

Browse files
committed
wip: add spell checking to notebooks CI
1 parent a92da30 commit a417ff5

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

.codespellrc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[codespell]
2+
skip = .git,*.pdf,*.png,*.jpg,*.jpeg,*.gif,*.svg,*.bmp,*.tiff,*.pyc,venv,.venv,.ipynb_checkpoints
3+
check-filenames = true
4+
quiet-level = 2
5+
ignore-words-list = flexcompute,tidy3d,TE,TM,te,tm,FOM,fom,Commun,Thru
6+
ignore-regex = [a-f0-9]{40}
7+
builtin = clear,rare,informal

spellcheck.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import os
5+
import re
6+
import subprocess
7+
import sys
8+
9+
10+
def get_relative_path(notebook):
    """Return the notebook path expressed relative to the current working directory."""
    working_dir = os.getcwd()
    return os.path.relpath(notebook, start=working_dir)
13+
14+
15+
def check_spelling(notebook):
    """Check spelling in a notebook and report any errors.

    The notebook's outputs are stripped with ``nbstripout``, embedded base64
    image data is removed, and the remaining text is piped to ``codespell``
    on stdin. Both tools are launched through ``uvx``.

    Args:
        notebook: Path of the notebook file to check.

    Returns:
        True if codespell reported anything for the notebook, or if one of
        the external tools failed; False otherwise.
    """
    rel_path = get_relative_path(notebook)

    with open(notebook, encoding="utf-8") as f:
        content = f.read()

    # nbstripout to remove outputs
    nbstripout_proc = subprocess.run(
        ["uvx", "nbstripout"], input=content, capture_output=True, text=True
    )
    if nbstripout_proc.returncode != 0:
        # A failed strip leaves stdout empty; without this check codespell
        # would see no text and the notebook would silently pass.
        print(
            f"{rel_path}: nbstripout failed (exit code {nbstripout_proc.returncode})",
            file=sys.stderr,
        )
        if nbstripout_proc.stderr:
            print(nbstripout_proc.stderr.rstrip(), file=sys.stderr)
        return True

    # remove image tags with base64 data
    stripped_content = re.sub(
        r'<img\s+src="data:image/[^"]+;base64,[^"]+"[^>]*>|<img\s+src="data:image/[^"]+;base64,[^"]+"[^/>]*/>',
        "",
        nbstripout_proc.stdout,
        flags=re.DOTALL,
    )

    # remove any remaining base64 strings that might appear without proper HTML tags
    stripped_content = re.sub(
        r"data:image/[^;]+;base64,[A-Za-z0-9+/=]+",
        "",
        stripped_content,
        flags=re.DOTALL,
    )

    codespell_proc = subprocess.run(
        ["uvx", "codespell", "-"], input=stripped_content, capture_output=True, text=True
    )

    # sadly we can't get rid of the "Used config files: ..." so we filter it here
    output_lines = []
    for line in codespell_proc.stdout.splitlines():
        if "Used config files:" in line or " 1: .codespellrc" in line:
            continue
        # The input was passed as "-" (stdin), which shows up as the file name
        # at the start of each report line. Drop only that placeholder, so
        # hyphens inside words and suggestions are kept intact.
        if line.startswith("-:"):
            line = line[1:]
        output_lines.append(line)

    output = "\n".join(output_lines)

    if not output and codespell_proc.returncode != 0:
        # A non-zero exit with nothing reported on stdout means the tool
        # itself failed; surface that instead of treating it as a clean run.
        print(
            f"{rel_path}: codespell failed (exit code {codespell_proc.returncode})",
            file=sys.stderr,
        )
        if codespell_proc.stderr:
            print(codespell_proc.stderr.rstrip(), file=sys.stderr)
        return True

    if output:
        print(f"{rel_path}:")
        print(output)
        print("-------------------------------------------")

    return bool(output)
62+
63+
64+
def main():
    """Command-line entry point: spell-check every notebook passed as an argument.

    All notebooks are checked even after a failure has been seen, so a single
    run reports every problem. Exits with status 1 if any notebook had findings.
    """
    cli = argparse.ArgumentParser(description="Check spelling in Jupyter notebooks")
    cli.add_argument("notebooks", nargs="+", help="List of notebook files to check")
    options = cli.parse_args()

    failure_count = 0
    for path in options.notebooks:
        if check_spelling(path):
            failure_count += 1

    if failure_count > 0:
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)