Skip to content

Commit 973241b

Browse files
committed
add spellcheck workflow
1 parent 03df3ce commit 973241b

File tree

2 files changed

+152
-67
lines changed

2 files changed

+152
-67
lines changed
Lines changed: 43 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,55 @@
1-
name: "notebooks-linting"
1+
name: "lint-notebooks"
22

33
on:
4-
workflow_dispatch:
54
push:
6-
branches: [ main, develop ]
5+
branches: [ develop ]
76
pull_request:
8-
branches: [ main, develop ]
7+
branches: [ develop ]
8+
9+
permissions:
10+
contents: read
11+
pull-requests: write
912

1013
jobs:
1114
lint:
12-
name: Run notebook linting
15+
name: Run notebook linting and spell check
1316
runs-on: ubuntu-latest
1417
steps:
15-
- uses: actions/checkout@v4
16-
with:
17-
fetch-depth: 1
18-
19-
- uses: astral-sh/ruff-action@v3
18+
- name: Checkout code
19+
uses: actions/checkout@v4
2020
with:
21-
version: 0.5.5
22-
21+
fetch-depth: 2
22+
23+
- name: Install uv
24+
uses: astral-sh/setup-uv@v5
25+
2326
- name: Run ruff format
24-
run: ruff format --check --diff .
25-
27+
run: uvx ruff format --check --diff .
28+
2629
- name: Run ruff check
27-
run: ruff check .
30+
run: uvx ruff check .
31+
32+
- name: Get changed notebook files
33+
id: changed_notebooks
34+
if: github.event_name == 'pull_request'
35+
uses: tj-actions/changed-files@v46
36+
with:
37+
files: |
38+
**.ipynb
39+
40+
- name: Run spell check on changed notebooks
41+
id: spellcheck
42+
if: github.event_name == 'pull_request' && steps.changed_notebooks.outputs.any_changed == 'true'
43+
continue-on-error: true
44+
run: |
45+
uvx python spellcheck.py ${{ steps.changed_notebooks.outputs.all_changed_files }} > spellcheck_output.txt || true
46+
47+
- name: Post spell check comment
48+
if: github.event_name == 'pull_request' && steps.changed_notebooks.outputs.any_changed == 'true' && steps.spellcheck.outcome != 'skipped' && hashFiles('spellcheck_output.txt') != ''
49+
uses: peter-evans/create-or-update-comment@v4
50+
with:
51+
token: ${{ secrets.GITHUB_TOKEN }}
52+
repository: ${{ github.repository }}
53+
issue-number: ${{ github.event.pull_request.number }}
54+
body-path: spellcheck_output.txt
55+
edit-mode: replace

spellcheck.py

Lines changed: 109 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -5,75 +5,132 @@
55
import re
66
import subprocess
77
import sys
8+
from typing import Optional
89

910

10-
def get_relative_path(notebook):
11+
def get_relative_path(notebook: str) -> str:
1112
"""Get the relative path of the notebook from the current directory."""
1213
return os.path.relpath(notebook, os.getcwd())
1314

1415

15-
def check_spelling(notebook):
16-
"""Check spelling in a notebook and return any errors."""
17-
rel_path = get_relative_path(notebook)
18-
19-
with open(notebook, encoding="utf-8") as f:
20-
content = f.read()
21-
22-
# nbstripout to remove outputs
23-
nbstripout_proc = subprocess.run(
24-
["uvx", "nbstripout"], input=content, capture_output=True, text=True
25-
)
26-
27-
# remove image tags with base64 data
28-
stripped_content = re.sub(
29-
r'<img\s+src="data:image/[^"]+;base64,[^"]+"[^>]*>|<img\s+src="data:image/[^"]+;base64,[^"]+"[^/>]*/>',
30-
"",
31-
nbstripout_proc.stdout,
32-
flags=re.DOTALL,
33-
)
34-
35-
# remove any remaining base64 strings that might appear without proper HTML tags
36-
stripped_content = re.sub(
37-
r"data:image/[^;]+;base64,[A-Za-z0-9+/=]+",
38-
"",
39-
stripped_content,
40-
flags=re.DOTALL,
41-
)
42-
43-
codespell_proc = subprocess.run(
44-
["uvx", "codespell", "-"], input=stripped_content, capture_output=True, text=True
45-
)
16+
def check_spelling(notebook: str) -> Optional[str]:
17+
"""
18+
Check spelling in a notebook.
4619
47-
# sadly we can't get rid of the "Used config files: ..." so we filter it here
48-
output_lines = []
49-
for line in codespell_proc.stdout.splitlines():
50-
if "Used config files:" in line or " 1: .codespellrc" in line:
51-
continue
52-
output_lines.append(line)
53-
54-
output = "\n".join(output_lines)
55-
56-
if output:
57-
print(f"{rel_path}:")
58-
print(output.replace("-", ""))
59-
print("-------------------------------------------")
60-
61-
return bool(output)
20+
Returns:
21+
A formatted Markdown string containing spelling errors for the notebook,
22+
using a code block to show codespell's output, or None if no errors were found.
23+
"""
24+
rel_path = get_relative_path(notebook)
25+
error_message_block = None
26+
27+
try:
28+
with open(notebook, encoding="utf-8") as f:
29+
content = f.read()
30+
31+
# nbstripout to remove outputs
32+
nbstripout_proc = subprocess.run(
33+
["uvx", "nbstripout"],
34+
input=content,
35+
capture_output=True,
36+
text=True,
37+
check=True,
38+
)
39+
40+
# remove image tags with base64 data
41+
stripped_content = re.sub(
42+
r'<img\s+src="data:image/[^"]+;base64,[^"]+"[^>]*>|<img\s+src="data:image/[^"]+;base64,[^"]+"[^/>]*/>',
43+
"",
44+
nbstripout_proc.stdout,
45+
flags=re.DOTALL,
46+
)
47+
48+
# remove any remaining base64 strings that might appear without proper HTML tags
49+
stripped_content = re.sub(
50+
r"data:image/[^;]+;base64,[A-Za-z0-9+/=]+",
51+
"",
52+
stripped_content,
53+
flags=re.DOTALL,
54+
)
55+
56+
codespell_proc = subprocess.run(
57+
["uvx", "codespell", "-"],
58+
input=stripped_content,
59+
capture_output=True,
60+
text=True,
61+
check=False, # codespell exits non-zero on errors, which is expected
62+
)
63+
64+
# filter codespell's config file lines
65+
output_lines = []
66+
for line in codespell_proc.stdout.splitlines():
67+
if line.strip().startswith("Used config files:") or re.match(
68+
r"^\s+\d+:\s+\.codespellrc", line
69+
):
70+
continue
71+
output_lines.append(line.replace("-:", "Line ", 1))
72+
73+
filtered_output = "\n".join(output_lines).strip()
74+
75+
if filtered_output:
76+
error_message_block = f"**{rel_path}**:\n```\n{filtered_output}\n```"
77+
78+
except FileNotFoundError:
79+
error_message_block = f"**{rel_path}**: Error - File not found."
80+
except subprocess.CalledProcessError as e:
81+
cmd_str = " ".join(e.cmd)
82+
error_message_block = (
83+
f"**{rel_path}**: Error running command `{cmd_str}`:\n```\n{e.stderr}\n```"
84+
)
85+
except Exception as e:
86+
error_message_block = f"**{rel_path}**: An unexpected error occurred:\n```\n{str(e)}\n```"
87+
88+
return error_message_block
6289

6390

6491
def main():
6592
parser = argparse.ArgumentParser(description="Check spelling in Jupyter notebooks")
6693
parser.add_argument("notebooks", nargs="+", help="List of notebook files to check")
6794
args = parser.parse_args()
6895

69-
has_errors = False
96+
all_errors: list[str] = []
97+
num_files_processed = 0
98+
num_files_with_errors = 0
99+
num_files_with_processing_errors = 0
70100

71101
for notebook in args.notebooks:
72-
if check_spelling(notebook):
73-
has_errors = True
74-
75-
if has_errors:
102+
num_files_processed += 1
103+
error_output = check_spelling(notebook)
104+
if error_output:
105+
all_errors.append(error_output)
106+
if (
107+
"Error running command" in error_output
108+
or "An unexpected error occurred" in error_output
109+
or "Error - File not found" in error_output
110+
):
111+
num_files_with_processing_errors += 1
112+
else:
113+
num_files_with_errors += 1
114+
115+
if all_errors:
116+
print("## Spell Check Report\n")
117+
print("\n\n---\n\n".join(all_errors))
118+
119+
summary_lines = []
120+
if num_files_with_errors > 0:
121+
summary_lines.append(f"Found spelling errors in {num_files_with_errors} file(s).")
122+
if num_files_with_processing_errors > 0:
123+
summary_lines.append(
124+
f"Encountered processing errors in {num_files_with_processing_errors} file(s)."
125+
)
126+
if not summary_lines:
127+
summary_lines.append(f"Found issues in {len(all_errors)} file(s).")
128+
129+
print(f"\n---\nChecked {num_files_processed} notebook(s). " + " ".join(summary_lines))
76130
sys.exit(1)
131+
else:
132+
print(f"Spell check passed successfully for {num_files_processed} notebook(s).")
133+
sys.exit(0)
77134

78135

79136
if __name__ == "__main__":

0 commit comments

Comments
 (0)