Skip to content

Commit 99d5ade

Browse files
authored
Merge pull request #335 from mkekez-SIE/issue-334
Add subject to body of emails for OpenAI parser
2 parents 70ca5a3 + a9fbddd commit 99d5ade

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

changes/335.housekeeping

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add subject to body of emails for OpenAI parser

circuit_maintenance_parser/provider.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
133133
if os.getenv("PARSER_OPENAI_API_KEY"):
134134
self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]))
135135

136+
# Add subject to all html or text/* data_parts if not already present.
137+
self.add_subject_to_text(data)
138+
136139
for processor in self._processors:
137140
try:
138141
return processor.process(data, self.get_extended_data())
@@ -152,6 +155,28 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
152155
related_exceptions=related_exceptions,
153156
)
154157

158+
def add_subject_to_text(self, data: "NotificationData") -> None:
    """Append the email subject to every text/* and html data part that lacks it.

    The subject is taken from the first ``email-header-subject`` part. When there is
    no such part, or its content decodes to an empty string, ``data`` is left untouched.

    Args:
        data: Notification whose ``data_parts`` list is replaced in place. Parts that
            need the subject are rebuilt as new objects of the same type; every other
            part is carried over as is.
    """
    subject = next(
        (
            part.content.decode(errors="ignore")
            for part in data.data_parts
            if part.type == "email-header-subject"
        ),
        None,
    )
    if not subject:
        return

    subject_bytes = subject.encode()
    new_data_parts = []
    for part in data.data_parts:
        is_textual = part.type.startswith(("text/", "html"))
        # Decoding is only used for the containment check, never to rebuild the body.
        if is_textual and subject not in part.content.decode(errors="ignore"):
            # Append to the original bytes: round-tripping the body through
            # decode(errors="ignore")/encode() would silently drop every byte that
            # is not valid UTF-8 (e.g. bodies sent in another charset).
            part = type(part)(part.type, part.content + b"\n" + subject_bytes)
        new_data_parts.append(part)
    data.data_parts = new_data_parts
179+
155180
@classmethod
156181
def get_default_organizer(cls) -> str:
157182
"""Expose default_organizer as class attribute."""

tests/unit/test_providers.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,102 @@ def test_provider_gets_mlparser(provider_class):
131131
assert provider._processors[-1] == CombinedProcessor( # pylint: disable=protected-access
132132
data_parsers=[EmailDateParser, OpenAIParser]
133133
)
134+
135+
136+
def test_add_subject_to_text_appends_subject_to_text_parts():
    """Check that text/* and html parts gain the subject while other part types are left alone."""
    provider = GenericProvider()

    subject = b"Test Maintenance Subject"
    pdf_body = b"binary pdf content"
    # (part type, original body) for every part expected to receive the subject.
    textual_parts = [
        ("text/plain", b"This is plain text content"),
        ("text/html", b"<html><body>This is HTML content</body></html>"),
        ("html", b"<div>Another HTML content</div>"),
    ]

    # Layout: index 0 is the subject header, 1-3 the textual parts, 4 the PDF.
    data = NotificationData()
    data.add_data_part("email-header-subject", subject)
    for part_type, body in textual_parts:
        data.add_data_part(part_type, body)
    data.add_data_part("application/pdf", pdf_body)

    # Precondition: none of the textual parts mentions the subject yet.
    for index in range(1, len(textual_parts) + 1):
        assert subject not in data.data_parts[index].content

    provider.add_subject_to_text(data)

    # Each textual part now ends with a newline followed by the subject.
    for index, (_, body) in enumerate(textual_parts, start=1):
        updated = data.data_parts[index]
        assert subject in updated.content
        assert updated.content == body + b"\n" + subject

    # The PDF part must come through untouched.
    pdf_part_after = data.data_parts[4]
    assert pdf_part_after.content == pdf_body
    assert subject not in pdf_part_after.content
178+
179+
180+
def test_add_subject_to_text_skips_when_subject_already_present():
    """Check that a part already containing the subject is not modified, while others still are."""
    provider = GenericProvider()

    # Only the text/plain body already mentions the subject.
    data = NotificationData()
    for part_type, body in (
        ("email-header-subject", b"Test Subject"),
        ("text/plain", b"Content with Test Subject already included"),
        ("text/html", b"<html>No subject here</html>"),
    ):
        data.add_data_part(part_type, body)

    provider.add_subject_to_text(data)

    # text/plain: subject was present, so the body is unchanged.
    assert data.data_parts[1].content == b"Content with Test Subject already included"

    # text/html: subject was missing, so it is appended after a newline.
    assert data.data_parts[2].content == b"<html>No subject here</html>\nTest Subject"
200+
201+
202+
def test_add_subject_to_text_no_subject_header():
    """Check that nothing is modified when the notification has no email-header-subject part."""
    provider = GenericProvider()

    # Notification made of body parts only; there is no subject header.
    data = NotificationData()
    data.add_data_part("text/plain", b"This is plain text content")
    data.add_data_part("text/html", b"<html><body>This is HTML content</body></html>")

    # Snapshot both bodies before invoking the method under test.
    contents_before = [data.data_parts[0].content, data.data_parts[1].content]

    provider.add_subject_to_text(data)

    # Both bodies must be exactly what they were.
    assert data.data_parts[0].content == contents_before[0]
    assert data.data_parts[1].content == contents_before[1]
220+
221+
222+
def test_add_subject_to_text_handles_decode_errors():
    """Test that add_subject_to_text tolerates undecodable bytes in the subject and in body parts."""
    provider = GenericProvider()

    # Create test data with invalid UTF-8 sequences
    data = NotificationData()
    data.add_data_part("email-header-subject", b"\xff\xfe")  # Invalid UTF-8
    data.add_data_part("text/plain", b"\x80\x81")  # Invalid UTF-8

    # This should not raise an exception due to errors="ignore" in decode()
    provider.add_subject_to_text(data)

    # The subject decodes to an empty string, so there is nothing to append.
    assert data.data_parts[0].content == b"\xff\xfe"
    assert data.data_parts[1].content == b"\x80\x81"

    # With a usable subject the undecodable body is actually decoded for the
    # containment check; the subject must still be appended after a newline.
    data = NotificationData()
    data.add_data_part("email-header-subject", b"Valid Subject")
    data.add_data_part("text/plain", b"\x80\x81")  # Invalid UTF-8

    provider.add_subject_to_text(data)

    assert data.data_parts[1].content.endswith(b"\nValid Subject")

0 commit comments

Comments
 (0)