Skip to content

Commit 1ed1110

Browse files
committed
Make translations use list instead of dict
1 parent 863994a commit 1ed1110

File tree

1 file changed

+28
-10
lines changed

1 file changed

+28
-10
lines changed

llmstack/processors/providers/promptly/html_translation.py

Lines changed: 28 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -147,25 +147,29 @@ def _translate_with_provider(self, chunk: str) -> str:
147147
raise ValueError("LLM client not found")
148148

149149
translation_prompt = (
150-
"You are provided a JSON object with key-value pairs."
151-
+ "The key is a unique identifier for the text provided in the value."
152-
+ "Your task is to tranlate text in the value. You will translate the text"
150+
"You are provided a JSON list of strings to translate."
151+
+ "Your task is to translate the text in the list. You will translate the text"
153152
+ f" from {self._input.input_language} language to {self._input.output_language}"
154-
+ " language. Always respond with a valid JSON object only."
153+
+ " language. Always respond with a valid JSON list only. If you are not able to translate the text, return the same text. Make sure the returned JSON list has the same order and length as the input JSON list. Do not use ```json in the response."
155154
)
156155

157156
if self._config.translation_guideline:
158157
translation_prompt += f"\nIn addition to the above instructions follow the following guidelines for translation {self._config.translation_guideline}"
159158

160159
chunks_json = json.loads(chunk)
161160
final_chunks_json = {}
161+
final_chunks_set = set()
162162
for key, value in chunks_json.items():
163163
if value.strip() == "":
164164
# Skip empty strings
165165
continue
166+
if value in self._translation_mapping or value in final_chunks_set:
167+
continue
168+
166169
final_chunks_json[key] = value
170+
final_chunks_set.add(value)
167171

168-
translation_prompt += f"\n---\n{json.dumps(final_chunks_json)}"
172+
translation_prompt += f"\n---\n{json.dumps(list(final_chunks_json.values()))}"
169173

170174
messages = [
171175
{"role": "system", "content": self._config.system_message},
@@ -191,11 +195,18 @@ def _translate_with_provider(self, chunk: str) -> str:
191195
except Exception as e:
192196
logger.error(f"Error: {e}, response: {model_response}")
193197

194-
for entry in json_input:
195-
if entry not in json_result:
196-
json_result[entry] = json_input[entry]
198+
output_json = {}
199+
for index, entry in enumerate(final_chunks_json.keys()):
200+
output_json[entry] = json_result[index]
201+
self._translation_mapping[final_chunks_json[entry]] = json_result[index]
197202

198-
return json_result
203+
for index, entry in enumerate(json_input.keys()):
204+
if json_input[entry] in self._translation_mapping:
205+
output_json[entry] = self._translation_mapping[json_input[entry]]
206+
elif entry not in output_json:
207+
output_json[entry] = json_input[entry]
208+
209+
return output_json
199210

200211
def _get_elements_with_text(self, html_element: BeautifulSoup) -> List[str]:
201212
if html_element.name is None:
@@ -211,8 +222,14 @@ def _split_element_text_dict(self, element_text_dict: dict) -> List[dict]:
211222
result = []
212223
current_dict = {}
213224
current_length = 0
225+
seen_values = set()
214226
for key, value in element_text_dict.items():
215-
entry_length = len(value) + len(key)
227+
if value not in seen_values:
228+
entry_length = len(value)
229+
seen_values.add(value)
230+
else:
231+
entry_length = 0
232+
216233
if current_dict and current_length + entry_length > self._config.chunk_size:
217234
result.append(current_dict)
218235
current_dict = {}
@@ -231,6 +248,7 @@ def process(self) -> dict:
231248
total_translated_strings = 0
232249
output_stream = self._output_stream
233250
html_input = self._input.html
251+
self._translation_mapping = {}
234252

235253
html_doc = BeautifulSoup(html_input, "html.parser")
236254

0 commit comments

Comments
 (0)