Skip to content

Commit 7120e38

Browse files
committed
fix and refine
1 parent fdecf03 commit 7120e38

File tree

2 files changed

+104
-115
lines changed

2 files changed

+104
-115
lines changed

tester/accuracy.py

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test(self):
5252
print("gen_numpy_input failed")
5353
return
5454
except Exception as err:
55-
print("[numpy error]", self.api_config.config, "\n", str(err))
55+
print(f"[numpy error] {self.api_config.config}\n{str(err)}")
5656
traceback.print_exc()
5757
write_to_log("numpy_error", self.api_config.config)
5858
return
@@ -121,7 +121,7 @@ def test(self):
121121

122122
paddle.base.core.eager._for_test_check_cuda_error()
123123
except Exception as err:
124-
print("[torch error]", self.api_config.config, "\n", str(err), flush=True)
124+
print(f"[torch error] {self.api_config.config}\n{str(err)}", flush=True)
125125
traceback.print_exc()
126126
write_to_log("torch_error", self.api_config.config)
127127
if "CUDA error" in str(err) or "memory corruption" in str(err) or "CUDA out of memory" in str(err):
@@ -145,7 +145,7 @@ def test(self):
145145
del inputs_list, result_outputs, result_outputs_grads
146146
except Exception as err:
147147
if str(err).startswith("Too large tensor to get cached numpy: "):
148-
print("[numpy error]", self.api_config.config, "\n", str(err))
148+
print(f"[numpy error] {self.api_config.config}\n{str(err)}")
149149
write_to_log("numpy_error", self.api_config.config)
150150
return
151151
print(str(err), flush=True)
@@ -154,7 +154,7 @@ def test(self):
154154
try:
155155
paddle.base.core.eager._for_test_check_cuda_error()
156156
except Exception as err:
157-
print("[torch error] backward", self.api_config.config, "\n", str(err), flush=True)
157+
print(f"[torch error] backward {self.api_config.config}\n{str(err)}", flush=True)
158158
write_to_log("torch_error", self.api_config.config)
159159
raise
160160
else:
@@ -200,10 +200,10 @@ def process_torch_outputs(obj):
200200
paddle_output = self.paddle_args[0] if len(self.paddle_args) > 0 else next(iter(self.paddle_kwargs.values()))
201201
except Exception as err:
202202
if self.should_ignore_paddle_error(str(err)):
203-
print("[Pass]", self.api_config.config, flush=True)
203+
print(f"[Pass] {self.api_config.config}", flush=True)
204204
write_to_log("pass", self.api_config.config)
205205
return
206-
print("[paddle error]", self.api_config.config, "\n", str(err), flush=True)
206+
print(f"[paddle error] {self.api_config.config}\n{str(err)}", flush=True)
207207
write_to_log("paddle_error", self.api_config.config)
208208
if "CUDA error" in str(err) or "memory corruption" in str(err):
209209
raise err
@@ -214,14 +214,14 @@ def process_torch_outputs(obj):
214214
try:
215215
paddle.base.core.eager._for_test_check_cuda_error()
216216
except Exception as err:
217-
print("[cuda error]", self.api_config.config, "\n", str(err), flush=True)
217+
print(f"[cuda error] {self.api_config.config}\n{str(err)}", flush=True)
218218
write_to_log("paddle_error", self.api_config.config)
219219
raise
220220

221221
paddle_output, torch_output = process_output(self.api_config, paddle_output, torch_output)
222222

223223
self.is_backward = False
224-
def compare_paddle_and_torch(paddle_tensor, torch_tensor) -> bool:
224+
def compare_paddle_and_torch(paddle_tensor, torch_tensor, idx=0) -> bool:
225225
try:
226226
# if paddle_tensor.dtype == paddle.bfloat16:
227227
# paddle_tensor = paddle.cast(paddle_tensor, dtype="float32")
@@ -231,9 +231,9 @@ def compare_paddle_and_torch(paddle_tensor, torch_tensor) -> bool:
231231
self.torch_assert_accuracy(paddle_tensor, torch_tensor, atol=self.atol, rtol=self.rtol)
232232
except Exception as err:
233233
if self.is_backward:
234-
print(f"[accuracy error] backward {self.api_config.config}\n{str(err)}", flush=True)
234+
print(f"[accuracy error] backward at {idx} {self.api_config.config}\n{str(err)}", flush=True)
235235
else:
236-
print(f"[accuracy error] {self.api_config.config}\n{str(err)}", flush=True)
236+
print(f"[accuracy error] at {idx} {self.api_config.config}\n{str(err)}", flush=True)
237237
write_to_log("accuracy_error", self.api_config.config)
238238
return False
239239
return True
@@ -249,57 +249,64 @@ def compare_paddle_and_torch(paddle_tensor, torch_tensor) -> bool:
249249
assert paddle_output.shape == [], "paddle_output shape is not []"
250250
assert bool(paddle_output) == torch_output, f"paddle_output {bool(paddle_output)} is not equal to torch_output {torch_output}"
251251
except Exception as err:
252-
print("[accuracy error]", self.api_config.config, "\n", str(err), flush=True)
252+
print(f"[not compare] {self.api_config.config}\n{str(err)}", flush=True)
253253
write_to_log("accuracy_error", self.api_config.config)
254254
return
255255
elif isinstance(torch_output, (torch.return_types.max, torch.return_types.min)):
256256
torch_output = torch_output.values
257257
if not compare_paddle_and_torch(paddle_output, torch_output):
258258
return
259-
elif (paddle_out is None or paddle_out.size == 0) and torch_output is None:
260-
pass
261259
else:
262-
print("[accuracy error]", self.api_config.config, "\n[output type diff error1], ", type(torch_output), flush=True)
260+
print(
261+
f"[not compare] {self.api_config.config}\n"
262+
f"torch is {type(torch_output)} but paddle is {type(paddle_output)}",
263+
flush=True,
264+
)
263265
write_to_log("accuracy_error", self.api_config.config)
264266
return
265267
elif isinstance(paddle_output, (list, tuple)):
266268
if not isinstance(torch_output, (list, tuple)):
267-
print("[output type diff error]", self.api_config.config, flush=True)
269+
print(f"[not compare] {self.api_config.config}\n"
270+
f"torch is {type(torch_output)} but paddle is {type(paddle_output)}",
271+
flush=True)
272+
write_to_log("accuracy_error", self.api_config.config)
268273
return
269274
paddle_output = list(paddle_output)
270275
torch_output = list(torch_output)
271276
if len(paddle_output) != len(torch_output):
272-
print("[accuracy error]", self.api_config.config, "\n[output type diff error2], ", len(paddle_output), len(torch_output), flush=True)
277+
print(f"[not compare] {self.api_config.config}\n"
278+
f"torch len is {len(torch_output)} but paddle len is {len(paddle_output)}",
279+
flush=True)
273280
write_to_log("accuracy_error", self.api_config.config)
274281
return
275-
for paddle_item, torch_item in zip(paddle_output, torch_output):
282+
for i, (paddle_item, torch_item) in enumerate(zip(paddle_output, torch_output)):
276283
if isinstance(paddle_item, int) or self.api_config.api_name.endswith('tolist'):
277284
self.np_assert_accuracy(numpy.array(paddle_item), numpy.array(torch_item), atol=self.atol, rtol=self.rtol)
278285
# especially for paddle.vision.ops.distribute_fpn_proposals
279286
elif isinstance(paddle_item, list) and isinstance(torch_item, list):
280287
if any(isinstance(x, paddle.Tensor) for x in paddle_item) and any(isinstance(x, torch.Tensor) for x in torch_item):
281288
for paddle_item_sub, torch_item_sub in zip(paddle_item, torch_item):
282-
if not compare_paddle_and_torch(paddle_item_sub, torch_item_sub):
289+
if not compare_paddle_and_torch(paddle_item_sub, torch_item_sub, i):
283290
return
284291
else:
285-
print("[accuracy error]", self.api_config.config, "\n[output type diff error4]", flush=True)
292+
print(f"[not compare] at {i} {self.api_config.config}\n"
293+
f"torch is {type(torch_item)} but paddle is {type(paddle_item)}",
294+
flush=True)
286295
write_to_log("accuracy_error", self.api_config.config)
287296
return
288-
elif (paddle_item is None or paddle_item.size == 0) and torch_item is None:
289-
continue
290-
elif not isinstance(paddle_item, paddle.Tensor):
291-
print("[not compare]", paddle_item, torch_item, flush=True)
292-
write_to_log("accuracy_error", self.api_config.config)
293-
return
294-
elif not isinstance(torch_item, torch.Tensor):
295-
print("[accuracy error]", self.api_config.config, "\n[output type diff error3], ", type(torch_item), flush=True)
297+
elif (paddle_item is None or not paddle_item._is_initialized()) and torch_item is None:
298+
pass
299+
elif not isinstance(paddle_item, paddle.Tensor) or not isinstance(torch_item, torch.Tensor):
300+
print(f"[not compare] at {i} {self.api_config.config}\n"
301+
f"torch is {type(torch_item)} but paddle is {type(paddle_item)}",
302+
flush=True)
296303
write_to_log("accuracy_error", self.api_config.config)
297304
return
298305
else:
299-
if not compare_paddle_and_torch(paddle_item, torch_item):
306+
if not compare_paddle_and_torch(paddle_item, torch_item, i):
300307
return
301308

302-
# Forward check now pass.
309+
# Forward check now passes.
303310
# Then do paddle backward and backward result check.
304311
if torch_grad_success:
305312
self.is_backward = True
@@ -313,14 +320,14 @@ def compare_paddle_and_torch(paddle_tensor, torch_tensor) -> bool:
313320
del inputs_list, result_outputs, result_outputs_grads
314321
except Exception as err:
315322
if str(err).startswith("Too large tensor to get cached numpy: "):
316-
print("[numpy error]", self.api_config.config, "\n", str(err))
323+
print(f"[numpy error] backward {self.api_config.config}\n{str(err)}", flush=True)
317324
write_to_log("numpy_error", self.api_config.config)
318325
return
319326
if self.should_ignore_paddle_error(str(err)):
320-
print("[Pass]", self.api_config.config, flush=True)
327+
print(f"[Pass] {self.api_config.config}", flush=True)
321328
write_to_log("pass", self.api_config.config)
322329
return
323-
print("[paddle error] backward", self.api_config.config, "\n", str(err), flush=True)
330+
print(f"[paddle error] backward {self.api_config.config}\n{str(err)}", flush=True)
324331
write_to_log("paddle_error", self.api_config.config)
325332
if "CUDA error" in str(err) or "memory corruption" in str(err):
326333
raise err
@@ -331,7 +338,7 @@ def compare_paddle_and_torch(paddle_tensor, torch_tensor) -> bool:
331338
try:
332339
paddle.base.core.eager._for_test_check_cuda_error()
333340
except Exception as err:
334-
print("[cuda error] backward", self.api_config.config, "\n", str(err), flush=True)
341+
print(f"[cuda error] backward {self.api_config.config}\n{str(err)}", flush=True)
335342
write_to_log("paddle_error", self.api_config.config)
336343
raise
337344

@@ -342,40 +349,40 @@ def compare_paddle_and_torch(paddle_tensor, torch_tensor) -> bool:
342349
if isinstance(torch_out_grads, torch.Tensor):
343350
if not compare_paddle_and_torch(paddle_out_grads, torch_out_grads):
344351
return
345-
elif (paddle_out_grads is None or paddle_out_grads.size == 0) and torch_out_grads is None:
346-
pass
347352
else:
348-
print("[accuracy error] backward", self.api_config.config, "\n[output type diff error1], ", type(torch_out_grads), flush=True)
353+
print(f"[not compare] backward {self.api_config.config}\n"
354+
f"torch is {type(torch_out_grads)} but paddle is {type(paddle_out_grads)}", flush=True)
349355
write_to_log("accuracy_error", self.api_config.config)
350356
return
351357
elif isinstance(paddle_out_grads, (list, tuple)):
352358
if not isinstance(torch_out_grads, (list, tuple)):
353-
print("[output type diff error]", self.api_config.config, flush=True)
359+
print(f"[not compare] backward {self.api_config.config}\n"
360+
f"torch is {type(torch_out_grads)} but paddle is {type(paddle_out_grads)}", flush=True)
361+
write_to_log("accuracy_error", self.api_config.config)
354362
return
355363
paddle_out_grads = list(paddle_out_grads)
356364
torch_out_grads = list(torch_out_grads)
357365
if len(paddle_out_grads) != len(torch_out_grads):
358-
print("[accuracy error] backward", self.api_config.config, "\n[output type diff error2], ", len(paddle_out_grads), len(torch_out_grads), flush=True)
366+
print(f"[not compare] backward {self.api_config.config}\n"
367+
f"torch len is {len(torch_out_grads)} but paddle len is {len(paddle_out_grads)}", flush=True)
359368
write_to_log("accuracy_error", self.api_config.config)
360369
return
361-
for paddle_item, torch_item in zip(paddle_out_grads, torch_out_grads):
370+
for i, (paddle_item, torch_item) in enumerate(zip(paddle_out_grads, torch_out_grads)):
362371
if isinstance(paddle_item, int):
363372
self.np_assert_accuracy(numpy.array(paddle_item), numpy.array(torch_item), atol=self.atol, rtol=self.rtol)
364-
elif (paddle_item is None or paddle_item.size == 0) and torch_item is None:
365-
continue
366-
elif not isinstance(paddle_item, paddle.Tensor):
367-
print("[not compare]", paddle_item, torch_item, flush=True)
368-
write_to_log("accuracy_error", self.api_config.config)
369-
return
370-
elif not isinstance(torch_item, torch.Tensor):
371-
print("[accuracy error] backward", self.api_config.config, "\n[output type diff error3], ", type(torch_out_grads[i]), flush=True)
373+
elif (paddle_item is None or not paddle_item._is_initialized()) and torch_item is None:
374+
pass
375+
elif not isinstance(paddle_item, paddle.Tensor) or not isinstance(torch_item, torch.Tensor):
376+
print(f"[not compare] backward at {i} {self.api_config.config}\n"
377+
f"torch is {type(torch_item)} but paddle is {type(paddle_item)}",
378+
flush=True)
372379
write_to_log("accuracy_error", self.api_config.config)
373380
return
374381
else:
375-
if not compare_paddle_and_torch(paddle_item, torch_item):
382+
if not compare_paddle_and_torch(paddle_item, torch_item, i):
376383
return
377384

378-
print("[Pass]", self.api_config.config, flush=True)
385+
print(f"[Pass] {self.api_config.config}", flush=True)
379386
write_to_log("pass", self.api_config.config)
380387

381388

@@ -479,4 +486,9 @@ def process_grad_output(api_config, paddle_out_grads, torch_out_grads):
479486
if is_upper
480487
else torch.tril(torch_out_grads[1])
481488
)
489+
elif api_config.api_name == "paddle.incubate.nn.functional.fused_rotary_position_embedding":
490+
# Paddle only has 3 outputs/grads Q, K, V
491+
valid_out_num = len([out for out in paddle_out_grads if out is not None])
492+
paddle_out_grads = paddle_out_grads[:valid_out_num]
493+
torch_out_grads = torch_out_grads[:valid_out_num]
482494
return paddle_out_grads, torch_out_grads

0 commit comments

Comments
 (0)