Skip to content

Commit 571499d

Browse files
committed
[GR-67567] Intrinsify _cpython_unicodedata.c
PullRequest: graalpython/3923
2 parents 9b1f66b + 0b3b4d3 commit 571499d

File tree

9 files changed

+221
-55
lines changed

9 files changed

+221
-55
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_unicodedata.py

Lines changed: 59 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
22
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
33
#
44
# The Universal Permissive License (UPL), Version 1.0
@@ -37,34 +37,68 @@
3737
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3838
# SOFTWARE.
3939

40+
import unicodedata
41+
import unittest
4042

41-
def assert_raises(err, fn, *args, **kwargs):
42-
raised = False
43-
try:
44-
fn(*args, **kwargs)
45-
except err:
46-
raised = True
47-
assert raised
43+
class TestUnicodedata(unittest.TestCase):
4844

45+
def test_args_validation(self):
46+
self.assertRaises(TypeError, unicodedata.category, None)
47+
self.assertRaises(TypeError, unicodedata.bidirectional, None)
48+
self.assertRaises(TypeError, unicodedata.name, None)
4949

50-
def test_args_validation():
51-
import unicodedata
52-
assert_raises(TypeError, unicodedata.category, None)
53-
assert_raises(TypeError, unicodedata.bidirectional, None)
54-
assert_raises(TypeError, unicodedata.name, None)
5550

51+
def test_normalize(self):
52+
self.assertRaises(TypeError, unicodedata.normalize)
53+
self.assertRaises(ValueError, unicodedata.normalize, 'unknown', 'xx')
54+
assert unicodedata.normalize('NFKC', '') == ''
5655

57-
def test_normalize():
58-
import unicodedata
59-
assert_raises(TypeError, unicodedata.normalize)
60-
assert_raises(ValueError, unicodedata.normalize, 'unknown', 'xx')
61-
assert unicodedata.normalize('NFKC', '') == ''
6256

57+
def test_category(self):
58+
assert unicodedata.category('\uFFFE') == 'Cn'
59+
assert unicodedata.category('a') == 'Ll'
60+
assert unicodedata.category('A') == 'Lu'
61+
self.assertRaises(TypeError, unicodedata.category)
62+
self.assertRaises(TypeError, unicodedata.category, 'xx')
6363

64-
def test_category():
65-
import unicodedata
66-
assert unicodedata.category('\uFFFE') == 'Cn'
67-
assert unicodedata.category('a') == 'Ll'
68-
assert unicodedata.category('A') == 'Lu'
69-
assert_raises(TypeError, unicodedata.category)
70-
assert_raises(TypeError, unicodedata.category, 'xx')
64+
65+
def test_lookup(self):
66+
unicode_name = "ARABIC SMALL HIGH LIGATURE ALEF WITH LAM WITH YEH"
67+
self.assertEqual(unicodedata.lookup(unicode_name), "\u0616")
68+
69+
unicode_name_alias = "ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE"
70+
self.assertEqual(unicodedata.lookup(unicode_name_alias), "\u0616")
71+
72+
with self.assertRaisesRegex(KeyError, "undefined character name 'wrong-name'"):
73+
unicodedata.lookup("wrong-name")
74+
75+
with self.assertRaisesRegex(KeyError, "name too long"):
76+
unicodedata.lookup("a" * 257)
77+
78+
79+
def test_east_asian_width(self):
80+
list = [1, 2, 3]
81+
with self.assertRaisesRegex(TypeError, r"east_asian_width\(\) argument must be a unicode character, not list"):
82+
unicodedata.east_asian_width(list)
83+
84+
multi_character_string = "abc"
85+
with self.assertRaisesRegex(TypeError, r"east_asian_width\(\) argument must be a unicode character, not str"):
86+
unicodedata.east_asian_width(multi_character_string)
87+
88+
empty_string = ""
89+
with self.assertRaisesRegex(TypeError, r"east_asian_width\(\) argument must be a unicode character, not str"):
90+
unicodedata.east_asian_width(empty_string)
91+
92+
93+
def test_combining(self):
94+
list = [1, 2, 3]
95+
with self.assertRaisesRegex(TypeError, r"combining\(\) argument must be a unicode character, not list"):
96+
unicodedata.combining(list)
97+
98+
multi_character_string = "abc"
99+
with self.assertRaisesRegex(TypeError, r"combining\(\) argument must be a unicode character, not str"):
100+
unicodedata.combining(multi_character_string)
101+
102+
empty_string = ""
103+
with self.assertRaisesRegex(TypeError, r"combining\(\) argument must be a unicode character, not str"):
104+
unicodedata.combining(empty_string)

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_ucn.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,5 @@ test.test_ucn.UnicodeNamesTest.test_errors @ darwin-arm64,darwin-x86_64,linux-aa
66
test.test_ucn.UnicodeNamesTest.test_general @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
77
test.test_ucn.UnicodeNamesTest.test_hangul_syllables @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
88
test.test_ucn.UnicodeNamesTest.test_misc_symbols @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
9-
test.test_ucn.UnicodeNamesTest.test_named_sequences_full @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
109
test.test_ucn.UnicodeNamesTest.test_named_sequences_names_in_pua_range @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
11-
test.test_ucn.UnicodeNamesTest.test_named_sequences_sample @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1210
test.test_ucn.UnicodeNamesTest.test_strict_error_handling @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_unicodedata.txt

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,13 @@ test.test_unicodedata.NormalizationTest.test_edge_cases @ darwin-arm64,darwin-x8
33
test.test_unicodedata.NormalizationTest.test_normalization @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
44
test.test_unicodedata.UnicodeFunctionsTest.test_category @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
55
test.test_unicodedata.UnicodeFunctionsTest.test_combining @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
6-
test.test_unicodedata.UnicodeFunctionsTest.test_decimal @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
7-
test.test_unicodedata.UnicodeFunctionsTest.test_decomposition @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
8-
test.test_unicodedata.UnicodeFunctionsTest.test_digit @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
96
test.test_unicodedata.UnicodeFunctionsTest.test_east_asian_width @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
107
test.test_unicodedata.UnicodeFunctionsTest.test_east_asian_width_9_0_changes @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
118
test.test_unicodedata.UnicodeFunctionsTest.test_east_asian_width_unassigned @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
129
test.test_unicodedata.UnicodeFunctionsTest.test_issue10254 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1310
test.test_unicodedata.UnicodeFunctionsTest.test_issue29456 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
14-
test.test_unicodedata.UnicodeFunctionsTest.test_mirrored @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
15-
test.test_unicodedata.UnicodeFunctionsTest.test_numeric @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
11+
test.test_unicodedata.UnicodeFunctionsTest.test_name_inverse_lookup @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1612
test.test_unicodedata.UnicodeFunctionsTest.test_pr29 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1713
test.test_unicodedata.UnicodeMiscTest.test_bug_1704793 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1814
test.test_unicodedata.UnicodeMiscTest.test_bug_4971 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
1915
test.test_unicodedata.UnicodeMiscTest.test_bug_5828 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
20-
test.test_unicodedata.UnicodeMiscTest.test_decimal_numeric_consistent @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
21-
test.test_unicodedata.UnicodeMiscTest.test_digit_numeric_consistent @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64
22-
test.test_unicodedata.UnicodeMiscTest.test_ucd_510 @ darwin-arm64,darwin-x86_64,linux-aarch64,linux-x86_64,win32-AMD64

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags_bytecode_dsl/test_ucn.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,5 @@ test.test_ucn.UnicodeNamesTest.test_errors @ linux-x86_64
66
test.test_ucn.UnicodeNamesTest.test_general @ linux-x86_64
77
test.test_ucn.UnicodeNamesTest.test_hangul_syllables @ linux-x86_64
88
test.test_ucn.UnicodeNamesTest.test_misc_symbols @ linux-x86_64
9-
test.test_ucn.UnicodeNamesTest.test_named_sequences_full @ linux-x86_64
109
test.test_ucn.UnicodeNamesTest.test_named_sequences_names_in_pua_range @ linux-x86_64
11-
test.test_ucn.UnicodeNamesTest.test_named_sequences_sample @ linux-x86_64
1210
test.test_ucn.UnicodeNamesTest.test_strict_error_handling @ linux-x86_64

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags_bytecode_dsl/test_unicodedata.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,11 @@ test.test_unicodedata.NormalizationTest.test_edge_cases @ linux-x86_64
33
test.test_unicodedata.NormalizationTest.test_normalization @ linux-x86_64
44
test.test_unicodedata.UnicodeFunctionsTest.test_category @ linux-x86_64
55
test.test_unicodedata.UnicodeFunctionsTest.test_combining @ linux-x86_64
6-
test.test_unicodedata.UnicodeFunctionsTest.test_decimal @ linux-x86_64
7-
test.test_unicodedata.UnicodeFunctionsTest.test_decomposition @ linux-x86_64
8-
test.test_unicodedata.UnicodeFunctionsTest.test_digit @ linux-x86_64
96
test.test_unicodedata.UnicodeFunctionsTest.test_east_asian_width @ linux-x86_64
107
test.test_unicodedata.UnicodeFunctionsTest.test_east_asian_width_9_0_changes @ linux-x86_64
118
test.test_unicodedata.UnicodeFunctionsTest.test_issue10254 @ linux-x86_64
129
test.test_unicodedata.UnicodeFunctionsTest.test_issue29456 @ linux-x86_64
13-
test.test_unicodedata.UnicodeFunctionsTest.test_mirrored @ linux-x86_64
14-
test.test_unicodedata.UnicodeFunctionsTest.test_numeric @ linux-x86_64
1510
test.test_unicodedata.UnicodeFunctionsTest.test_pr29 @ linux-x86_64
1611
test.test_unicodedata.UnicodeMiscTest.test_bug_1704793 @ linux-x86_64
1712
test.test_unicodedata.UnicodeMiscTest.test_bug_4971 @ linux-x86_64
1813
test.test_unicodedata.UnicodeMiscTest.test_bug_5828 @ linux-x86_64
19-
test.test_unicodedata.UnicodeMiscTest.test_decimal_numeric_consistent @ linux-x86_64
20-
test.test_unicodedata.UnicodeMiscTest.test_digit_numeric_consistent @ linux-x86_64
21-
test.test_unicodedata.UnicodeMiscTest.test_ucd_510 @ linux-x86_64

0 commit comments

Comments
 (0)