Commit 751a75f

fix lint

1 parent 3630908 commit 751a75f

File tree

1 file changed: +39 -23 lines

tests/torchtune/models/phi4/test_phi4_tokenizer.py

Lines changed: 39 additions & 23 deletions
@@ -23,15 +23,22 @@ def tokenizer(self):
     @pytest.fixture
     def expected_tokens(self):
         # fmt: off
-        tokens = [100257, 100264, 115, 121, 322, 398, 100265, 10, 1539, 470, 258, 1444, 933, 1940, 511, 446, 100266, 10,
-        100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334, 958, 99, 445, 98, 300, 258, 256, 281,
-        107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827, 512, 1080, 116, 300, 262, 1249, 524, 340, 10, 35, 35, 35, 828, 1160, 117, 807, 1037,
-        71, 1414, 534, 258, 1759, 511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35, 35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10,
-        100264, 115, 121, 322, 398, 100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995, 505, 944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46, 270,
-        776, 1341, 258, 1279, 641, 563, 275, 469, 573, 284, 944, 320, 526, 962, 425, 913, 1402, 97, 356, 446, 115, 284, 1229,
-        1581, 282, 117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262, 739, 1886, 284, 783, 1803, 636, 277,
-        268, 117, 316, 485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531, 476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280, 274, 913, 601, 359, 300, 44, 335, 834, 335,
-        531, 476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257] # noqa
+        tokens = [
+            100257, 100264, 115, 121, 322, 398, 100265, 10, 1539, 470, 258, 1444, 933, 1940, 511, 446, 100266, 10,
+            100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334,
+            958, 99, 445, 98, 300, 258, 256, 281,
+            107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827, 512, 1080, 116, 300, 262, 1249,
+            524, 340, 10, 35, 35, 35, 828, 1160, 117, 807, 1037,
+            71, 1414, 534, 258, 1759, 511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35,
+            35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10,
+            100264, 115, 121, 322, 398, 100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995, 505,
+            944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46, 270,
+            776, 1341, 258, 1279, 641, 563, 275, 469, 573, 284, 944, 320, 526, 962, 425, 913, 1402, 97, 356, 446, 115, 284, 1229,
+            1581, 282, 117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262, 739, 1886, 284, 783, 1803, 636, 277,
+            268, 117, 316, 485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531,
+            476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280, 274, 913, 601, 359, 300, 44, 335, 834, 335,
+            531, 476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257
+        ] # noqa
         return tokens
         # fmt: on

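A note on the markers preserved on both sides of this hunk: "# fmt: off" / "# fmt: on" fence a region that the auto-formatter must leave untouched, and "# noqa" silences the linter on its line (here, almost certainly line length). A toy sketch of the same convention in Python, assuming a Black/flake8-style toolchain; the repo's actual lint config is not shown in this commit:

    # The formatter skips everything between the two fmt markers, so the
    # hand-grouped rows below survive reformatting; "# noqa" tells the
    # linter to ignore that one line (e.g. an over-long literal).
    # fmt: off
    TOKEN_ROWS = [
        1, 2, 3, 4, 5,  # rows grouped by hand for readability
        6, 7, 8, 9, 10,
    ]  # noqa
    # fmt: on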
@@ -81,26 +88,35 @@ def test_tokenize_messages_no_system_prompt(self, tokenizer):
                 "good conversation over coffee.",
             ),
         ]
-        tokens, mask = tokenizer.tokenize_messages(messages, ignore_system_prompt=True, add_eos=True)
+        tokens, mask = tokenizer.tokenize_messages(
+            messages, ignore_system_prompt=True, add_eos=True
+        )

         # fmt: off
-        expected_tokens = [100257, 100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334, 958,
-        99, 445, 98, 300, 258, 256, 281, 107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827, 512, 1080, 116, 300, 262, 1249, 524, 340,
-        10, 35, 35, 35, 828, 1160, 117, 807, 1037, 71, 1414, 534, 258, 1759, 511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35,
-        35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10, 100264, 115, 121, 322, 398, 100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995,
-        505, 944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46, 270, 776, 1341, 258, 1279, 641, 563, 275, 469, 573, 284, 944, 320, 526, 962, 425,
-        913, 1402, 97, 356, 446, 115, 284, 1229, 1581, 282, 117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262, 739, 1886, 284, 783, 1803, 636, 277, 268, 117, 316,
-        485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531, 476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280, 274, 913, 601, 359, 300, 44, 335, 834, 335, 531,
-        476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257] # noqa
+        expected_tokens = [
+            100257, 100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334, 958,
+            99, 445, 98, 300, 258, 256, 281, 107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827,
+            512, 1080, 116, 300, 262, 1249, 524, 340,
+            10, 35, 35, 35, 828, 1160, 117, 807, 1037, 71, 1414, 534, 258, 1759,
+            511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35,
+            35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10, 100264, 115, 121, 322, 398,
+            100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995,
+            505, 944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46,
+            270, 776, 1341, 258, 1279, 641, 563, 275, 469, 573,
+            284, 944, 320, 526, 962, 425, 913, 1402, 97, 356, 446, 115, 284, 1229, 1581, 282,
+            117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262,
+            739, 1886, 284, 783, 1803, 636, 277, 268, 117, 316,
+            485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531, 476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280,
+            274, 913, 601, 359, 300, 44, 335, 834, 335, 531,
+            476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257
+        ] # noqa
         # fmt: on

         expected_mask = [True] * 81 + [False] * 127
         assert expected_tokens == tokens
         assert expected_mask == mask

-    def test_tokenize_message_drop_eos(
-        self, tokenizer, expected_tokens
-    ):
+    def test_tokenize_message_drop_eos(self, tokenizer, expected_tokens):
         """
         Test that the tokenizer will not add an EOS token or EOT token if user requests it.
         This is the most common case for inference.
@@ -126,8 +142,8 @@ def test_tokenize_message_drop_eos(
         ]

         tokens, mask = tokenizer.tokenize_messages(messages, add_eos=False)
-
+
         expected_mask = [True] * 93 + [False] * 126
-        # Drop eos token
+        # Drop eos token.
         assert expected_tokens[:-1] == tokens
         assert expected_mask == mask

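All three hunks above only reflow code to satisfy the linter; test behavior is unchanged. For orientation, a minimal sketch of the API these tests exercise follows. The builder name phi4_tokenizer, its argument, and the message contents are illustrative assumptions; only tokenize_messages with its ignore_system_prompt and add_eos flags, and the [True] * N + [False] * M mask shape, come from the diff itself.

    from torchtune.data import Message
    from torchtune.models.phi4 import phi4_tokenizer  # assumed builder name

    # Hypothetical tokenizer asset path.
    tokenizer = phi4_tokenizer("/path/to/phi4/vocab.model")

    messages = [
        Message(role="user", content="Tell me a story.", masked=True),
        Message(role="assistant", content="Once upon a time..."),
    ]

    # Training-style call: append the trailing EOS token.
    # (test_tokenize_messages_no_system_prompt additionally passes
    # ignore_system_prompt=True to skip any system message.)
    tokens, mask = tokenizer.tokenize_messages(messages, add_eos=True)

    # One mask entry per token: True marks prompt tokens (typically excluded
    # from the loss), False marks assistant tokens, giving the
    # [True] * N + [False] * M shape asserted in the tests.
    assert len(mask) == len(tokens)

    # Inference-style call: per test_tokenize_message_drop_eos, omitting
    # the EOS drops exactly the final token of the sequence.
    tokens_no_eos, _ = tokenizer.tokenize_messages(messages, add_eos=False)
    assert tokens_no_eos == tokens[:-1]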