Skip to content

Commit 6f12edb

Browse files
weezymatt and rasbt authored
Fix issue: 731 by resolving semantic error (#738)
* fix issue 731
* update test path

---------

Co-authored-by: rasbt <[email protected]>
1 parent a354555 commit 6f12edb

File tree

3 files changed

+51
-44
lines changed

3 files changed

+51
-44
lines changed

.github/workflows/basic-tests-linux-uv.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
shell: bash
6767
run: |
6868
source .venv/bin/activate
69-
pytest ch02/05_bpe-from-scratch/tests/tests.py
69+
pytest ch02/05_bpe-from-scratch/tests.py
7070
7171
- name: Test Selected Bonus Materials
7272
shell: bash

ch02/05_bpe-from-scratch/bpe-from-scratch.ipynb

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
},
8282
{
8383
"cell_type": "code",
84-
"execution_count": 1,
84+
"execution_count": 39,
8585
"id": "8c9bc9e4-120f-4bac-8fa6-6523c568d12e",
8686
"metadata": {},
8787
"outputs": [
@@ -109,7 +109,7 @@
109109
},
110110
{
111111
"cell_type": "code",
112-
"execution_count": 2,
112+
"execution_count": 40,
113113
"id": "6c586945-d459-4f9a-855d-bf73438ef0e3",
114114
"metadata": {},
115115
"outputs": [
@@ -138,7 +138,7 @@
138138
},
139139
{
140140
"cell_type": "code",
141-
"execution_count": 3,
141+
"execution_count": 41,
142142
"id": "0d5b61d9-79a0-48b4-9b3e-64ab595c5b01",
143143
"metadata": {},
144144
"outputs": [
@@ -382,7 +382,7 @@
382382
},
383383
{
384384
"cell_type": "code",
385-
"execution_count": 4,
385+
"execution_count": 42,
386386
"id": "3e4a15ec-2667-4f56-b7c1-34e8071b621d",
387387
"metadata": {},
388388
"outputs": [],
@@ -809,15 +809,15 @@
809809
},
810810
{
811811
"cell_type": "code",
812-
"execution_count": 5,
812+
"execution_count": 71,
813813
"id": "51872c08-e01b-40c3-a8a0-e8d6a773e3df",
814814
"metadata": {},
815815
"outputs": [
816816
{
817817
"name": "stdout",
818818
"output_type": "stream",
819819
"text": [
820-
"the-verdict.txt already exists in ./the-verdict.txt\n"
820+
"the-verdict.txt already exists in ../01_main-chapter-code/the-verdict.txt\n"
821821
]
822822
}
823823
],
@@ -848,7 +848,7 @@
848848
" \"the-verdict.txt\"\n",
849849
" ),\n",
850850
" filename=\"the-verdict.txt\",\n",
851-
" search_dirs=\".\"\n",
851+
" search_dirs=[\"ch02/01_main-chapter-code/\", \"../01_main-chapter-code/\", \".\"]\n",
852852
")\n",
853853
"\n",
854854
"with open(verdict_path, \"r\", encoding=\"utf-8\") as f: # added ../01_main-chapter-code/\n",
@@ -867,7 +867,7 @@
867867
},
868868
{
869869
"cell_type": "code",
870-
"execution_count": 6,
870+
"execution_count": 46,
871871
"id": "027348fd-d52f-4396-93dd-38eed142df9b",
872872
"metadata": {},
873873
"outputs": [],
@@ -886,7 +886,7 @@
886886
},
887887
{
888888
"cell_type": "code",
889-
"execution_count": 7,
889+
"execution_count": 47,
890890
"id": "f705a283-355e-4460-b940-06bbc2ae4e61",
891891
"metadata": {},
892892
"outputs": [
@@ -913,7 +913,7 @@
913913
},
914914
{
915915
"cell_type": "code",
916-
"execution_count": 8,
916+
"execution_count": 48,
917917
"id": "3da42d1c-f75c-4ba7-a6c5-4cb8543d4a44",
918918
"metadata": {},
919919
"outputs": [
@@ -947,7 +947,7 @@
947947
},
948948
{
949949
"cell_type": "code",
950-
"execution_count": 9,
950+
"execution_count": 49,
951951
"id": "e1db5cce-e015-412b-ad56-060b8b638078",
952952
"metadata": {},
953953
"outputs": [
@@ -967,7 +967,7 @@
967967
},
968968
{
969969
"cell_type": "code",
970-
"execution_count": 10,
970+
"execution_count": 50,
971971
"id": "78249752-38d7-47b9-b259-912bcc093dc4",
972972
"metadata": {},
973973
"outputs": [
@@ -987,7 +987,7 @@
987987
},
988988
{
989989
"cell_type": "code",
990-
"execution_count": 11,
990+
"execution_count": 51,
991991
"id": "0331d37d-49a3-44f7-9aa9-9834e0938741",
992992
"metadata": {},
993993
"outputs": [
@@ -1007,7 +1007,7 @@
10071007
},
10081008
{
10091009
"cell_type": "code",
1010-
"execution_count": 12,
1010+
"execution_count": 52,
10111011
"id": "1ed1b344-f7d4-4e9e-ac34-2a04b5c5b7a8",
10121012
"metadata": {},
10131013
"outputs": [
@@ -1043,7 +1043,7 @@
10431043
},
10441044
{
10451045
"cell_type": "code",
1046-
"execution_count": 13,
1046+
"execution_count": 53,
10471047
"id": "da0e1faf-1933-43d9-b681-916c282a8f86",
10481048
"metadata": {},
10491049
"outputs": [
@@ -1061,7 +1061,7 @@
10611061
},
10621062
{
10631063
"cell_type": "code",
1064-
"execution_count": 14,
1064+
"execution_count": 54,
10651065
"id": "8b690e83-5d6b-409a-804e-321c287c24a4",
10661066
"metadata": {},
10671067
"outputs": [
@@ -1087,7 +1087,7 @@
10871087
},
10881088
{
10891089
"cell_type": "code",
1090-
"execution_count": 15,
1090+
"execution_count": 55,
10911091
"id": "2b9e6289-92cb-4d88-b3c8-e836d7c8095f",
10921092
"metadata": {},
10931093
"outputs": [
@@ -1142,7 +1142,7 @@
11421142
},
11431143
{
11441144
"cell_type": "code",
1145-
"execution_count": 16,
1145+
"execution_count": 56,
11461146
"id": "c7056cb1-a9a3-4cf6-8364-29fb493ae240",
11471147
"metadata": {},
11481148
"outputs": [
@@ -1152,7 +1152,7 @@
11521152
"'This is some text.'"
11531153
]
11541154
},
1155-
"execution_count": 16,
1155+
"execution_count": 56,
11561156
"metadata": {},
11571157
"output_type": "execute_result"
11581158
}
@@ -1165,7 +1165,7 @@
11651165
},
11661166
{
11671167
"cell_type": "code",
1168-
"execution_count": 17,
1168+
"execution_count": 57,
11691169
"id": "37bc6753-8f35-4ec7-b23e-df4a12103cb4",
11701170
"metadata": {},
11711171
"outputs": [
@@ -1175,7 +1175,7 @@
11751175
"'This is some text with \\n newline characters.'"
11761176
]
11771177
},
1178-
"execution_count": 17,
1178+
"execution_count": 57,
11791179
"metadata": {},
11801180
"output_type": "execute_result"
11811181
}
@@ -1204,7 +1204,7 @@
12041204
},
12051205
{
12061206
"cell_type": "code",
1207-
"execution_count": 18,
1207+
"execution_count": 58,
12081208
"id": "955181cb-0910-4c6a-9c22-d8292a3ec1fc",
12091209
"metadata": {},
12101210
"outputs": [],
@@ -1215,7 +1215,7 @@
12151215
},
12161216
{
12171217
"cell_type": "code",
1218-
"execution_count": 19,
1218+
"execution_count": 59,
12191219
"id": "6e5ccfe7-ac67-42f3-b727-87886a8867f1",
12201220
"metadata": {},
12211221
"outputs": [],
@@ -1235,7 +1235,7 @@
12351235
},
12361236
{
12371237
"cell_type": "code",
1238-
"execution_count": 20,
1238+
"execution_count": 60,
12391239
"id": "00d9bf8f-756f-48bf-81b8-b890e2c2ef13",
12401240
"metadata": {},
12411241
"outputs": [
@@ -1253,7 +1253,7 @@
12531253
},
12541254
{
12551255
"cell_type": "code",
1256-
"execution_count": 21,
1256+
"execution_count": 61,
12571257
"id": "e7addb64-2892-4e1c-85dd-4f5152740099",
12581258
"metadata": {},
12591259
"outputs": [
@@ -1263,7 +1263,7 @@
12631263
"'This is some text with \\n newline characters.'"
12641264
]
12651265
},
1266-
"execution_count": 21,
1266+
"execution_count": 61,
12671267
"metadata": {},
12681268
"output_type": "execute_result"
12691269
}
@@ -1293,7 +1293,7 @@
12931293
},
12941294
{
12951295
"cell_type": "code",
1296-
"execution_count": 22,
1296+
"execution_count": 72,
12971297
"id": "b45b4366-2c2b-4309-9a14-febf3add8512",
12981298
"metadata": {},
12991299
"outputs": [
@@ -1310,7 +1310,7 @@
13101310
"# Download files if not already present in this directory\n",
13111311
"\n",
13121312
"# Define the directories to search and the files to download\n",
1313-
"search_directories = [\".\", \"../02_bonus_bytepair-encoder/gpt2_model/\"]\n",
1313+
"search_directories = [\"ch02/02_bonus_bytepair-encoder/gpt2_model/\", \"../02_bonus_bytepair-encoder/gpt2_model/\", \".\"]\n",
13141314
"\n",
13151315
"files_to_download = {\n",
13161316
" \"https://openaipublic.blob.core.windows.net/gpt-2/models/124M/vocab.bpe\": \"vocab.bpe\",\n",

ch02/05_bpe-from-scratch/tests/tests.py renamed to ch02/05_bpe-from-scratch/tests.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
def import_definitions_from_notebook(fullname, names):
1212
"""Loads function definitions from a Jupyter notebook file into a module."""
13-
path = os.path.join(os.path.dirname(__file__), "..", fullname + ".ipynb")
13+
path = os.path.join(os.path.dirname(__file__), fullname + ".ipynb")
1414
path = os.path.normpath(path)
1515

1616
if not os.path.exists(path):
@@ -42,12 +42,30 @@ def imported_module():
4242
return import_definitions_from_notebook(fullname, names)
4343

4444

45+
@pytest.fixture(scope="module")
46+
def verdict_file(imported_module):
47+
"""Fixture to handle downloading The Verdict file."""
48+
download_file_if_absent = getattr(imported_module, "download_file_if_absent", None)
49+
50+
verdict_path = download_file_if_absent(
51+
url=(
52+
"https://raw.githubusercontent.com/rasbt/"
53+
"LLMs-from-scratch/main/ch02/01_main-chapter-code/"
54+
"the-verdict.txt"
55+
),
56+
filename="the-verdict.txt",
57+
search_dirs=["ch02/01_main-chapter-code/", "../01_main-chapter-code/", "."]
58+
)
59+
60+
return verdict_path
61+
62+
4563
@pytest.fixture(scope="module")
4664
def gpt2_files(imported_module):
4765
"""Fixture to handle downloading GPT-2 files."""
4866
download_file_if_absent = getattr(imported_module, "download_file_if_absent", None)
4967

50-
search_directories = [".", "../02_bonus_bytepair-encoder/gpt2_model/"]
68+
search_directories = ["ch02/02_bonus_bytepair-encoder/gpt2_model/", "../02_bonus_bytepair-encoder/gpt2_model/", "."]
5169
files_to_download = {
5270
"https://openaipublic.blob.core.windows.net/gpt-2/models/124M/vocab.bpe": "vocab.bpe",
5371
"https://openaipublic.blob.core.windows.net/gpt-2/models/124M/encoder.json": "encoder.json"
@@ -58,22 +76,11 @@ def gpt2_files(imported_module):
5876
return paths
5977

6078

61-
def test_tokenizer_training(imported_module):
79+
def test_tokenizer_training(imported_module, verdict_file):
6280
BPETokenizerSimple = getattr(imported_module, "BPETokenizerSimple", None)
63-
download_file_if_absent = getattr(imported_module, "download_file_if_absent", None)
64-
6581
tokenizer = BPETokenizerSimple()
66-
verdict_path = download_file_if_absent(
67-
url=(
68-
"https://raw.githubusercontent.com/rasbt/"
69-
"LLMs-from-scratch/main/ch02/01_main-chapter-code/"
70-
"the-verdict.txt"
71-
),
72-
filename="the-verdict.txt",
73-
search_dirs="."
74-
)
7582

76-
with open(verdict_path, "r", encoding="utf-8") as f: # added ../01_main-chapter-code/
83+
with open(verdict_file, "r", encoding="utf-8") as f: # added ../01_main-chapter-code/
7784
text = f.read()
7885

7986
tokenizer.train(text, vocab_size=1000, allowed_special={"<|endoftext|>"})

0 commit comments

Comments
 (0)