Skip to content

Commit aa65cc4

Browse files
committed
cli: address code review
Signed-off-by: Alexander Bezzubov <[email protected]>
1 parent 16063f1 commit aa65cc4

File tree

2 files changed

+14
-18
lines changed

2 files changed

+14
-18
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,4 @@ env.sh
22
.mypy_cache
33
notebooks/output
44
notebooks/repos
5-
.venv/
65
.vscode/

notebooks/codesearchnet-opennmt.py

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
wget 'https://s3.amazonaws.com/code-search-net/CodeSearchNet/v2/java.zip'
77
unzip java.zip
88
python notebooks/codesearchnet-opennmt.py \
9-
--data_dir='java/final/jsonl/valid' \
9+
--data-dir='java/final/jsonl/valid' \
1010
--newline='\\n'
1111
"""
1212
from argparse import ArgumentParser, Namespace
@@ -20,8 +20,13 @@
2020

2121
logging.basicConfig(level=logging.INFO)
2222

23+
# reset SIGPIPE to its default action so the CLI is *nix-pipe friendly, e.g. `| head`
24+
from signal import signal, SIGPIPE, SIG_DFL
2325

24-
class CodeSearchNetRAM(object):
26+
signal(SIGPIPE, SIG_DFL)
27+
28+
29+
class CodeSearchNetRAM:
2530
"""Stores one split of CodeSearchNet data in memory"""
2631

2732
def __init__(self, split_path: Path, newline_repl: str):
@@ -64,13 +69,10 @@ def __getitem__(self, idx: int) -> Tuple[str, str]:
6469

6570
# drop fn signature
6671
code = row["code"]
67-
fn_body = (
68-
code[
69-
code.find("{", code.find(fn_name) + len(fn_name)) + 1 : code.rfind("}")
70-
]
71-
.lstrip()
72-
.rstrip()
73-
)
72+
fn_body = code[
73+
code.find("{", code.find(fn_name) + len(fn_name)) + 1 : code.rfind("}")
74+
]
75+
fn_body = fn_body.strip()
7476
fn_body = fn_body.replace("\n", self.newline_repl)
7577
# fn_body_enc = self.enc.encode(fn_body)
7678

@@ -111,9 +113,7 @@ def main(args: Namespace) -> None:
111113
help="Path to the unziped input data (CodeSearchNet)",
112114
)
113115

114-
parser.add_argument(
115-
"--newline", type=str, default="\\n", help="Replace newline with this"
116-
)
116+
parser.add_argument("--newline", default="\\n", help="Replace newline with this")
117117

118118
parser.add_argument(
119119
"--token-level-sources",
@@ -128,14 +128,11 @@ def main(args: Namespace) -> None:
128128
)
129129

130130
parser.add_argument(
131-
"--src_file",
132-
type=str,
133-
default="src-%s.token",
134-
help="File with function bodies",
131+
"--src-file", default="src-%s.token", help="File with function bodies",
135132
)
136133

137134
parser.add_argument(
138-
"--tgt_file", type=str, default="tgt-%s.token", help="File with function texts"
135+
"--tgt-file", default="tgt-%s.token", help="File with function texts"
139136
)
140137

141138
parser.add_argument(

0 commit comments

Comments
 (0)