
Commit 351ff81

correctly parse COMMENT statement after schema & table
1 parent 60f586d commit 351ff81

14 files changed (+176, -20 lines)


.flake8

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 [flake8]
-exclude = .github,.git,__pycache__,docs/source/conf.py,old,build,dist,simple_ddl_parser/parsetab.py,./test.py,simple_ddl_parser/test.py
+exclude = .github,.git,__pycache__,docs/source/conf.py,old,build,dist,tests/,simple_ddl_parser/parsetab.py,./test.py,simple_ddl_parser/test.py
 max-complexity = 10
 max-line-length = 120
 ignore = W503, E999

CHANGELOG.txt

Lines changed: 11 additions & 0 deletions

@@ -1,3 +1,14 @@
+**v0.26.2**
+
+Fixes:
+1. Fixed a major bug that caused incorrect parsing of lines containing 'USE' & 'GO' strings.
+2. Fixed parsing of CREATE SCHEMA for Snowflake & Oracle DDLs
+
+Improvements:
+1. Added COMMENT statement for CREATE TABLE ddl (for Snowflake dialect support)
+2. Added COMMENT statement for CREATE SCHEMA ddl (for Snowflake dialect support)
+
+
 **v0.26.1**

 Fixes:

README.md

Lines changed: 1 addition & 0 deletions

@@ -414,6 +414,7 @@ In output you will have names like 'dbo' and 'TO_Requests', not '[dbo]' and '[TO
 - CREATE .. CLONE statements for table, database and schema
 - CREATE TABLE .. CLUSTER BY ..
 - CONSTRAINT .. [NOT] ENFORCED
+- COMMENT = in CREATE TABLE & CREATE SCHEMA statements

 ### BigQuery
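
The README bullet added above is the user-facing side of the grammar changes in this commit. A minimal sketch of how it could be exercised through the library's public API (the DDL text and object names are illustrative, not taken from the repo's tests, and the expected "comment" key is inferred from the changelog):

```python
from simple_ddl_parser import DDLParser

ddl = """
CREATE SCHEMA my_schema COMMENT = 'schema level comment';

CREATE TABLE my_schema.my_table (
    id INT
) COMMENT = 'table level comment';
"""

# output_mode="snowflake" routes the result through the Snowflake dialect,
# where the new option_comment rules live.
result = DDLParser(ddl).run(output_mode="snowflake")

# After this change the parsed schema and table dicts are expected to carry
# a "comment" key alongside the usual fields.
print(result)
```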

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "simple-ddl-parser"
-version = "0.26.1"
+version = "0.26.2"
 description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
 authors = ["Iuliia Volkova <[email protected]>"]
 license = "MIT"

simple_ddl_parser/ddl_parser.py

Lines changed: 1 addition & 0 deletions

@@ -203,6 +203,7 @@ def set_lexx_tags(self, t: LexToken):

     def set_last_token(self, t: LexToken):
         self.lexer.last_token = t.type
+
         return t

     def p_id(self, p):

simple_ddl_parser/dialects/snowflake.py

Lines changed: 18 additions & 0 deletions

@@ -14,3 +14,21 @@ def p_expression_cluster_by(self, p):
         p[0] = p[1]
         p_list = remove_par(list(p))
         p[0]["cluster_by"] = p_list[-1]
+
+    def p_table_comment(self, p):
+        """expr : expr option_comment
+        """
+        p[0] = p[1]
+        if p[2]:
+            p[0].update(p[2])
+
+    def p_option_comment(self, p):
+        """option_comment : ID STRING
+        | ID DQ_STRING
+        | COMMENT ID STRING
+        | COMMENT ID DQ_STRING
+        """
+        p_list = remove_par(list(p))
+        print(p_list)
+        if "comment" in p[1].lower():
+            p[0] = {"comment": p_list[-1]}
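
A hedged illustration (not part of the diff) of how the two new rules cooperate: p_option_comment reduces a COMMENT clause to a small dict, but only when the first matched token actually spells "comment" (the p[1].lower() check), and p_table_comment then merges that dict into the expression already built for the statement:

```python
# Illustrative values only; in the real parser these come from PLY's symbol stack.
expr = {"table_name": "my_table", "columns": [{"name": "id", "type": "INT"}]}  # p[1]
option_comment = {"comment": "'table level comment'"}                          # p[2]

# p_table_comment in effect does: p[0] = p[1]; if p[2]: p[0].update(p[2])
if option_comment:
    expr.update(option_comment)

print(expr["comment"])  # 'table level comment'
```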

simple_ddl_parser/dialects/sql.py

Lines changed: 8 additions & 0 deletions

@@ -416,6 +416,7 @@ def set_auth_property_in_schema(self, p: List, p_list: List) -> None:
     def p_c_schema(self, p: List) -> None:
         """c_schema : CREATE SCHEMA
         | CREATE ID SCHEMA"""
+
         if len(p) == 4:
             p[0] = {"remote": True}

@@ -424,16 +425,23 @@ def p_create_schema(self, p: List) -> None:
         | c_schema id id id
         | c_schema id
         | c_schema id DOT id
+        | c_schema id option_comment
+        | c_schema id DOT id option_comment
         | c_schema IF NOT EXISTS id
         | c_schema IF NOT EXISTS id DOT id
         | create_schema id id id
         | create_schema id id STRING
         | create_schema options
         """
         p_list = list(p)
+
         p[0] = {}
         auth_index = None

+        if "comment" in p_list[-1]:
+            p[0].update(p_list[-1])
+            del p_list[-1]
+
         self.add_if_not_exists(p[0], p_list)
         if isinstance(p_list[1], dict):
             p[0] = p_list[1]

simple_ddl_parser/output/common.py

Lines changed: 10 additions & 7 deletions

@@ -145,19 +145,22 @@ def process_alter_and_index_result(

 def process_entities(tables_dict: Dict, table: Dict, output_mode: str) -> Dict:
     """process tables, types, sequence and etc. data"""
-    table_data = init_table_data()
-    table_data = d.populate_dialects_table_data(output_mode, table_data)
-    not_table = False
+    is_it_table = True
+
     if table.get("table_name"):
+        table_data = init_table_data()
+        table_data = d.populate_dialects_table_data(output_mode, table_data)
         table_data.update(table)
         table_data = set_unique_columns(table_data)
     else:
         table_data = table
-        not_table = True
-    if not not_table:
-        table_data = process_not_table_item(table_data, tables_dict)
+        is_it_table = False
+
+    if is_it_table:
+        table_data = process_is_it_table_item(table_data, tables_dict)

     table_data = normalize_ref_columns_in_final_output(table_data)
+
     d.dialects_clean_up(output_mode, table_data)
     return table_data

@@ -183,7 +186,7 @@ def result_format(
     return final_result


-def process_not_table_item(table_data: Dict, tables_dict: Dict) -> Dict:
+def process_is_it_table_item(table_data: Dict, tables_dict: Dict) -> Dict:
     if table_data.get("table_name"):
         tables_dict[(table_data["table_name"], table_data["schema"])] = table_data
     else:

simple_ddl_parser/output/dialects.py

Lines changed: 4 additions & 4 deletions

@@ -13,7 +13,6 @@
     "fields_terminated_by",
     "collection_items_terminated_by",
     "map_keys_terminated_by",
-    "comment",
 ]


@@ -145,16 +144,17 @@ def dialects_clean_up(output_mode: str, table_data: Dict) -> Dict:
     key_cleaning(table_data, output_mode)
     update_mappers_for_table_properties = {"bigquery": update_bigquery_output}
     update_table_prop = update_mappers_for_table_properties.get(output_mode)
-
     if update_table_prop:
         table_data = update_table_prop(table_data)

     if output_mode == "oracle":
-        for column in table_data["columns"]:
+        for column in table_data.get("columns", []):
             column = add_additional_oracle_keys_in_column(column)
     elif output_mode == "snowflake":
-        for column in table_data["columns"]:
+        # can be no columns if it is a create database or create schema
+        for column in table_data.get("columns", []):
             column = add_additional_snowflake_keys_in_column(column)
+
     elif output_mode == "redshift":
         table_data = process_redshift_dialect(table_data)
     return table_data
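
A short sketch of the failure mode that switching to table_data.get("columns", []) guards against: entities produced by CREATE SCHEMA or CREATE DATABASE carry no "columns" key, so direct indexing raised KeyError during the oracle/snowflake clean-up. The dict shape below is illustrative, not the parser's exact output:

```python
# Hypothetical parsed entity for a schema-only statement; note there is no "columns" key.
schema_entity = {"schema_name": "my_schema", "comment": "'schema level comment'"}

try:
    for column in schema_entity["columns"]:       # old behaviour
        pass
except KeyError:
    print("KeyError: 'columns'")                  # what used to happen

for column in schema_entity.get("columns", []):   # new behaviour: simply iterates nothing
    pass
```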

simple_ddl_parser/parser.py

Lines changed: 6 additions & 7 deletions

@@ -163,16 +163,16 @@ def check_new_statement_start(self, line: str) -> bool:
         return self.new_statement

     def check_line_on_skip_words(self) -> bool:
-        skip_line_words = ["USE", "GO"]
+        skip_regex = r"^(GO|USE)\b"

         self.skip = False
-        for word in skip_line_words:
-            if self.line.startswith(word):
-                self.skip = True
-                break
+
+        if re.match(skip_regex, self.line.upper()):
+            self.skip = True
         return self.skip

     def add_line_to_statement(self) -> str:
+
         if (
             self.line
             and not self.skip
@@ -206,15 +206,13 @@ def process_line(
         self.pre_process_line()

         self.line = self.line.strip().replace("\n", "").replace("\t", "")
-
         self.skip = self.check_line_on_skip_words()

         self.parse_set_statement()
         # to avoid issues when comma or parath are glued to column name
         self.check_new_statement_start(self.line)

         final_line = self.line.endswith(";") and not self.set_was_in_line
-
         self.add_line_to_statement()

         if final_line or self.new_statement:
@@ -237,6 +235,7 @@ def process_statement(self) -> None:
         self.statement = None

     def parse_statement(self) -> None:
+
         _parse_result = yacc.parse(self.statement)
         if _parse_result:
             self.tables.append(_parse_result)
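
The switch from startswith() to a word-boundary regex is what fixes the 'USE' & 'GO' bug listed in the changelog: only standalone USE/GO statements are skipped, while DDL lines that merely begin with those letters are kept. A standalone sketch comparing the two checks (the sample lines are made up):

```python
import re

skip_regex = r"^(GO|USE)\b"

lines = [
    "GO",                  # batch separator: should be skipped
    "USE my_database;",    # should be skipped
    "GOODS_COUNT INT,",    # real DDL that startswith("GO") wrongly skipped
    "USER_ID INT,",        # real DDL that startswith("USE") wrongly skipped
]

for line in lines:
    old_skip = any(line.startswith(word) for word in ["USE", "GO"])
    new_skip = bool(re.match(skip_regex, line.upper()))
    print(f"{line!r:20} old={old_skip} new={new_skip}")
```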
