Skip to content

Commit e4eff25

Browse files
committed
Use actual Ruby classes for parser result, instead of decoded JSON
This is essentially to enable easy and fast two-way communication with the C library, and as a bonus makes for a better interaction on the Ruby side, as we are handling actual objects instead of hashes and arrays. Note this also unifies the scan output into the same protobuf file, although the scan result retains a different top-level protobuf message (ScanResult).
1 parent e271d8c commit e4eff25

File tree

6 files changed

+2049
-1488
lines changed

6 files changed

+2049
-1488
lines changed

lib/pg_query.rb

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
require 'pg_query/version'
22
require 'pg_query/parse_error'
33

4-
require 'pg_query/parse_tree_pb'
4+
require 'pg_query/pg_query_pb'
55

66
require 'pg_query/pg_query'
7+
require 'pg_query/constants'
78
require 'pg_query/parse'
89
require 'pg_query/treewalker'
910
require 'pg_query/node_types'
1011
require 'pg_query/deep_dup'
1112

12-
require 'pg_query/legacy_parsetree'
13-
1413
require 'pg_query/filter_columns'
1514
require 'pg_query/fingerprint'
1615
require 'pg_query/param_refs'

lib/pg_query/parse.rb

Lines changed: 125 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
1-
require 'json'
1+
module PgQuery
2+
class Node
3+
def inspect
4+
node ? format('<PgQuery::Node: %s: %s>', node, public_send(node).inspect) : '<PgQuery::Node>'
5+
end
6+
end
7+
end
28

39
module PgQuery
410
def self.parse(query)
5-
tree, stderr = _raw_parse(query)
11+
result, stderr = parse_protobuf(query)
612

713
begin
8-
tree = JSON.parse(tree, max_nesting: 1000)
9-
rescue JSON::ParserError
10-
raise ParseError.new('Failed to parse JSON', __FILE__, __LINE__, -1)
14+
result = PgQuery::ParseResult.decode(result)
15+
rescue Google::Protobuf::ParseError
16+
raise PgQuery::ParseError.new('Failed to parse tree', __FILE__, __LINE__, -1)
1117
end
1218

1319
warnings = []
@@ -16,10 +22,10 @@ def self.parse(query)
1622
warnings << line.strip
1723
end
1824

19-
PgQuery::ParseResult.new(query, tree, warnings)
25+
PgQuery::ParserResult.new(query, result, warnings)
2026
end
2127

22-
class ParseResult
28+
class ParserResult
2329
attr_reader :query
2430
attr_reader :tree
2531
attr_reader :warnings
@@ -71,134 +77,139 @@ def load_tables_and_aliases! # rubocop:disable Metrics/CyclomaticComplexity
7177
@cte_names = []
7278
@aliases = {}
7379

74-
statements = @tree.dup
80+
statements = @tree.stmts.dup.to_a.map { |s| s.stmt }
7581
from_clause_items = [] # types: select, dml, ddl
7682
subselect_items = []
7783

7884
loop do
7985
statement = statements.shift
8086
if statement
81-
case statement.keys[0]
82-
when RAW_STMT
83-
statements << statement[RAW_STMT][STMT_FIELD]
87+
case statement.node
88+
when :list
89+
statements += statement.list.items
8490
# The following statement types do not modify tables and are added to from_clause_items
8591
# (and subsequently @tables)
86-
when SELECT_STMT
87-
case statement[SELECT_STMT]['op']
88-
when 0
89-
(statement[SELECT_STMT][FROM_CLAUSE_FIELD] || []).each do |item|
90-
if item[RANGE_SUBSELECT]
91-
statements << item[RANGE_SUBSELECT]['subquery']
92+
when :select_stmt
93+
subselect_items.concat(statement.select_stmt.target_list)
94+
subselect_items << statement.select_stmt.where_clause if statement.select_stmt.where_clause
95+
subselect_items.concat(statement.select_stmt.sort_clause.collect { |h| h.sort_by.node })
96+
subselect_items.concat(statement.select_stmt.group_clause)
97+
subselect_items << statement.select_stmt.having_clause if statement.select_stmt.having_clause
98+
99+
case statement.select_stmt.op
100+
when :SETOP_NONE
101+
(statement.select_stmt.from_clause || []).each do |item|
102+
if item.node == :range_subselect
103+
statements << item.range_subselect.subquery
92104
else
93105
from_clause_items << { item: item, type: :select }
94106
end
95107
end
96-
when 1
97-
statements << statement[SELECT_STMT]['larg'] if statement[SELECT_STMT]['larg']
98-
statements << statement[SELECT_STMT]['rarg'] if statement[SELECT_STMT]['rarg']
108+
when :SETOP_UNION
109+
statements << PgQuery::Node.new(select_stmt: statement.select_stmt.larg) if statement.select_stmt.larg
110+
statements << PgQuery::Node.new(select_stmt: statement.select_stmt.rarg) if statement.select_stmt.rarg
99111
end
100112

101-
if (with_clause = statement[SELECT_STMT]['withClause'])
102-
cte_statements, cte_names = statements_and_cte_names_for_with_clause(with_clause)
113+
if statement.select_stmt.with_clause
114+
cte_statements, cte_names = statements_and_cte_names_for_with_clause(statement.select_stmt.with_clause)
103115
@cte_names.concat(cte_names)
104116
statements.concat(cte_statements)
105117
end
106118
# The following statements modify the contents of a table
107-
when INSERT_STMT, UPDATE_STMT, DELETE_STMT
108-
value = statement.values[0]
109-
from_clause_items << { item: value['relation'], type: :dml }
110-
statements << value['selectStmt'] if value.key?('selectStmt')
111-
statements << value['withClause'] if value.key?('withClause')
119+
when :insert_stmt, :update_stmt, :delete_stmt
120+
value = statement.public_send(statement.node)
121+
from_clause_items << { item: PgQuery::Node.new(range_var: value.relation), type: :dml }
122+
statements << value.select_stmt if statement.node == :insert_stmt && value.select_stmt
123+
124+
subselect_items.concat(statement.update_stmt.target_list) if statement.node == :update_stmt
125+
subselect_items << statement.update_stmt.where_clause if statement.node == :update_stmt && statement.update_stmt.where_clause
126+
subselect_items << statement.delete_stmt.where_clause if statement.node == :delete_stmt && statement.delete_stmt.where_clause
112127

113-
if (with_clause = value['withClause'])
114-
cte_statements, cte_names = statements_and_cte_names_for_with_clause(with_clause)
128+
if value.with_clause
129+
cte_statements, cte_names = statements_and_cte_names_for_with_clause(value.with_clause)
115130
@cte_names.concat(cte_names)
116131
statements.concat(cte_statements)
117132
end
118-
when COPY_STMT
119-
from_clause_items << { item: statement.values[0]['relation'], type: :dml } if statement.values[0]['relation']
120-
statements << statement.values[0]['query']
133+
when :copy_stmt
134+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.copy_stmt.relation), type: :dml } if statement.copy_stmt.relation
135+
statements << statement.copy_stmt.query
121136
# The following statement types are DDL (changing table structure)
122-
when ALTER_TABLE_STMT, CREATE_STMT
123-
from_clause_items << { item: statement.values[0]['relation'], type: :ddl }
124-
when CREATE_TABLE_AS_STMT
125-
if statement[CREATE_TABLE_AS_STMT]['into'] && statement[CREATE_TABLE_AS_STMT]['into'][INTO_CLAUSE]['rel']
126-
from_clause_items << { item: statement[CREATE_TABLE_AS_STMT]['into'][INTO_CLAUSE]['rel'], type: :ddl }
137+
when :alter_table_stmt
138+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.alter_table_stmt.relation), type: :ddl }
139+
when :create_stmt
140+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.create_stmt.relation), type: :ddl }
141+
when :create_table_as_stmt
142+
if statement.create_table_as_stmt.into && statement.create_table_as_stmt.into.rel
143+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.create_table_as_stmt.into.rel), type: :ddl }
127144
end
128-
if statement[CREATE_TABLE_AS_STMT]['query']
129-
statements << statement[CREATE_TABLE_AS_STMT]['query']
130-
end
131-
when TRUNCATE_STMT
132-
from_clause_items += statement.values[0]['relations'].map { |r| { item: r, type: :ddl } }
133-
when VIEW_STMT
134-
from_clause_items << { item: statement[VIEW_STMT]['view'], type: :ddl }
135-
statements << statement[VIEW_STMT]['query']
136-
when INDEX_STMT, CREATE_TRIG_STMT, RULE_STMT
137-
from_clause_items << { item: statement.values[0]['relation'], type: :ddl }
138-
when VACUUM_STMT
139-
from_clause_items += statement.values[0]['rels'].map { |r| { item: r[VACUUM_RELATION]['relation'], type: :ddl } }
140-
when REFRESH_MAT_VIEW_STMT
141-
from_clause_items << { item: statement[REFRESH_MAT_VIEW_STMT]['relation'], type: :ddl }
142-
when DROP_STMT
143-
objects = statement[DROP_STMT]['objects'].map do |obj|
144-
if obj.is_a?(Array)
145-
obj.map { |obj2| obj2['String'] && obj2['String']['str'] }
146-
else
147-
obj['String'] && obj['String']['str']
145+
statements << statement.create_table_as_stmt.query if statement.create_table_as_stmt.query
146+
when :truncate_stmt
147+
from_clause_items += statement.truncate_stmt.relations.map { |r| { item: r, type: :ddl } }
148+
when :view_stmt
149+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.view_stmt.view), type: :ddl }
150+
statements << statement.view_stmt.query
151+
when :index_stmt
152+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.index_stmt.relation), type: :ddl }
153+
when :create_trig_stmt
154+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.create_trig_stmt.relation), type: :ddl }
155+
when :rule_stmt
156+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.rule_stmt.relation), type: :ddl }
157+
when :vacuum_stmt
158+
from_clause_items += statement.vacuum_stmt.rels.map { |r| { item: PgQuery::Node.new(range_var: r.vacuum_relation.relation), type: :ddl } if r.node == :vacuum_relation }
159+
when :refresh_mat_view_stmt
160+
from_clause_items << { item: PgQuery::Node.new(range_var: statement.refresh_mat_view_stmt.relation), type: :ddl }
161+
when :drop_stmt
162+
objects = statement.drop_stmt.objects.map do |obj|
163+
case obj.node
164+
when :list
165+
obj.list.items.map { |obj2| obj2.string.str if obj2.node == :string }
166+
when :string
167+
obj.string.str
148168
end
149169
end
150-
case statement[DROP_STMT]['removeType']
151-
when OBJECT_TYPE_TABLE
170+
case statement.drop_stmt.remove_type
171+
when :OBJECT_TABLE
152172
@tables += objects.map { |r| { name: r.join('.'), type: :ddl } }
153-
when OBJECT_TYPE_RULE, OBJECT_TYPE_TRIGGER
173+
when :OBJECT_RULE, :OBJECT_TRIGGER
154174
@tables += objects.map { |r| { name: r[0..-2].join('.'), type: :ddl } }
155175
end
156-
when GRANT_STMT
157-
objects = statement[GRANT_STMT]['objects']
158-
case statement[GRANT_STMT]['objtype']
159-
when OBJECT_TYPE_COLUMN # Column # rubocop:disable Lint/EmptyWhen
176+
when :grant_stmt
177+
objects = statement.grant_stmt.objects
178+
case statement.grant_stmt.objtype
179+
when :OBJECT_COLUMN # Column # rubocop:disable Lint/EmptyWhen
160180
# FIXME
161-
when OBJECT_TYPE_TABLE # Table
181+
when :OBJECT_TABLE # Table
162182
from_clause_items += objects.map { |o| { item: o, type: :ddl } }
163-
when OBJECT_TYPE_SEQUENCE # Sequence # rubocop:disable Lint/EmptyWhen
183+
when :OBJECT_SEQUENCE # Sequence # rubocop:disable Lint/EmptyWhen
164184
# FIXME
165185
end
166-
when LOCK_STMT
167-
from_clause_items += statement.values[0]['relations'].map { |r| { item: r, type: :ddl } }
186+
when :lock_stmt
187+
from_clause_items += statement.lock_stmt.relations.map { |r| { item: r, type: :ddl } }
168188
# The following are other statements that don't fit into query/DML/DDL
169-
when EXPLAIN_STMT
170-
statements << statement[EXPLAIN_STMT]['query']
171-
end
172-
173-
statement_value = statement.values[0]
174-
unless statement.empty?
175-
subselect_items.concat(statement_value['targetList']) if statement_value['targetList']
176-
subselect_items << statement_value['whereClause'] if statement_value['whereClause']
177-
subselect_items.concat(statement_value['sortClause'].collect { |h| h[SORT_BY]['node'] }) if statement_value['sortClause']
178-
subselect_items.concat(statement_value['groupClause']) if statement_value['groupClause']
179-
subselect_items << statement_value['havingClause'] if statement_value['havingClause']
189+
when :explain_stmt
190+
statements << statement.explain_stmt.query
180191
end
181192
end
182193

183194
next_item = subselect_items.shift
184195
if next_item
185-
case next_item.keys[0]
186-
when A_EXPR
196+
case next_item.node
197+
when :a_expr
187198
%w[lexpr rexpr].each do |side|
188-
elem = next_item.values[0][side]
199+
elem = next_item.a_expr.public_send(side)
189200
next unless elem
190-
if elem.is_a?(Array)
201+
if elem.is_a?(Array) # FIXME - this needs to traverse a list
191202
subselect_items += elem
192203
else
193204
subselect_items << elem
194205
end
195206
end
196-
when BOOL_EXPR
197-
subselect_items.concat(next_item.values[0]['args'])
198-
when RES_TARGET
199-
subselect_items << next_item[RES_TARGET]['val']
200-
when SUB_LINK
201-
statements << next_item[SUB_LINK]['subselect']
207+
when :bool_expr
208+
subselect_items.concat(next_item.bool_expr.args)
209+
when :res_target
210+
subselect_items << next_item.res_target.val
211+
when :sub_link
212+
statements << next_item.sub_link.subselect
202213
end
203214
end
204215

@@ -209,31 +220,30 @@ def load_tables_and_aliases! # rubocop:disable Metrics/CyclomaticComplexity
209220
next_item = from_clause_items.shift
210221
break unless next_item && next_item[:item]
211222

212-
case next_item[:item].keys[0]
213-
when JOIN_EXPR
214-
%w[larg rarg].each do |side|
215-
from_clause_items << { item: next_item[:item][JOIN_EXPR][side], type: next_item[:type] }
216-
end
217-
when ROW_EXPR
218-
from_clause_items += next_item[:item][ROW_EXPR]['args'].map { |a| { item: a, type: next_item[:type] } }
219-
when RANGE_VAR
220-
rangevar = next_item[:item][RANGE_VAR]
221-
next if !rangevar['schemaname'] && @cte_names.include?(rangevar['relname'])
223+
case next_item[:item].node
224+
when :join_expr
225+
from_clause_items << { item: next_item[:item].join_expr.larg, type: next_item[:type] }
226+
from_clause_items << { item: next_item[:item].join_expr.rarg, type: next_item[:type] }
227+
when :row_expr
228+
from_clause_items += next_item[:item].row_expr.args.map { |a| { item: a, type: next_item[:type] } }
229+
when :range_var
230+
rangevar = next_item[:item].range_var
231+
next if rangevar.schemaname.empty? && @cte_names.include?(rangevar.relname)
222232

223-
table = [rangevar['schemaname'], rangevar['relname']].compact.join('.')
233+
table = [rangevar.schemaname, rangevar.relname].reject { |s| s.nil? || s.empty? }.join('.')
224234
@tables << {
225235
name: table,
226236
type: next_item[:type],
227-
location: rangevar['location'],
228-
schemaname: rangevar['schemaname'],
229-
relname: rangevar['relname'],
230-
inh: rangevar['inh']
237+
location: rangevar.location,
238+
schemaname: (rangevar.schemaname if !rangevar.schemaname.empty?),
239+
relname: rangevar.relname,
240+
inh: rangevar.inh
231241
}
232-
@aliases[rangevar['alias'][ALIAS]['aliasname']] = table if rangevar['alias']
233-
when RANGE_SUBSELECT
234-
from_clause_items << { item: next_item[:item][RANGE_SUBSELECT]['subquery'], type: next_item[:type] }
235-
when SELECT_STMT
236-
from_clause = next_item[:item][SELECT_STMT][FROM_CLAUSE_FIELD]
242+
@aliases[rangevar.alias.aliasname] = table if rangevar.alias
243+
when :range_subselect
244+
from_clause_items << { item: next_item[:item].range_subselect.subquery, type: next_item[:type] }
245+
when :select_stmt
246+
from_clause = next_item[:item].select_stmt.from_clause
237247
from_clause_items += from_clause.map { |r| { item: r, type: next_item[:type] } } if from_clause
238248
end
239249
end
@@ -242,17 +252,17 @@ def load_tables_and_aliases! # rubocop:disable Metrics/CyclomaticComplexity
242252
@cte_names.uniq!
243253
end
244254

245-
def statements_and_cte_names_for_with_clause(with_clause)
255+
def statements_and_cte_names_for_with_clause(with_clause) # FIXME
246256
statements = []
247257
cte_names = []
248258

249-
with_clause[WITH_CLAUSE]['ctes'].each do |item|
250-
next unless item[COMMON_TABLE_EXPR]
251-
cte_names << item[COMMON_TABLE_EXPR]['ctename']
252-
statements << item[COMMON_TABLE_EXPR]['ctequery']
259+
with_clause.ctes.each do |item|
260+
next unless item.node == :common_table_expr
261+
cte_names << item.common_table_expr.ctename
262+
statements << item.common_table_expr.ctequery
253263
end
254264

255265
[statements, cte_names]
256266
end
257267
end
258-
end
268+
end

0 commit comments

Comments
 (0)