Skip to content

Commit 95aee88

Browse files
cpcloud and cursoragent committed
feat(llm): improve SQL formatting with proper column layout
Enhanced SQL formatter to ensure each SELECT column/expression is on its own line with proper indentation. This makes complex queries much more readable.

Changes to layoutClauses:
- Track baseIndent level to handle nested queries
- Each column after SELECT gets its own line with proper indentation
- Subqueries are detected and indented appropriately
- Works with arbitrarily nested queries

Example output:

    SELECT name,
      budget_cents,
      actual_cents
    FROM projects
    WHERE status = 'active'

Added test for nested subqueries to verify column layout with inline scalar subqueries.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent bccfcc4 commit 95aee88

File tree

2 files changed

+66
-13
lines changed

2 files changed

+66
-13
lines changed

internal/llm/sqlfmt.go

Lines changed: 39 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -345,67 +345,95 @@ func trimTrailingSpace(b *strings.Builder) {
345345
}
346346

347347
// layoutClauses formats the token stream into indented, line-broken SQL.
348+
// Handles nested queries by tracking parenthesis depth and indenting
349+
// subqueries appropriately.
348350
func layoutClauses(rawTokens []sqlToken) string {
349351
tokens := buildClauseTokens(rawTokens)
350352

351353
var b strings.Builder
352-
const indent = " "
354+
const indentUnit = " "
353355
atLineStart := true
354356
inSelect := false
355357
parenDepth := 0
358+
baseIndent := 0 // Indentation level for current scope
356359

357-
for _, ct := range tokens {
358-
// Track parenthesis depth to avoid breaking inside subexpressions.
360+
for i, ct := range tokens {
361+
// Track parenthesis depth to detect subqueries.
359362
if ct.Kind == tokSymbol && ct.Text == "(" {
363+
// Check if this is the start of a subquery (preceded by FROM, JOIN, IN, etc.)
364+
// For simplicity, we treat all ( as potential subquery starts.
365+
b.WriteString(ct.Text)
360366
parenDepth++
367+
368+
// Peek ahead to see if next non-space token is SELECT (indicating subquery)
369+
nextIdx := i + 1
370+
for nextIdx < len(tokens) && tokens[nextIdx].Kind == tokSpace {
371+
nextIdx++
372+
}
373+
if nextIdx < len(tokens) && tokens[nextIdx].Keyword == "SELECT" {
374+
baseIndent++
375+
}
376+
atLineStart = false
377+
continue
361378
}
379+
362380
if ct.Kind == tokSymbol && ct.Text == ")" {
363381
if parenDepth > 0 {
364382
parenDepth--
383+
// Check if we're closing a subquery scope
384+
if baseIndent > 0 {
385+
// Look back to see if we had a SELECT at this level
386+
baseIndent--
387+
}
365388
}
389+
b.WriteString(ct.Text)
390+
atLineStart = false
391+
continue
366392
}
367393

368-
// Clause keyword at top level (not inside parens): start a new line.
394+
// Clause keyword at top level of current scope: start a new line.
369395
if ct.Level >= 0 && parenDepth == 0 {
370396
kw := ct.Keyword
371397

372-
// SELECT starts at column 0 with no preceding newline if it's
373-
// the very first token.
398+
// SELECT: new line with proper indentation
374399
if kw == "SELECT" {
375400
if b.Len() > 0 {
376401
trimTrailingSpace(&b)
377402
b.WriteString("\n")
403+
b.WriteString(strings.Repeat(indentUnit, baseIndent))
378404
}
379405
b.WriteString(ct.Text)
380406
atLineStart = false
381407
inSelect = true
382408
continue
383409
}
384410

385-
// AND/OR get single indent.
411+
// AND/OR get one extra indent from their clause.
386412
if ct.Level == 1 {
387413
trimTrailingSpace(&b)
388414
b.WriteString("\n")
389-
b.WriteString(indent)
415+
b.WriteString(strings.Repeat(indentUnit, baseIndent+1))
390416
b.WriteString(ct.Text)
391417
atLineStart = false
392418
continue
393419
}
394420

395-
// Other top-level clauses: newline, no indent.
421+
// Other top-level clauses: newline with base indentation.
396422
trimTrailingSpace(&b)
397423
b.WriteString("\n")
424+
b.WriteString(strings.Repeat(indentUnit, baseIndent))
398425
b.WriteString(ct.Text)
399426
atLineStart = false
400427
inSelect = false
401428
continue
402429
}
403430

404-
// In a SELECT column list, break on commas (top-level only).
431+
// In a SELECT column list, break on commas at current paren depth.
432+
// Each column gets its own line with proper indentation.
405433
if inSelect && ct.Kind == tokSymbol && ct.Text == "," && parenDepth == 0 {
406434
b.WriteString(",")
407435
b.WriteString("\n")
408-
b.WriteString(indent)
436+
b.WriteString(strings.Repeat(indentUnit, baseIndent+1))
409437
atLineStart = true
410438
continue
411439
}

internal/llm/sqlfmt_test.go

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,24 @@ func TestFormatSQLSubquery(t *testing.T) {
6464
assert.Contains(t, got, "WHERE id IN (SELECT project_id FROM quotes WHERE total_cents > 10000)")
6565
}
6666

67+
func TestFormatSQLNestedSubquery(t *testing.T) {
68+
got := FormatSQL(
69+
"SELECT name, (SELECT COUNT(*) FROM quotes WHERE project_id = projects.id) AS quote_count FROM projects WHERE status = 'active'",
70+
0,
71+
)
72+
// Verify main query columns are on separate lines
73+
assert.Contains(t, got, "SELECT name,")
74+
assert.Contains(t, got, "AS quote_count")
75+
assert.Contains(t, got, "FROM projects")
76+
assert.Contains(t, got, "WHERE status = 'active'")
77+
// Verify each column is on its own line
78+
lines := strings.Split(got, "\n")
79+
assert.Equal(t, 4, len(lines), "should have 4 lines: SELECT, column with subquery, FROM, WHERE")
80+
// Second line should be indented and contain the subquery
81+
assert.True(t, strings.HasPrefix(lines[1], " "), "second column should be indented")
82+
assert.Contains(t, lines[1], "SELECT COUNT(*)", "should contain nested SELECT on column line")
83+
}
84+
6785
func TestFormatSQLGroupBy(t *testing.T) {
6886
got := FormatSQL(
6987
"SELECT status, COUNT(*) AS cnt FROM projects "+
@@ -84,7 +102,10 @@ func TestFormatSQLGroupBy(t *testing.T) {
84102
}
85103

86104
func TestFormatSQLKeywordsUppercased(t *testing.T) {
87-
got := FormatSQL("select name from projects where status = 'underway' and deleted_at is null limit 1", 0)
105+
got := FormatSQL(
106+
"select name from projects where status = 'underway' and deleted_at is null limit 1",
107+
0,
108+
)
88109
assert.Contains(t, got, "SELECT")
89110
assert.Contains(t, got, "FROM")
90111
assert.Contains(t, got, "WHERE")
@@ -109,7 +130,11 @@ func TestFormatSQLDateFunctions(t *testing.T) {
109130
0,
110131
)
111132
assert.Contains(t, got, "SELECT name")
112-
assert.Contains(t, got, "date(last_serviced_at, '+' || interval_months || ' months') AS next_due")
133+
assert.Contains(
134+
t,
135+
got,
136+
"date(last_serviced_at, '+' || interval_months || ' months') AS next_due",
137+
)
113138
assert.Contains(t, got, "FROM maintenance_items")
114139
assert.Contains(t, got, "ORDER BY next_due")
115140
}

0 commit comments

Comments
 (0)