From 18917324c532dc212603dea60bab4be8514aa2d8 Mon Sep 17 00:00:00 2001
From: David Pordomingo
Date: Tue, 20 Mar 2018 18:41:15 +0100
Subject: [PATCH 1/4] Add migrations to use uast nullable cols in FilePairs

---
 cli/migrations/README.md                      |  17 ++
 cli/migrations/command-UAST-add-columns.go    |  59 ++++++
 .../command-UAST-import-from-source-db.go     | 181 ++++++++++++++++++
 cli/migrations/command.go                     |  31 +++
 cli/migrations/main.go                        |  43 +++++
 5 files changed, 331 insertions(+)
 create mode 100644 cli/migrations/README.md
 create mode 100644 cli/migrations/command-UAST-add-columns.go
 create mode 100644 cli/migrations/command-UAST-import-from-source-db.go
 create mode 100644 cli/migrations/command.go
 create mode 100644 cli/migrations/main.go

diff --git a/cli/migrations/README.md b/cli/migrations/README.md
new file mode 100644
index 0000000..3ac186b
--- /dev/null
+++ b/cli/migrations/README.md
@@ -0,0 +1,17 @@
+### Disclaimer:
+
+These scripts are **not real migrations**. There are no up/down rules. These scripts are not production ready. The idempotence of these scripts is not guaranteed at all. The internal DB should be SQLite.
+
+These scripts were needed to migrate the Database between different states.
+
+- [Add UAST nullable columns](command-UAST-add-columns.go)
+```shell
+# prepares the current "internal.db". Adds "uast_a" and "uast_b" nulable cols
+go run cli/migrations/*.go uast-add-cols internal.db
+```
+
+- [Add UAST to a database, reading from other Database](command-UAST-import-from-source-db.go)
+```shell
+# import UASTs into "internal.db" reading from "source.db"
+go run cli/migrations/*.go uast-import internal.db source.db
+```
diff --git a/cli/migrations/command-UAST-add-columns.go b/cli/migrations/command-UAST-add-columns.go
new file mode 100644
index 0000000..d4cd457
--- /dev/null
+++ b/cli/migrations/command-UAST-add-columns.go
@@ -0,0 +1,60 @@
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/src-d/code-annotation/server/dbutil"
+)
+
+// uastColsCmd is the "uast-add-cols" command: it adds the nullable UAST
+// columns to the file_pairs table of the internal database.
+type uastColsCmd struct {
+	commandDesc
+	Args struct {
+		InternalDBPath string `description:"filepath to the internal SQLite database"`
+	} `positional-args:"yes" required:"yes"`
+}
+
+var uastColsOpts = uastColsCmd{
+	commandDesc: commandDesc{
+		name:      "uast-add-cols",
+		shortDesc: "Add uast BLOB columns",
+		longDesc:  "Adds 'uast_a' and 'uast_b' BLOB columns to the sqlite://InternalDBPath database",
+	},
+}
+
+// queries
+const (
+	leftFile  = "a"
+	rightFile = "b"
+
+	addUastQuery = "ALTER TABLE file_pairs ADD COLUMN uast_%s BLOB"
+)
+
+// Execute opens the internal database and adds the uast_a and uast_b columns.
+// Errors are returned instead of being passed to log.Fatal, which would exit
+// the process without running the deferred Close.
+func (c *uastColsCmd) Execute(args []string) error {
+	internalDb, err := dbutil.Open(fmt.Sprintf(sqliteDSN, c.Args.InternalDBPath), false)
+	if err != nil {
+		return err
+	}
+
+	defer internalDb.Close()
+
+	for _, side := range []string{leftFile, rightFile} {
+		if err := addColumn(internalDb, side); err != nil {
+			return err
+		}
+	}
+
+	log.Println("New BLOB columns 'uast_a' and 'uast_b' were added")
+	return nil
+}
+
+// addColumn adds the uast BLOB column of the given side to file_pairs.
+func addColumn(db dbutil.DB, side string) error {
+	_, err := db.Exec(fmt.Sprintf(addUastQuery, side))
+	return err
+}
diff --git a/cli/migrations/command-UAST-import-from-source-db.go b/cli/migrations/command-UAST-import-from-source-db.go
new file mode 100644
index 0000000..0d0e8e8
--- /dev/null
+++ b/cli/migrations/command-UAST-import-from-source-db.go
@@ -0,0 +1,217 @@
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/src-d/code-annotation/server/dbutil"
+)
+
+// uastImportCmd is the "uast-import" command: it fills the missing uast_a and
+// uast_b values of the internal database with UASTs read from a source one.
+type uastImportCmd struct {
+	commandDesc
+	Args struct {
+		InternalDBPath string `description:"filepath to the internal SQLite database"`
+		SourceDBPath   string `description:"filepath to the SQLite database containing the UAST to import"`
+	} `positional-args:"yes" required:"yes"`
+}
+
+var uastImportOpts = uastImportCmd{
+	commandDesc: commandDesc{
+		name:      "uast-import",
+		shortDesc: "Import UASTs",
+		longDesc:  "Adds UASTs to the sqlite://InternalDBPath database reading from sqlite://SourceDBPath database",
+	},
+}
+
+// queries
+const (
+	filePairsWithoutUastQuery = `SELECT
+		blob_id_a, uast_a IS NOT NULL as hasUastA,
+		blob_id_b, uast_b IS NOT NULL as hasUastB
+		FROM file_pairs
+		WHERE hasUastA = 0 or hasUastB = 0`
+	uastByBlobIDQuery = `SELECT uast_%s
+		FROM files
+		WHERE blob_id_%s = CAST($1 AS BLOB)
+		LIMIT 1`
+	updateByBlobIDQuery = `UPDATE file_pairs
+		SET uast_%s = $1
+		WHERE blob_id_%s = $2 and uast_%s IS NULL`
+	// IF NOT EXISTS keeps the index creation from failing when the command is
+	// run more than once against the same source database.
+	// NOTE(review): the index is created on file_pairs while uastByBlobIDQuery
+	// reads from files; confirm against the schema of the source database.
+	indexAddBlobID = `CREATE INDEX IF NOT EXISTS blob_id_%s ON file_pairs (blob_id_%s);`
+)
+
+// command messages
+const (
+	uastImportMsgSuccess = `UASTs added into the internal DB:
+	- imported UASTs: %d
+	- edited rows: %d`
+	uastImportMsgError = `Some rows could not be properly updated:
+	- UAST not inserted: %d
+	- FilePair read errors: %d`
+)
+
+// file identifies one side ("a" or "b") of a file pair by its blob ID.
+type file struct {
+	side   string
+	blobID string
+}
+
+// Execute imports a UAST for every blob of the internal database that has
+// none. Errors are returned instead of being passed to log.Fatal, which would
+// exit the process without running the deferred Close calls.
+func (c *uastImportCmd) Execute(args []string) error {
+	internalDb, err := dbutil.Open(fmt.Sprintf(sqliteDSN, c.Args.InternalDBPath), false)
+	if err != nil {
+		return err
+	}
+
+	defer internalDb.Close()
+
+	sourceDb, err := dbutil.Open(fmt.Sprintf(sqliteDSN, c.Args.SourceDBPath), false)
+	if err != nil {
+		return err
+	}
+
+	defer sourceDb.Close()
+
+	// The indexes only speed up the lookups, so a failure to create them is
+	// reported but does not abort the import.
+	if err := fixSourceDb(sourceDb); err != nil {
+		log.Println(err)
+	}
+
+	files, fileReadfailures := getFilesToUpdate(internalDb)
+	log.Printf("Found %d blobs without UAST", len(files))
+
+	var rowsEdited, uastFails, uastsImported int64
+	for _, file := range files {
+		affectedRows, err := importUastForBlobID(internalDb, sourceDb, file.side, file.blobID)
+		if err != nil {
+			log.Println(err)
+			uastFails++
+			continue
+		}
+
+		rowsEdited += affectedRows
+		uastsImported++
+	}
+
+	log.Printf(uastImportMsgSuccess, uastsImported, rowsEdited)
+
+	if fileReadfailures+uastFails > 0 {
+		return fmt.Errorf(uastImportMsgError, uastFails, fileReadfailures)
+	}
+
+	return nil
+}
+
+// files is a set of blobs keyed by the blob ID and the side joined with "_".
+type files map[string]file
+
+// add registers the blob unless ignore is true or it is already present.
+func (f *files) add(blobID string, side string, ignore bool) {
+	if ignore {
+		return
+	}
+
+	key := blobID + "_" + side
+	if _, ok := (*f)[key]; !ok {
+		(*f)[key] = file{side: side, blobID: blobID}
+	}
+}
+
+// getFilesToUpdate returns the blobs of the internal database that lack a
+// UAST, and the number of failures found while reading them. It terminates
+// the process when the query itself cannot be run.
+func getFilesToUpdate(internalDb dbutil.DB) (map[string]file, int64) {
+	rows, err := internalDb.Query(filePairsWithoutUastQuery)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	defer rows.Close()
+
+	filesToImport := files{}
+	var failures int64
+	for rows.Next() {
+		var blobIDA, blobIDB string
+		var hasUastA, hasUastB int
+		err := rows.Scan(&blobIDA, &hasUastA, &blobIDB, &hasUastB)
+		if err != nil {
+			log.Printf("Failed to read row from internal DB\nerror: %v\n", err)
+			failures++
+			continue
+		}
+
+		filesToImport.add(blobIDA, leftFile, hasUastA == 1)
+		filesToImport.add(blobIDB, rightFile, hasUastB == 1)
+	}
+
+	// rows.Next also returns false when the iteration fails, so check Err to
+	// avoid treating a truncated result set as complete.
+	if err := rows.Err(); err != nil {
+		log.Printf("Failed to iterate rows from internal DB\nerror: %v\n", err)
+		failures++
+	}
+
+	return filesToImport, failures
+}
+
+// importUastForBlobID copies the UAST of the given blob from the source
+// database into the internal one; it returns the number of rows updated.
+func importUastForBlobID(internalDb dbutil.DB, sourceDb dbutil.DB, side string, blobID string) (int64, error) {
+	uast, err := getUastByBlobID(sourceDb, side, blobID)
+	if err != nil {
+		return 0, fmt.Errorf("uast_%s could not be retrieved for blobID#%s; %s", side, blobID, err)
+	}
+
+	return setUastToBlobID(internalDb, side, blobID, uast)
+}
+
+// getUastByBlobID reads the UAST of the given blob and side from the source database.
+func getUastByBlobID(sourceDb dbutil.DB, side string, blobID string) ([]byte, error) {
+	var uast []byte
+	if err := sourceDb.QueryRow(fmt.Sprintf(uastByBlobIDQuery, side, side), blobID).Scan(&uast); err != nil {
+		return nil, err
+	}
+
+	return uast, nil
+}
+
+// setUastToBlobID stores uast in the rows of the internal database whose blob
+// ID of the given side matches and whose UAST is still NULL; it returns the
+// number of rows updated, or an error when no row was updated.
+func setUastToBlobID(internalDb dbutil.DB, side string, blobID string, uast []byte) (int64, error) {
+	res, err := internalDb.Exec(fmt.Sprintf(updateByBlobIDQuery, side, side, side), uast, blobID)
+	if err != nil {
+		return 0, fmt.Errorf("uast_%s could not be saved for blobID#%s; %s", side, blobID, err)
+	}
+
+	rows, err := res.RowsAffected()
+	if err != nil {
+		return 0, fmt.Errorf("affected rows could not be read for uast_%s of blobID#%s; %s", side, blobID, err)
+	}
+
+	if rows == 0 {
+		return 0, fmt.Errorf("no uast_%s to be imported for blobID#%s", side, blobID)
+	}
+
+	return rows, nil
+}
+
+// fixSourceDb creates the blob_id_a and blob_id_b indexes in the source database.
+func fixSourceDb(sourceDb dbutil.DB) error {
+	for _, side := range []string{leftFile, rightFile} {
+		if _, err := sourceDb.Exec(fmt.Sprintf(indexAddBlobID, side, side)); err != nil {
+			return fmt.Errorf("can not create index over blob_id_%s; %s", side, err)
+		}
+	}
+
+	return nil
+}
diff --git a/cli/migrations/command.go b/cli/migrations/command.go
new file mode 100644
index 0000000..5ff02b1
--- /dev/null
+++ b/cli/migrations/command.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+	_ "net/http/pprof"
+)
+
+// Command is a runnable command that can be registered in the parser.
+type Command interface {
+	Name() string
+	ShortDesc() string
+	LongDesc() string
+	Execute(args []string) error
+}
+
+// commandDesc holds the texts describing a command; embedding it provides
+// the Name, ShortDesc and LongDesc methods of the Command interface.
+type commandDesc struct {
+	name      string
+	shortDesc string
+	longDesc  string
+}
+
+// Name returns the name under which the command is registered.
+func (c *commandDesc) Name() string {
+	return c.name
+}
+
+// ShortDesc returns the short description of the command.
+func (c *commandDesc) ShortDesc() string {
+	return c.shortDesc
+}
+
+// LongDesc returns the long description of the command.
+func (c *commandDesc) LongDesc() string {
+	return c.longDesc
+}
diff --git a/cli/migrations/main.go b/cli/migrations/main.go
new file mode 100644
index 0000000..39db7a1
--- /dev/null
+++ b/cli/migrations/main.go
@@ -0,0 +1,48 @@
+package main
+
+import (
+	"fmt"
+	_ "net/http/pprof"
+	"os"
+
+	"github.com/jessevdk/go-flags"
+)
+
+const (
+	description string = "Migrate internal database"
+	sqliteDSN          = "sqlite://%s"
+)
+
+func main() {
+	parser := flags.NewParser(nil, flags.Default)
+	addCommand(parser, &uastColsOpts)
+	addCommand(parser, &uastImportOpts)
+	parse(parser, description)
+}
+
+// addCommand registers command in parser under command.Name(); it panics
+// if the registration fails.
+func addCommand(parser *flags.Parser, command Command) {
+	if _, err := parser.AddCommand(command.Name(), command.ShortDesc(), command.LongDesc(), command); err != nil {
+		panic(err)
+	}
+}
+
+// parse sets the long description of parser and parses the arguments with it.
+// It exits with status 0 when help was requested and with status 1 on any
+// other error, printing the usage first when the error is a *flags.Error.
+func parse(parser *flags.Parser, description string) {
+	parser.LongDescription = description
+	if _, err := parser.Parse(); err != nil {
+		if flagsErr, ok := err.(*flags.Error); ok {
+			if flagsErr.Type == flags.ErrHelp {
+				os.Exit(0)
+			}
+
+			fmt.Println()
+			parser.WriteHelp(os.Stdout)
+		}
+
+		os.Exit(1)
+	}
+}

From 82c4514249c35267ee0b9cba2a18772f18ced7df Mon Sep 17 00:00:00 2001
From: David Pordomingo
Date: Tue, 27 Mar 2018 18:04:47 +0200
Subject: [PATCH 2/4] Add migrations to remove diff column

---
 cli/migrations/README.md                     |   6 +
 cli/migrations/command-diff-remove-column.go | 120 +++++++++++++++++++
 cli/migrations/main.go                       |   1 +
 3 files changed, 127 insertions(+)
 create mode 100644 cli/migrations/command-diff-remove-column.go

diff --git a/cli/migrations/README.md b/cli/migrations/README.md
index 3ac186b..ae5b8dc 100644
--- a/cli/migrations/README.md
+++ b/cli/migrations/README.md
@@ -15,3 +15,9 @@ go run cli/migrations/*.go uast-add-cols internal.db
 # import UASTs into "internal.db" reading from "source.db"
 go run cli/migrations/*.go uast-import internal.db source.db
 ```
+
+- [Remove diff column from a database](command-diff-remove-column.go)
+```shell
+# prepares the current "internal.db".
Remove "diff" col
+go run cli/migrations/*.go diff-rm-col internal.db
+```
diff --git a/cli/migrations/command-diff-remove-column.go b/cli/migrations/command-diff-remove-column.go
new file mode 100644
index 0000000..1658e14
--- /dev/null
+++ b/cli/migrations/command-diff-remove-column.go
@@ -0,0 +1,142 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"log"
+
+	"github.com/src-d/code-annotation/server/dbutil"
+)
+
+// diffRmColCmd is the "diff-rm-col" command: it rebuilds the file_pairs
+// table of the internal database without its diff column.
+type diffRmColCmd struct {
+	commandDesc
+	Args struct {
+		InternalDBPath string `description:"filepath to the internal SQLite database"`
+	} `positional-args:"yes" required:"yes"`
+}
+
+var diffRmColOpts = diffRmColCmd{
+	commandDesc: commandDesc{
+		name:      "diff-rm-col",
+		shortDesc: "Remove diff column",
+		longDesc:  "Remove diff column from sqlite://InternalDBPath database",
+	},
+}
+
+// queries
+const (
+	fileFairsTable = "file_pairs"
+	tmpTable       = "file_pairs_tmp"
+
+	cols = `id,
+		blob_id_a, repository_id_a, commit_hash_a, path_a, content_a, hash_a, uast_a,
+		blob_id_b, repository_id_b, commit_hash_b, path_b, content_b, hash_b, uast_b,
+		score, experiment_id`
+
+	createTmpTableQuery = `CREATE TABLE IF NOT EXISTS ` + tmpTable + ` (
+		id INTEGER,
+		blob_id_a TEXT, repository_id_a TEXT, commit_hash_a TEXT, path_a TEXT, content_a TEXT, hash_a TEXT,
+		blob_id_b TEXT, repository_id_b TEXT, commit_hash_b TEXT, path_b TEXT, content_b TEXT, hash_b TEXT,
+		score DOUBLE PRECISION, experiment_id INTEGER,
+		uast_a BLOB, uast_b BLOB,
+		PRIMARY KEY (id),
+		FOREIGN KEY(experiment_id) REFERENCES experiments(id))`
+
+	fillTmpTableQuery   = "INSERT INTO " + tmpTable + "(" + cols + ") SELECT " + cols + " FROM " + fileFairsTable
+	// NOTE(review): SQLite documents "PRAGMA foreign_keys" as a no-op inside a
+	// transaction, and execQueries runs every query in one; confirm that the
+	// disable/enable statements have the intended effect.
+	disableIndexQuery   = "PRAGMA foreign_keys=OFF"
+	dropOldTableQuery   = "DROP TABLE " + fileFairsTable
+	renameTmpTableQuery = "ALTER TABLE " + tmpTable + " RENAME TO " + fileFairsTable
+	checkIndexQuery     = "PRAGMA foreign_key_check"
+	enableIndexQuery    = "PRAGMA foreign_keys=ON"
+)
+
+// Execute copies file_pairs into a table without the diff column and swaps it
+// in place of the original. Errors are returned instead of being passed to
+// log.Fatal, which would exit without running the deferred Close.
+func (c *diffRmColCmd) Execute(args []string) error {
+	internalDb, err := dbutil.Open(fmt.Sprintf(sqliteDSN, c.Args.InternalDBPath), false)
+	if err != nil {
+		return err
+	}
+
+	defer internalDb.Close()
+
+	queries := []string{
+		createTmpTableQuery,
+		fillTmpTableQuery,
+		disableIndexQuery,
+		dropOldTableQuery,
+		renameTmpTableQuery,
+		enableIndexQuery,
+	}
+
+	if err := execQueries(internalDb, queries); err != nil {
+		return err
+	}
+
+	log.Println("Deleted 'diff' column")
+	return nil
+}
+
+// execQueries runs the queries and the foreign key check in one transaction,
+// which is committed if all of them succeed and rolled back otherwise.
+func execQueries(db dbutil.DB, queries []string) (err error) {
+	tx, err := db.Begin()
+	if err != nil {
+		return err
+	}
+
+	// Set once Commit is attempted: a transaction is finished after Commit even
+	// when it fails, so rolling it back would only obscure the commit error.
+	commitTried := false
+	defer func() {
+		if err == nil || commitTried {
+			return
+		}
+
+		if rollbackErr := tx.Rollback(); rollbackErr != nil {
+			err = fmt.Errorf("Error running migration; %s \nThe rollback failed; %s", err, rollbackErr)
+		}
+	}()
+
+	for _, query := range queries {
+		if _, err := tx.Exec(query); err != nil {
+			return err
+		}
+	}
+
+	if err := ensureForeignKeys(tx); err != nil {
+		return err
+	}
+
+	commitTried = true
+	return tx.Commit()
+}
+
+// ensureForeignKeys returns an error if "PRAGMA foreign_key_check" reports
+// any row, or if the check itself cannot be completed.
+func ensureForeignKeys(tx queryer) error {
+	rows, err := tx.Query(checkIndexQuery)
+	if err != nil {
+		return err
+	}
+
+	// Release the result set before the caller commits the transaction.
+	defer rows.Close()
+
+	if rows.Next() {
+		return fmt.Errorf("Foreign key constraints were violated")
+	}
+
+	return rows.Err()
+}
+
+// queryer is the subset of the transaction API used by ensureForeignKeys.
+type queryer interface {
+	Query(string, ...interface{}) (*sql.Rows, error)
+}
diff --git a/cli/migrations/main.go b/cli/migrations/main.go
index 39db7a1..9248875 100644
--- a/cli/migrations/main.go
+++ b/cli/migrations/main.go
@@ -17,6 +17,7 @@ func main() {
 	parser := flags.NewParser(nil, flags.Default)
 	addCommand(parser, &uastColsOpts)
 	addCommand(parser, &uastImportOpts)
+	addCommand(parser, &diffRmColOpts)
 	parse(parser, description)
 }
 

From ff30b2c6c2f5fb060cc04117e40428969eed83fd Mon Sep 17 00:00:00 2001
From: David Pordomingo
Date: Tue, 27 Mar 2018 18:05:44 +0200
Subject: [PATCH 3/4] Add migrations to vacuum internal database

---
 cli/migrations/README.md         |  6 ++++
 cli/migrations/command-vacuum.go | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 cli/migrations/main.go           |  1 +
 3 files changed, 54
insertions(+)
 create mode 100644 cli/migrations/command-vacuum.go

diff --git a/cli/migrations/README.md b/cli/migrations/README.md
index ae5b8dc..cf8c0fc 100644
--- a/cli/migrations/README.md
+++ b/cli/migrations/README.md
@@ -4,6 +4,12 @@ These scripts are **not real migrations**. There is no up/down rules. The
 
 These scripts were needed to migrate the Database between different states.
 
+- [Vacuum database](command-vacuum.go)
+```shell
+# rebuilds the database to defragment it
+go run cli/migrations/*.go vacuum internal.db
+```
+
 - [Add UAST nullable columns](command-UAST-add-columns.go)
 ```shell
 # prepares the current "internal.db". Adds "uast_a" and "uast_b" nulable cols
diff --git a/cli/migrations/command-vacuum.go b/cli/migrations/command-vacuum.go
new file mode 100644
index 0000000..3d3a062
--- /dev/null
+++ b/cli/migrations/command-vacuum.go
@@ -0,0 +1,51 @@
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/src-d/code-annotation/server/dbutil"
+)
+
+// vacuumCmd is the "vacuum" command: it runs VACUUM on the internal
+// database.
+type vacuumCmd struct {
+	commandDesc
+	Args struct {
+		InternalDBPath string `description:"filepath to the internal SQLite database"`
+	} `positional-args:"yes" required:"yes"`
+}
+
+var vacuumOpts = vacuumCmd{
+	commandDesc: commandDesc{
+		name:      "vacuum",
+		shortDesc: "Rebuilds the database to defragment it",
+		longDesc:  "Rebuilds the sqlite://InternalDBPath database to eliminate free pages, compact table data...",
+	},
+}
+
+// queries
+const (
+	vacuumQuery = "VACUUM"
+)
+
+// Execute opens the internal database and runs VACUUM on it. Errors are
+// returned instead of being passed to log.Fatal, which would exit the
+// process without running the deferred Close.
+func (c *vacuumCmd) Execute(args []string) error {
+	internalDb, err := dbutil.Open(fmt.Sprintf(sqliteDSN, c.Args.InternalDBPath), false)
+	if err != nil {
+		return err
+	}
+
+	defer internalDb.Close()
+
+	log.Println("Running VACUUM process ...")
+
+	if _, err := internalDb.Exec(vacuumQuery); err != nil {
+		return err
+	}
+
+	log.Println("... VACUUM process finished")
+	return nil
+}
diff --git a/cli/migrations/main.go b/cli/migrations/main.go
index 9248875..4033ccf 100644
--- a/cli/migrations/main.go
+++ b/cli/migrations/main.go
@@ -18,6 +18,7 @@ func main() {
 	addCommand(parser, &uastColsOpts)
 	addCommand(parser, &uastImportOpts)
 	addCommand(parser, &diffRmColOpts)
+	addCommand(parser, &vacuumOpts)
 	parse(parser, description)
 }
 

From 3f013f39bcfbbb12b425850e3931d084c535cb69 Mon Sep 17 00:00:00 2001
From: David Pordomingo
Date: Tue, 3 Apr 2018 19:07:54 +0200
Subject: [PATCH 4/4] Add migrations to remove features table

---
 cli/migrations/README.md                      |  6 +++
 cli/migrations/command-features-drop-table.go | 52 +++++++++++++++++++
 cli/migrations/main.go                        |  1 +
 3 files changed, 59 insertions(+)
 create mode 100644 cli/migrations/command-features-drop-table.go

diff --git a/cli/migrations/README.md b/cli/migrations/README.md
index cf8c0fc..b0d41fc 100644
--- a/cli/migrations/README.md
+++ b/cli/migrations/README.md
@@ -27,3 +27,9 @@ go run cli/migrations/*.go uast-import internal.db source.db
 # prepares the current "internal.db".
Remove "diff" col
 go run cli/migrations/*.go diff-rm-col internal.db
 ```
+
+- [Remove features table from a database](command-features-drop-table.go)
+```shell
+# drops the 'features' table from the current "internal.db"
+go run cli/migrations/*.go features-drop-table internal.db
+```
diff --git a/cli/migrations/command-features-drop-table.go b/cli/migrations/command-features-drop-table.go
new file mode 100644
index 0000000..8d1b67a
--- /dev/null
+++ b/cli/migrations/command-features-drop-table.go
@@ -0,0 +1,55 @@
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/src-d/code-annotation/server/dbutil"
+)
+
+// featureDropCmd is the "features-drop-table" command: it drops the
+// features table of the internal database.
+type featureDropCmd struct {
+	commandDesc
+	Args struct {
+		InternalDBPath string `description:"filepath to the internal SQLite database"`
+	} `positional-args:"yes" required:"yes"`
+}
+
+var featureDropOpts = featureDropCmd{
+	commandDesc: commandDesc{
+		name:      "features-drop-table",
+		shortDesc: "Drop Features table",
+		longDesc:  "Removes the Features table from the sqlite://InternalDBPath database",
+	},
+}
+
+// queries
+const (
+	dropFeaturesTableQuery = "DROP TABLE features"
+)
+
+// Execute opens the internal database and drops its features table. Errors
+// are returned instead of being passed to log.Fatal, which would exit the
+// process without running the deferred Close.
+func (c *featureDropCmd) Execute(args []string) error {
+	internalDb, err := dbutil.Open(fmt.Sprintf(sqliteDSN, c.Args.InternalDBPath), false)
+	if err != nil {
+		return err
+	}
+
+	defer internalDb.Close()
+
+	if err := dropFeaturesTable(internalDb); err != nil {
+		return err
+	}
+
+	log.Println("Features table was deleted")
+	return nil
+}
+
+// dropFeaturesTable drops the features table of the given database.
+func dropFeaturesTable(db dbutil.DB) error {
+	_, err := db.Exec(dropFeaturesTableQuery)
+	return err
+}
diff --git a/cli/migrations/main.go b/cli/migrations/main.go
index 4033ccf..497a569 100644
--- a/cli/migrations/main.go
+++ b/cli/migrations/main.go
@@ -18,6 +18,7 @@ func main() {
 	addCommand(parser, &uastColsOpts)
 	addCommand(parser, &uastImportOpts)
 	addCommand(parser, &diffRmColOpts)
+	addCommand(parser, &featureDropOpts)
 	addCommand(parser, &vacuumOpts)
 	parse(parser, description)
 }