From 5e1c86b655c860512f8a37a180800c34fcb80069 Mon Sep 17 00:00:00 2001
From: Matthew Fernandez <matthew.fernandez@gmail.com>
Date: Wed, 12 Apr 2023 15:11:40 -0700
Subject: [PATCH] make 'symbols.path' a foreign key instead of direct path

This change takes a step towards normalising the database. The `path` field of
the `symbols` table is now a foreign key pointer into the `records` table. This
has a number of implications:

  1. Entries in the `symbols` table require a corresponding entry in the
     `records` table. Without this, they are effectively orphaned without a
     path.

  2. The `records` table gains a new field, `id`. Due to the way SQLite works,
     this is an alias for the internal `rowid` column. This column should rarely
     need to be referenced beyond in `join` expressions with `symbols`. Though
     `clink_db_add_record` gives the caller access to this field in anticipation
     of a more efficient API for symbol addition in future.

This change has a large effect on database on-disk size, reducing an index of a
recent Graphviz commit from 58MB to 20MB (65%).

This was not specifically intended to accelerate database construction (the
hinted at more efficient symbol addition above is the aimed for optimisation).
However this also has a large effect on this too, reducing the creation of the
above index from 7.94s to 1.81s (77%).
---
 clink/src/clink-repl                   | 37 +++++++++++++++-----------
 libclink/src/db_add_symbol.c           | 13 ++++++---
 libclink/src/db_find_call.c            |  9 ++++---
 libclink/src/db_find_caller.c          |  9 ++++---
 libclink/src/db_find_definition.c      |  9 ++++---
 libclink/src/db_find_includer.c        |  9 ++++---
 libclink/src/db_find_symbol.c          |  7 ++---
 libclink/src/db_open.c                 |  1 +
 libclink/src/db_remove.c               | 18 +++++++++++--
 libclink/src/schema.sql                |  5 ++--
 libclink/src/vim_read_into.c           |  4 ++-
 test/cases/atomic-builtins-2.c         |  2 +-
 test/cases/atomic-builtins.c           |  4 +--
 test/cases/basic.py                    |  6 ++---
 test/cases/cr.c                        |  8 +++---
 test/cases/def-export.def              |  2 +-
 test/cases/define-branch-0.c           |  2 +-
 test/cases/define-branch-1.c           |  2 +-
 test/cases/define-branch-elif-0.c      |  2 +-
 test/cases/define-branch-elif-1.c      |  2 +-
 test/cases/define-branch-else-0.c      |  2 +-
 test/cases/define-branch-else-1.c      |  2 +-
 test/cases/definition-multiplication.c |  2 +-
 test/cases/functions-in-scopes.c       |  2 +-
 test/cases/global.c                    |  2 +-
 test/cases/global_const.c              |  2 +-
 test/cases/global_const2.c             |  2 +-
 test/cases/ifdef.c                     |  8 +++---
 test/cases/include.c                   |  2 +-
 test/cases/include2.c                  |  2 +-
 test/cases/macro-definition-parent.c   |  6 ++---
 test/cases/macro-definition.c          |  2 +-
 test/cases/macro-parent.c              |  2 +-
 test/cases/other-builtins-2.c          |  2 +-
 test/cases/other-builtins.c            |  2 +-
 test/cases/undef.c                     |  2 +-
 36 files changed, 114 insertions(+), 79 deletions(-)

diff --git a/clink/src/clink-repl b/clink/src/clink-repl
index 7890f83f..e464f381 100755
--- a/clink/src/clink-repl
+++ b/clink/src/clink-repl
@@ -86,10 +86,11 @@ def in_bw(s: Optional[str]) -> str:
 def find_symbol(db: sqlite3.Connection, name: str):
     logging.debug(f"find_symbol of {name}")
     SQL = (
-        "select symbols.path, symbols.parent, symbols.line, content.body "
-        "from symbols left join content on symbols.path = content.path and "
+        "select records.path, symbols.parent, symbols.line, content.body "
+        "from symbols inner join records on symbols.path = records.id "
+        "left join content on records.path = content.path and "
         "symbols.line = content.line where symbols.name = :name order by "
-        "symbols.path, symbols.line, symbols.col;"
+        "records.path, symbols.line, symbols.col;"
     )
     rows = select(db, SQL, {"name": name})
     print(f"cscope: {len(rows)} lines")
@@ -100,11 +101,12 @@ def find_symbol(db: sqlite3.Connection, name: str):
 def find_definition(db: sqlite3.Connection, name: str):
     logging.debug(f"find_definition of {name}")
     SQL = (
-        "select symbols.path, symbols.line, content.body "
-        "from symbols left join content on symbols.path = content.path and "
+        "select records.path, symbols.line, content.body "
+        "from symbols inner join records on symbols.path = records.id "
+        "left join content on records.path = content.path and "
         "symbols.line = content.line where symbols.name = :name and "
         f"symbols.category = {CLINK_DEFINITION} order by "
-        "symbols.path, symbols.line, symbols.col;"
+        "records.path, symbols.line, symbols.col;"
     )
     rows = select(db, SQL, {"name": name})
     print(f"cscope: {len(rows)} lines")
@@ -115,11 +117,12 @@ def find_definition(db: sqlite3.Connection, name: str):
 def find_calls(db: sqlite3.Connection, caller: str):
     logging.debug(f"find_calls of {caller}")
     SQL = (
-        "select symbols.path, symbols.name, symbols.line, content.body "
-        "from symbols left join content on symbols.path = content.path and "
+        "select records.path, symbols.name, symbols.line, content.body "
+        "from symbols inner join records on symbols.path = records.id "
+        "left join content on records.path = content.path and "
         "symbols.line = content.line where symbols.parent = :caller and "
         f"symbols.category = {CLINK_FUNCTION_CALL} order by "
-        "symbols.path, symbols.line, symbols.col;"
+        "records.path, symbols.line, symbols.col;"
     )
     rows = select(db, SQL, {"caller": caller})
     print(f"cscope: {len(rows)} lines")
@@ -130,11 +133,12 @@ def find_calls(db: sqlite3.Connection, caller: str):
 def find_callers(db: sqlite3.Connection, callee: str):
     logging.debug(f"find_callers of {callee}")
     SQL = (
-        "select symbols.path, symbols.parent, symbols.line, content.body "
-        "from symbols left join content on symbols.path = content.path and "
+        "select records.path, symbols.parent, symbols.line, content.body "
+        "from symbols inner join symbols.path = records.id "
+        "left join content on records.path = content.path and "
         "symbols.line = content.line where symbols.name = :callee and "
         f"symbols.category = {CLINK_FUNCTION_CALL} order by "
-        "symbols.path, symbols.line, symbols.col;"
+        "records.path, symbols.line, symbols.col;"
     )
     rows = select(db, SQL, {"callee": callee})
     print(f"cscope: {len(rows)} lines")
@@ -145,7 +149,7 @@ def find_callers(db: sqlite3.Connection, callee: str):
 def find_file(db: sqlite3.Connection, filename: str):
     logging.debug(f"find_file of {filename}")
     SQL = (
-        "select distinct path from symbols where path = :filename or "
+        "select distinct path from records where path = :filename or "
         "path like :pattern order by path"
     )
     rows = select(db, SQL, {"filename": filename, "pattern": f"%/{filename}"})
@@ -157,11 +161,12 @@ def find_file(db: sqlite3.Connection, filename: str):
 def find_includers(db: sqlite3.Connection, path: str):
     logging.debug(f"find_includers of {path}")
     SQL = (
-        "select symbols.path, symbols.parent, symbols.line, content.body "
-        "from symbols left join content on symbols.path = content.path and "
+        "select records.path, symbols.parent, symbols.line, content.body "
+        "from symbols inner join records on symbols.path = records.id "
+        "left join content on records.path = content.path and "
         "symbols.line = content.line where symbols.name like :name and "
         f"symbols.category = {CLINK_INCLUDE} order by "
-        "symbols.path, symbols.line, symbols.col;"
+        "records.path, symbols.line, symbols.col;"
     )
     rows = select(db, SQL, {"name": f"%{path}"})
     print(f"cscope: {len(rows)} lines")
diff --git a/libclink/src/db_add_symbol.c b/libclink/src/db_add_symbol.c
index f69d1550..9188458e 100644
--- a/libclink/src/db_add_symbol.c
+++ b/libclink/src/db_add_symbol.c
@@ -3,6 +3,7 @@
 #include "debug.h"
 #include "span.h"
 #include "sql.h"
+#include "get_id.h"
 #include <assert.h>
 #include <clink/db.h>
 #include <clink/symbol.h>
@@ -12,19 +13,25 @@
 #include <stddef.h>
 #include <string.h>
 
-static int add(sqlite3_stmt *stmt, clink_category_t category, span_t name,
+static int add(clink_db_t *db, sqlite3_stmt *stmt, clink_category_t category, span_t name,
                const char *path, span_t parent) {
 
+  assert(db != NULL);
   assert(stmt != NULL);
 
   int rc = 0;
 
+  // find the identifier for this path
+  clink_record_id_t id = -1;
+  if (ERROR((rc = get_id(db, path, &id))))
+    goto done;
+
   assert(name.base != NULL);
   if (ERROR((rc = sql_bind_span(stmt, 1, name))))
     goto done;
 
   assert(path != NULL);
-  if (ERROR((rc = sql_bind_text(stmt, 2, path))))
+  if (ERROR((rc = sql_bind_int(stmt, 2, id))))
     goto done;
 
   if (ERROR((rc = sql_bind_int(stmt, 3, category))))
@@ -86,7 +93,7 @@ int add_symbols(clink_db_t *db, size_t syms_size, symbol_t *syms) {
       assert(r == SQLITE_OK);
     }
 
-    if (ERROR((rc = add(s, syms[i].category, syms[i].name, syms[i].path,
+    if (ERROR((rc = add(db, s, syms[i].category, syms[i].name, syms[i].path,
                         syms[i].parent))))
       goto done;
   }
diff --git a/libclink/src/db_find_call.c b/libclink/src/db_find_call.c
index 13a360c7..ace52f97 100644
--- a/libclink/src/db_find_call.c
+++ b/libclink/src/db_find_call.c
@@ -111,12 +111,13 @@ int clink_db_find_call(clink_db_t *db, const char *regex, clink_iter_t **it) {
     return EINVAL;
 
   static const char QUERY[] =
-      "select symbols.name, symbols.path, symbols.line, symbols.col, "
-      "symbols.parent, content.body from symbols left join content "
+      "select symbols.name, records.path, symbols.line, symbols.col, "
+      "symbols.parent, content.body from symbols inner join records "
+      "on symbols.path = records.id left join content "
       "on "
-      "symbols.path = content.path and symbols.line = content.line where "
+      "records.path = content.path and symbols.line = content.line where "
       "symbols.parent regexp @parent and symbols.category = @category order by "
-      "symbols.path, symbols.line, symbols.col;";
+      "records.path, symbols.line, symbols.col;";
 
   int rc = 0;
   clink_iter_t *i = NULL;
diff --git a/libclink/src/db_find_caller.c b/libclink/src/db_find_caller.c
index 19935732..c04aa70c 100644
--- a/libclink/src/db_find_caller.c
+++ b/libclink/src/db_find_caller.c
@@ -110,11 +110,12 @@ int clink_db_find_caller(clink_db_t *db, const char *regex, clink_iter_t **it) {
     return EINVAL;
 
   static const char QUERY[] =
-      "select symbols.name, symbols.path, symbols.line, symbols.col, "
-      "symbols.parent, content.body from symbols left join content on "
-      "symbols.path = content.path and symbols.line = content.line where "
+      "select symbols.name, records.path, symbols.line, symbols.col, "
+      "symbols.parent, content.body from symbols inner join records "
+      "on symbols.path = records.id left join content on "
+      "records.path = content.path and symbols.line = content.line where "
       "symbols.name regexp @name and symbols.category = @category order by "
-      "symbols.path, symbols.line, symbols.col;";
+      "records.path, symbols.line, symbols.col;";
 
   int rc = 0;
   clink_iter_t *i = NULL;
diff --git a/libclink/src/db_find_definition.c b/libclink/src/db_find_definition.c
index 14fc2488..33d82800 100644
--- a/libclink/src/db_find_definition.c
+++ b/libclink/src/db_find_definition.c
@@ -111,11 +111,12 @@ int clink_db_find_definition(clink_db_t *db, const char *regex,
     return EINVAL;
 
   static const char QUERY[] =
-      "select symbols.name, symbols.path, symbols.line, symbols.col, "
-      "symbols.parent, content.body from symbols left join content on "
-      "symbols.path = content.path and symbols.line = content.line where "
+      "select symbols.name, records.path, symbols.line, symbols.col, "
+      "symbols.parent, content.body from symbols inner join records "
+      "on symbols.path = records.id left join content on "
+      "records.path = content.path and symbols.line = content.line where "
       "symbols.name regexp @name and symbols.category = @category order by "
-      "symbols.path, symbols.line, symbols.col;";
+      "records.path, symbols.line, symbols.col;";
 
   int rc = 0;
   clink_iter_t *i = NULL;
diff --git a/libclink/src/db_find_includer.c b/libclink/src/db_find_includer.c
index eb27f97f..87ed5dd5 100644
--- a/libclink/src/db_find_includer.c
+++ b/libclink/src/db_find_includer.c
@@ -111,12 +111,13 @@ int clink_db_find_includer(clink_db_t *db, const char *regex,
     return EINVAL;
 
   static const char QUERY[] =
-      "select symbols.name, symbols.path, symbols.line,"
-      "symbols.col, symbols.parent, content.body from symbols left join "
+      "select symbols.name, records.path, symbols.line,"
+      "symbols.col, symbols.parent, content.body from symbols inner join "
+      "records on symbols.path = records.id left join "
       "content "
-      "on symbols.path = content.path and symbols.line = content.line where "
+      "on records.path = content.path and symbols.line = content.line where "
       "symbols.name regexp @name and symbols.category = @category order "
-      "by symbols.path, symbols.line, symbols.col;";
+      "by records.path, symbols.line, symbols.col;";
 
   int rc = 0;
   clink_iter_t *i = NULL;
diff --git a/libclink/src/db_find_symbol.c b/libclink/src/db_find_symbol.c
index 4d689196..5e3e2495 100644
--- a/libclink/src/db_find_symbol.c
+++ b/libclink/src/db_find_symbol.c
@@ -110,11 +110,12 @@ int clink_db_find_symbol(clink_db_t *db, const char *regex, clink_iter_t **it) {
     return EINVAL;
 
   static const char QUERY[] =
-      "select symbols.name, symbols.path, symbols.category, "
+      "select symbols.name, records.path, symbols.category, "
       "symbols.line, symbols.col, symbols.parent, content.body from symbols "
+      "inner join records on symbols.path = records.id "
       "left "
-      "join content on symbols.path = content.path and symbols.line = "
-      "content.line where symbols.name regexp @name order by symbols.path, "
+      "join content on records.path = content.path and symbols.line = "
+      "content.line where symbols.name regexp @name order by records.path, "
       "symbols.line, symbols.col;";
 
   int rc = 0;
diff --git a/libclink/src/db_open.c b/libclink/src/db_open.c
index b8009722..caf58706 100644
--- a/libclink/src/db_open.c
+++ b/libclink/src/db_open.c
@@ -42,6 +42,7 @@ static int configure(sqlite3 *db) {
       "pragma synchronous=OFF;",
       "pragma journal_mode=OFF;",
       "pragma temp_store=MEMORY;",
+      "pragma foreign_keys=ON;",
   };
 
   return exec_all(db, sizeof(PRAGMAS) / sizeof(PRAGMAS[0]), PRAGMAS);
diff --git a/libclink/src/db_remove.c b/libclink/src/db_remove.c
index e5cb9143..0f6829b8 100644
--- a/libclink/src/db_remove.c
+++ b/libclink/src/db_remove.c
@@ -1,5 +1,6 @@
 #include "db.h"
 #include "debug.h"
+#include "get_id.h"
 #include "sql.h"
 #include <clink/db.h>
 #include <sqlite3.h>
@@ -16,7 +17,20 @@ void clink_db_remove(clink_db_t *db, const char *path) {
   if (ERROR(path == NULL))
     return;
 
-  // first delete it from the symbols table
+  // find the record identifier for this path
+  clink_record_id_t id = -1;
+  {
+    int r = get_id(db, path, &id);
+    if (r == ENOENT) {
+      // this path already does not exist in the database
+      return;
+    }
+    // if something else went wrong, give up
+    if (ERROR(r != 0))
+      return;
+  }
+
+  // delete the path from the symbols table
   {
     static const char SYMBOLS_DELETE[] =
         "delete from symbols where path = @path";
@@ -25,7 +39,7 @@ void clink_db_remove(clink_db_t *db, const char *path) {
     if (ERROR(sql_prepare(db->db, SYMBOLS_DELETE, &s)))
       return;
 
-    if (ERROR(sql_bind_text(s, 1, path))) {
+    if (ERROR(sql_bind_int(s, 1, id))) {
       sqlite3_finalize(s);
       return;
     }
diff --git a/libclink/src/schema.sql b/libclink/src/schema.sql
index a4106d1b..0ad00276 100644
--- a/libclink/src/schema.sql
+++ b/libclink/src/schema.sql
@@ -1,11 +1,12 @@
 create table if not exists symbols (
   name text not null,
-  path text not null,
+  path integer not null,
   category integer not null,
   line integer not null,
   col integer not null,
   parent text,
-  unique(name, path, category, line, col));
+  unique(name, path, category, line, col),
+  foreign key(path) references records(id));
 
 create table if not exists content (
   path text not null,
diff --git a/libclink/src/vim_read_into.c b/libclink/src/vim_read_into.c
index c94dd26a..88d61a05 100644
--- a/libclink/src/vim_read_into.c
+++ b/libclink/src/vim_read_into.c
@@ -84,7 +84,9 @@ int clink_vim_read_into(clink_db_t *db, const char *filename) {
 
   // create a query to lookup relevant line numbers from the target file
   static const char QUERY[] =
-      "select distinct line from symbols where path = @filename order by line;";
+      "select distinct symbols.line from symbols inner join records "
+      "on symbols.path = records.id "
+      "where records.path = @filename order by symbols.line;";
   if (ERROR((rc = sql_prepare(db->db, QUERY, &s.stmt))))
     goto done;
   if (ERROR((rc = sql_bind_text(s.stmt, 1, filename))))
diff --git a/test/cases/atomic-builtins-2.c b/test/cases/atomic-builtins-2.c
index 9ba7e1f2..7f67c677 100644
--- a/test/cases/atomic-builtins-2.c
+++ b/test/cases/atomic-builtins-2.c
@@ -6,5 +6,5 @@ int foo(void) {
 }
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select name, path, category, line, col from symbols where name = '__sync_fetch_and_add';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, col from symbols inner join records on symbols.path = records.id where symbols.name = '__sync_fetch_and_add';" | sqlite3 {%t}
 // CHECK: __sync_fetch_and_add|{%s}|1|5|10
diff --git a/test/cases/atomic-builtins.c b/test/cases/atomic-builtins.c
index 3a313283..b94626a0 100644
--- a/test/cases/atomic-builtins.c
+++ b/test/cases/atomic-builtins.c
@@ -6,7 +6,7 @@ int foo(void) {
 }
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select name, path, line, col from symbols where name = '__ATOMIC_ACQUIRE';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = '__ATOMIC_ACQUIRE';" | sqlite3 {%t}
 // CHECK: __ATOMIC_ACQUIRE|{%s}|5|30
-// RUN: echo "select name, path, category, line, col from symbols where name = '__atomic_load_n';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = '__atomic_load_n';" | sqlite3 {%t}
 // CHECK: __atomic_load_n|{%s}|1|5|10
diff --git a/test/cases/basic.py b/test/cases/basic.py
index cc7504fb..34654d5e 100644
--- a/test/cases/basic.py
+++ b/test/cases/basic.py
@@ -9,12 +9,12 @@ def __init__():
 
 # RUN: clink --build-only --database={%t} --debug {%s} >/dev/null
 
-# RUN: echo "select path, category, line, col from symbols where name = 'foo' and line < 12;" | sqlite3 {%t}
+# RUN: echo "select records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = 'foo' and symbols.line < 12;" | sqlite3 {%t}
 # CHECK: {%s}|0|3|5
 
-# RUN: echo "select path, line, col from symbols where name = 'x' and line < 12 order by line;" | sqlite3 {%t}
+# RUN: echo "select records.path, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = 'x' and symbols.line < 12 order by symbols.line;" | sqlite3 {%t}
 # CHECK: {%s}|3|9
 # CHECK: {%s}|4|10
 
-# RUN: echo "select * from symbols where name = 'Bar' and line < 12;" | sqlite3 {%t}
+# RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = 'Bar' and symbols.line < 12;" | sqlite3 {%t}
 # CHECK: Bar|{%s}|0|6|7
diff --git a/test/cases/cr.c b/test/cases/cr.c
index 46381b42..c4f9b9e0 100644
--- a/test/cases/cr.c
+++ b/test/cases/cr.c
@@ -6,14 +6,14 @@
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
 
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|3|9|
 
-// RUN: echo "select * from symbols where name = 'bar';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'bar';" | sqlite3 {%t}
 // CHECK: bar|{%s}|0|4|5|
 
-// RUN: echo "select * from symbols where name = 'baz';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'baz';" | sqlite3 {%t}
 // CHECK: baz|{%s}|0|7|9|
 
-// RUN: echo "select * from symbols where name = 'quz';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'quz';" | sqlite3 {%t}
 // CHECK: quz|{%s}|0|8|5|
diff --git a/test/cases/def-export.def b/test/cases/def-export.def
index 7cec5f33..fe7ea104 100644
--- a/test/cases/def-export.def
+++ b/test/cases/def-export.def
@@ -4,5 +4,5 @@ LIBRARY foo
 EXPORTS bar @1
 
 // RUN: clink --build-only --database={%t} --debug {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'bar' and line = 4;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'bar' and symbols.line = 4;" | sqlite3 {%t}
 // CHECK: bar|{%s}|2|4|9|
diff --git a/test/cases/define-branch-0.c b/test/cases/define-branch-0.c
index c54c1036..9e215255 100644
--- a/test/cases/define-branch-0.c
+++ b/test/cases/define-branch-0.c
@@ -8,5 +8,5 @@
 
 // XFAIL: version.parse(os.environ["LLVM_VERSION"]) < version.parse("10.0.0")
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|5|9|
diff --git a/test/cases/define-branch-1.c b/test/cases/define-branch-1.c
index 8184264b..5c0206d1 100644
--- a/test/cases/define-branch-1.c
+++ b/test/cases/define-branch-1.c
@@ -6,5 +6,5 @@
 #endif
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|4|9|
diff --git a/test/cases/define-branch-elif-0.c b/test/cases/define-branch-elif-0.c
index a91d6529..f5feb4af 100644
--- a/test/cases/define-branch-elif-0.c
+++ b/test/cases/define-branch-elif-0.c
@@ -8,5 +8,5 @@
 
 // XFAIL: version.parse(os.environ["LLVM_VERSION"]) < version.parse("10.0.0")
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|6|9|
diff --git a/test/cases/define-branch-elif-1.c b/test/cases/define-branch-elif-1.c
index 15542a62..954901ab 100644
--- a/test/cases/define-branch-elif-1.c
+++ b/test/cases/define-branch-elif-1.c
@@ -6,5 +6,5 @@
 #endif
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|5|9|
diff --git a/test/cases/define-branch-else-0.c b/test/cases/define-branch-else-0.c
index f9f61ccb..e22aed82 100644
--- a/test/cases/define-branch-else-0.c
+++ b/test/cases/define-branch-else-0.c
@@ -6,5 +6,5 @@
 #endif
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|5|9|
diff --git a/test/cases/define-branch-else-1.c b/test/cases/define-branch-else-1.c
index 2a668303..a1c6f354 100644
--- a/test/cases/define-branch-else-1.c
+++ b/test/cases/define-branch-else-1.c
@@ -8,5 +8,5 @@
 
 // XFAIL: version.parse(os.environ["LLVM_VERSION"]) < version.parse("10.0.0")
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|0|6|9|
diff --git a/test/cases/definition-multiplication.c b/test/cases/definition-multiplication.c
index 8be78d17..855761b1 100644
--- a/test/cases/definition-multiplication.c
+++ b/test/cases/definition-multiplication.c
@@ -11,5 +11,5 @@ void foo(void) {
 }
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'y' and line = 10;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'y' and symbols.line = 10;" | sqlite3 {%t}
 // CHECK: y|{%s}|2|10|9|foo
diff --git a/test/cases/functions-in-scopes.c b/test/cases/functions-in-scopes.c
index 0b0e82ea..ea6100e6 100644
--- a/test/cases/functions-in-scopes.c
+++ b/test/cases/functions-in-scopes.c
@@ -17,7 +17,7 @@ void foo(void) {
 }
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where category = 1 order by name;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.category = 1 order by symbols.name;" | sqlite3 {%t}
 // CHECK: f|{%s}|1|11|7|foo
 // CHECK: g|{%s}|1|12|5|foo
 // CHECK: h|{%s}|1|13|9|foo
diff --git a/test/cases/global.c b/test/cases/global.c
index 60e6d286..629889ad 100644
--- a/test/cases/global.c
+++ b/test/cases/global.c
@@ -1,5 +1,5 @@
 int x;
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id;" | sqlite3 {%t}
 // CHECK: x|{%s}|0|1|5|
diff --git a/test/cases/global_const.c b/test/cases/global_const.c
index f41d5d4f..5d99fa60 100644
--- a/test/cases/global_const.c
+++ b/test/cases/global_const.c
@@ -1,5 +1,5 @@
 const int x;
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id;" | sqlite3 {%t}
 // CHECK: x|{%s}|0|1|11|
diff --git a/test/cases/global_const2.c b/test/cases/global_const2.c
index 9a520b2e..4b0d7ed0 100644
--- a/test/cases/global_const2.c
+++ b/test/cases/global_const2.c
@@ -1,5 +1,5 @@
 int const x;
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id;" | sqlite3 {%t}
 // CHECK: x|{%s}|0|1|11|
diff --git a/test/cases/ifdef.c b/test/cases/ifdef.c
index c92782e2..6dc7d814 100644
--- a/test/cases/ifdef.c
+++ b/test/cases/ifdef.c
@@ -11,15 +11,15 @@
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
 
 // can we recognise a reference in an ifdef?
-// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
 // CHECK: FOO|{%s}|2|3|8|
 
 // can we reognise things in #ifs? 
-// RUN: echo "select * from symbols where name = 'QUX';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'QUX';" | sqlite3 {%t}
 // CHECK: QUX|{%s}|2|7|5|
-// RUN: echo "select * from symbols where name = 'QUUX';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'QUUX';" | sqlite3 {%t}
 // CHECK: QUUX|{%s}|2|7|12|
 
 // can we recognise things in #elifs?
-// RUN: echo "select * from symbols where name = 'QUUZ';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'QUUZ';" | sqlite3 {%t}
 // CHECK: QUUZ|{%s}|2|8|7|
diff --git a/test/cases/include.c b/test/cases/include.c
index 82f2e108..dfaec54b 100644
--- a/test/cases/include.c
+++ b/test/cases/include.c
@@ -3,5 +3,5 @@
 #include "foo.h"
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id;" | sqlite3 {%t}
 // CHECK: foo.h|{%s}|3|3|1|
diff --git a/test/cases/include2.c b/test/cases/include2.c
index aa149b05..aea565e8 100644
--- a/test/cases/include2.c
+++ b/test/cases/include2.c
@@ -3,5 +3,5 @@
 #include <foo.h>
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id;" | sqlite3 {%t}
 // CHECK: foo.h|{%s}|3|3|1|
diff --git a/test/cases/macro-definition-parent.c b/test/cases/macro-definition-parent.c
index 1f08f35a..4e9530f0 100644
--- a/test/cases/macro-definition-parent.c
+++ b/test/cases/macro-definition-parent.c
@@ -10,11 +10,11 @@
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
 
-// RUN: echo "select * from symbols where name = 'ref';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'ref';" | sqlite3 {%t}
 // CHECK: ref|{%s}|2|4|13|FOO
 
-// RUN: echo "select * from symbols where name = 'a_call';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'a_call';" | sqlite3 {%t}
 // CHECK: a_call|{%s}|2|6|13|BAR
 
-// RUN: echo "select * from symbols where name = 'our_ref';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'our_ref';" | sqlite3 {%t}
 // CHECK: our_ref|{%s}|2|9|35|BAZ
diff --git a/test/cases/macro-definition.c b/test/cases/macro-definition.c
index 0ff102d4..c1eb8aec 100644
--- a/test/cases/macro-definition.c
+++ b/test/cases/macro-definition.c
@@ -5,7 +5,7 @@
 #define BAZ(x) something_else(x)
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where category = 0 order by name;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.category = 0 order by symbols.name;" | sqlite3 {%t}
 // CHECK: BAR|{%s}|0|4|9|
 // CHECK: BAZ|{%s}|0|5|9|
 // CHECK: FOO|{%s}|0|3|9|
diff --git a/test/cases/macro-parent.c b/test/cases/macro-parent.c
index fb8a5cb6..fca5eb9f 100644
--- a/test/cases/macro-parent.c
+++ b/test/cases/macro-parent.c
@@ -10,5 +10,5 @@ int foo() {
 }
 
 // RUN: clink --build-only --database={%t} --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols where name = 'macro_call' and category = 1;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'macro_call' and symbols.category = 1;" | sqlite3 {%t}
 // CHECK: macro_call|{%s}|1|8|3|foo
diff --git a/test/cases/other-builtins-2.c b/test/cases/other-builtins-2.c
index ec62a8c6..5bb5bb54 100644
--- a/test/cases/other-builtins-2.c
+++ b/test/cases/other-builtins-2.c
@@ -5,5 +5,5 @@ void foo(void *dst, const void *src, unsigned len) {
 }
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select name, path, category, line, col from symbols where name = '__builtin_memcpy';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = '__builtin_memcpy';" | sqlite3 {%t}
 // CHECK: __builtin_memcpy|{%s}|1|4|3
diff --git a/test/cases/other-builtins.c b/test/cases/other-builtins.c
index f7be84dd..ce3ac404 100644
--- a/test/cases/other-builtins.c
+++ b/test/cases/other-builtins.c
@@ -6,5 +6,5 @@ void foo(void) {
 }
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select name, path, category, line, col from symbols where name = '__builtin_unreachable';" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = '__builtin_unreachable';" | sqlite3 {%t}
 // CHECK: __builtin_unreachable|{%s}|1|5|3
diff --git a/test/cases/undef.c b/test/cases/undef.c
index 4ab63ee9..c966459d 100644
--- a/test/cases/undef.c
+++ b/test/cases/undef.c
@@ -3,5 +3,5 @@
 #undef FOO
 
 // RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
-// RUN: echo "select * from symbols;" | sqlite3 {%t}
+// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id;" | sqlite3 {%t}
 // CHECK: FOO|{%s}|2|3|8|