Skip to content

Commit

Permalink
make 'symbols.path' a foreign key instead of direct path
Browse files Browse the repository at this point in the history
This change takes a step towards normalising the database. The `path` field of
the `symbols` table is now a foreign key pointer into the `records` table. This
has a number of implications:

  1. Entries in the `symbols` table require a corresponding entry in the
     `records` table. Without this, they are effectively orphaned without a
     path.

  2. The `records` table gains a new field, `id`. Due to the way SQLite works,
     this is an alias for the internal `rowid` column. This column should rarely
     need to be referenced except in `join` expressions with `symbols`, though
     `clink_db_add_record` gives the caller access to this field in anticipation
     of a more efficient API for symbol addition in future.

This change has a large effect on database on-disk size, reducing an index of a
recent Graphviz commit from 58MB to 20MB (65%).

This was not specifically intended to accelerate database construction (the
more efficient symbol addition hinted at above is the intended optimisation).
However, it has a large effect on this too, reducing the creation time of the
above index from 7.94s to 1.81s (77%).
  • Loading branch information
Smattr committed Apr 12, 2023
1 parent 388c826 commit 5e1c86b
Show file tree
Hide file tree
Showing 36 changed files with 114 additions and 79 deletions.
37 changes: 21 additions & 16 deletions clink/src/clink-repl
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,11 @@ def in_bw(s: Optional[str]) -> str:
def find_symbol(db: sqlite3.Connection, name: str):
logging.debug(f"find_symbol of {name}")
SQL = (
"select symbols.path, symbols.parent, symbols.line, content.body "
"from symbols left join content on symbols.path = content.path and "
"select records.path, symbols.parent, symbols.line, content.body "
"from symbols inner join records on symbols.path = records.id "
"left join content on records.path = content.path and "
"symbols.line = content.line where symbols.name = :name order by "
"symbols.path, symbols.line, symbols.col;"
"records.path, symbols.line, symbols.col;"
)
rows = select(db, SQL, {"name": name})
print(f"cscope: {len(rows)} lines")
Expand All @@ -100,11 +101,12 @@ def find_symbol(db: sqlite3.Connection, name: str):
def find_definition(db: sqlite3.Connection, name: str):
logging.debug(f"find_definition of {name}")
SQL = (
"select symbols.path, symbols.line, content.body "
"from symbols left join content on symbols.path = content.path and "
"select records.path, symbols.line, content.body "
"from symbols inner join records on symbols.path = records.id "
"left join content on records.path = content.path and "
"symbols.line = content.line where symbols.name = :name and "
f"symbols.category = {CLINK_DEFINITION} order by "
"symbols.path, symbols.line, symbols.col;"
"records.path, symbols.line, symbols.col;"
)
rows = select(db, SQL, {"name": name})
print(f"cscope: {len(rows)} lines")
Expand All @@ -115,11 +117,12 @@ def find_definition(db: sqlite3.Connection, name: str):
def find_calls(db: sqlite3.Connection, caller: str):
logging.debug(f"find_calls of {caller}")
SQL = (
"select symbols.path, symbols.name, symbols.line, content.body "
"from symbols left join content on symbols.path = content.path and "
"select records.path, symbols.name, symbols.line, content.body "
"from symbols inner join records on symbols.path = records.id "
"left join content on records.path = content.path and "
"symbols.line = content.line where symbols.parent = :caller and "
f"symbols.category = {CLINK_FUNCTION_CALL} order by "
"symbols.path, symbols.line, symbols.col;"
"records.path, symbols.line, symbols.col;"
)
rows = select(db, SQL, {"caller": caller})
print(f"cscope: {len(rows)} lines")
Expand All @@ -130,11 +133,12 @@ def find_calls(db: sqlite3.Connection, caller: str):
def find_callers(db: sqlite3.Connection, callee: str):
logging.debug(f"find_callers of {callee}")
SQL = (
"select symbols.path, symbols.parent, symbols.line, content.body "
"from symbols left join content on symbols.path = content.path and "
"select records.path, symbols.parent, symbols.line, content.body "
"from symbols inner join records on symbols.path = records.id "
"left join content on records.path = content.path and "
"symbols.line = content.line where symbols.name = :callee and "
f"symbols.category = {CLINK_FUNCTION_CALL} order by "
"symbols.path, symbols.line, symbols.col;"
"records.path, symbols.line, symbols.col;"
)
rows = select(db, SQL, {"callee": callee})
print(f"cscope: {len(rows)} lines")
Expand All @@ -145,7 +149,7 @@ def find_callers(db: sqlite3.Connection, callee: str):
def find_file(db: sqlite3.Connection, filename: str):
logging.debug(f"find_file of {filename}")
SQL = (
"select distinct path from symbols where path = :filename or "
"select distinct path from records where path = :filename or "
"path like :pattern order by path"
)
rows = select(db, SQL, {"filename": filename, "pattern": f"%/{filename}"})
Expand All @@ -157,11 +161,12 @@ def find_file(db: sqlite3.Connection, filename: str):
def find_includers(db: sqlite3.Connection, path: str):
logging.debug(f"find_includers of {path}")
SQL = (
"select symbols.path, symbols.parent, symbols.line, content.body "
"from symbols left join content on symbols.path = content.path and "
"select records.path, symbols.parent, symbols.line, content.body "
"from symbols inner join records on symbols.path = records.id "
"left join content on records.path = content.path and "
"symbols.line = content.line where symbols.name like :name and "
f"symbols.category = {CLINK_INCLUDE} order by "
"symbols.path, symbols.line, symbols.col;"
"records.path, symbols.line, symbols.col;"
)
rows = select(db, SQL, {"name": f"%{path}"})
print(f"cscope: {len(rows)} lines")
Expand Down
13 changes: 10 additions & 3 deletions libclink/src/db_add_symbol.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "debug.h"
#include "span.h"
#include "sql.h"
#include "get_id.h"
#include <assert.h>
#include <clink/db.h>
#include <clink/symbol.h>
Expand All @@ -12,19 +13,25 @@
#include <stddef.h>
#include <string.h>

static int add(sqlite3_stmt *stmt, clink_category_t category, span_t name,
static int add(clink_db_t *db, sqlite3_stmt *stmt, clink_category_t category, span_t name,
const char *path, span_t parent) {

assert(db != NULL);
assert(stmt != NULL);

int rc = 0;

// find the identifier for this path
clink_record_id_t id = -1;
if (ERROR((rc = get_id(db, path, &id))))
goto done;

assert(name.base != NULL);
if (ERROR((rc = sql_bind_span(stmt, 1, name))))
goto done;

assert(path != NULL);
if (ERROR((rc = sql_bind_text(stmt, 2, path))))
if (ERROR((rc = sql_bind_int(stmt, 2, id))))
goto done;

if (ERROR((rc = sql_bind_int(stmt, 3, category))))
Expand Down Expand Up @@ -86,7 +93,7 @@ int add_symbols(clink_db_t *db, size_t syms_size, symbol_t *syms) {
assert(r == SQLITE_OK);
}

if (ERROR((rc = add(s, syms[i].category, syms[i].name, syms[i].path,
if (ERROR((rc = add(db, s, syms[i].category, syms[i].name, syms[i].path,
syms[i].parent))))
goto done;
}
Expand Down
9 changes: 5 additions & 4 deletions libclink/src/db_find_call.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,13 @@ int clink_db_find_call(clink_db_t *db, const char *regex, clink_iter_t **it) {
return EINVAL;

static const char QUERY[] =
"select symbols.name, symbols.path, symbols.line, symbols.col, "
"symbols.parent, content.body from symbols left join content "
"select symbols.name, records.path, symbols.line, symbols.col, "
"symbols.parent, content.body from symbols inner join records "
"on symbols.path = records.id left join content "
"on "
"symbols.path = content.path and symbols.line = content.line where "
"records.path = content.path and symbols.line = content.line where "
"symbols.parent regexp @parent and symbols.category = @category order by "
"symbols.path, symbols.line, symbols.col;";
"records.path, symbols.line, symbols.col;";

int rc = 0;
clink_iter_t *i = NULL;
Expand Down
9 changes: 5 additions & 4 deletions libclink/src/db_find_caller.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ int clink_db_find_caller(clink_db_t *db, const char *regex, clink_iter_t **it) {
return EINVAL;

static const char QUERY[] =
"select symbols.name, symbols.path, symbols.line, symbols.col, "
"symbols.parent, content.body from symbols left join content on "
"symbols.path = content.path and symbols.line = content.line where "
"select symbols.name, records.path, symbols.line, symbols.col, "
"symbols.parent, content.body from symbols inner join records "
"on symbols.path = records.id left join content on "
"records.path = content.path and symbols.line = content.line where "
"symbols.name regexp @name and symbols.category = @category order by "
"symbols.path, symbols.line, symbols.col;";
"records.path, symbols.line, symbols.col;";

int rc = 0;
clink_iter_t *i = NULL;
Expand Down
9 changes: 5 additions & 4 deletions libclink/src/db_find_definition.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,12 @@ int clink_db_find_definition(clink_db_t *db, const char *regex,
return EINVAL;

static const char QUERY[] =
"select symbols.name, symbols.path, symbols.line, symbols.col, "
"symbols.parent, content.body from symbols left join content on "
"symbols.path = content.path and symbols.line = content.line where "
"select symbols.name, records.path, symbols.line, symbols.col, "
"symbols.parent, content.body from symbols inner join records "
"on symbols.path = records.id left join content on "
"records.path = content.path and symbols.line = content.line where "
"symbols.name regexp @name and symbols.category = @category order by "
"symbols.path, symbols.line, symbols.col;";
"records.path, symbols.line, symbols.col;";

int rc = 0;
clink_iter_t *i = NULL;
Expand Down
9 changes: 5 additions & 4 deletions libclink/src/db_find_includer.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,13 @@ int clink_db_find_includer(clink_db_t *db, const char *regex,
return EINVAL;

static const char QUERY[] =
"select symbols.name, symbols.path, symbols.line,"
"symbols.col, symbols.parent, content.body from symbols left join "
"select symbols.name, records.path, symbols.line,"
"symbols.col, symbols.parent, content.body from symbols inner join "
"records on symbols.path = records.id left join "
"content "
"on symbols.path = content.path and symbols.line = content.line where "
"on records.path = content.path and symbols.line = content.line where "
"symbols.name regexp @name and symbols.category = @category order "
"by symbols.path, symbols.line, symbols.col;";
"by records.path, symbols.line, symbols.col;";

int rc = 0;
clink_iter_t *i = NULL;
Expand Down
7 changes: 4 additions & 3 deletions libclink/src/db_find_symbol.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ int clink_db_find_symbol(clink_db_t *db, const char *regex, clink_iter_t **it) {
return EINVAL;

static const char QUERY[] =
"select symbols.name, symbols.path, symbols.category, "
"select symbols.name, records.path, symbols.category, "
"symbols.line, symbols.col, symbols.parent, content.body from symbols "
"inner join records on symbols.path = records.id "
"left "
"join content on symbols.path = content.path and symbols.line = "
"content.line where symbols.name regexp @name order by symbols.path, "
"join content on records.path = content.path and symbols.line = "
"content.line where symbols.name regexp @name order by records.path, "
"symbols.line, symbols.col;";

int rc = 0;
Expand Down
1 change: 1 addition & 0 deletions libclink/src/db_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ static int configure(sqlite3 *db) {
"pragma synchronous=OFF;",
"pragma journal_mode=OFF;",
"pragma temp_store=MEMORY;",
"pragma foreign_keys=ON;",
};

return exec_all(db, sizeof(PRAGMAS) / sizeof(PRAGMAS[0]), PRAGMAS);
Expand Down
18 changes: 16 additions & 2 deletions libclink/src/db_remove.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "db.h"
#include "debug.h"
#include "get_id.h"
#include "sql.h"
#include <clink/db.h>
#include <sqlite3.h>
Expand All @@ -16,7 +17,20 @@ void clink_db_remove(clink_db_t *db, const char *path) {
if (ERROR(path == NULL))
return;

// first delete it from the symbols table
// find the record identifier for this path
clink_record_id_t id = -1;
{
int r = get_id(db, path, &id);
if (r == ENOENT) {
// this path already does not exist in the database
return;
}
// if something else went wrong, give up
if (ERROR(r != 0))
return;
}

// delete the path from the symbols table
{
static const char SYMBOLS_DELETE[] =
"delete from symbols where path = @path";
Expand All @@ -25,7 +39,7 @@ void clink_db_remove(clink_db_t *db, const char *path) {
if (ERROR(sql_prepare(db->db, SYMBOLS_DELETE, &s)))
return;

if (ERROR(sql_bind_text(s, 1, path))) {
if (ERROR(sql_bind_int(s, 1, id))) {
sqlite3_finalize(s);
return;
}
Expand Down
5 changes: 3 additions & 2 deletions libclink/src/schema.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
create table if not exists symbols (
name text not null,
path text not null,
path integer not null,
category integer not null,
line integer not null,
col integer not null,
parent text,
unique(name, path, category, line, col));
unique(name, path, category, line, col),
foreign key(path) references records(id));

create table if not exists content (
path text not null,
Expand Down
4 changes: 3 additions & 1 deletion libclink/src/vim_read_into.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ int clink_vim_read_into(clink_db_t *db, const char *filename) {

// create a query to lookup relevant line numbers from the target file
static const char QUERY[] =
"select distinct line from symbols where path = @filename order by line;";
"select distinct symbols.line from symbols inner join records "
"on symbols.path = records.id "
"where records.path = @filename order by symbols.line;";
if (ERROR((rc = sql_prepare(db->db, QUERY, &s.stmt))))
goto done;
if (ERROR((rc = sql_bind_text(s.stmt, 1, filename))))
Expand Down
2 changes: 1 addition & 1 deletion test/cases/atomic-builtins-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ int foo(void) {
}

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select name, path, category, line, col from symbols where name = '__sync_fetch_and_add';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, col from symbols inner join records on symbols.path = records.id where symbols.name = '__sync_fetch_and_add';" | sqlite3 {%t}
// CHECK: __sync_fetch_and_add|{%s}|1|5|10
4 changes: 2 additions & 2 deletions test/cases/atomic-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ int foo(void) {
}

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select name, path, line, col from symbols where name = '__ATOMIC_ACQUIRE';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = '__ATOMIC_ACQUIRE';" | sqlite3 {%t}
// CHECK: __ATOMIC_ACQUIRE|{%s}|5|30
// RUN: echo "select name, path, category, line, col from symbols where name = '__atomic_load_n';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = '__atomic_load_n';" | sqlite3 {%t}
// CHECK: __atomic_load_n|{%s}|1|5|10
6 changes: 3 additions & 3 deletions test/cases/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def __init__():

# RUN: clink --build-only --database={%t} --debug {%s} >/dev/null

# RUN: echo "select path, category, line, col from symbols where name = 'foo' and line < 12;" | sqlite3 {%t}
# RUN: echo "select records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = 'foo' and symbols.line < 12;" | sqlite3 {%t}
# CHECK: {%s}|0|3|5

# RUN: echo "select path, line, col from symbols where name = 'x' and line < 12 order by line;" | sqlite3 {%t}
# RUN: echo "select records.path, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = 'x' and symbols.line < 12 order by symbols.line;" | sqlite3 {%t}
# CHECK: {%s}|3|9
# CHECK: {%s}|4|10

# RUN: echo "select * from symbols where name = 'Bar' and line < 12;" | sqlite3 {%t}
# RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col from symbols inner join records on symbols.path = records.id where symbols.name = 'Bar' and symbols.line < 12;" | sqlite3 {%t}
# CHECK: Bar|{%s}|0|6|7
8 changes: 4 additions & 4 deletions test/cases/cr.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null

// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|3|9|

// RUN: echo "select * from symbols where name = 'bar';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'bar';" | sqlite3 {%t}
// CHECK: bar|{%s}|0|4|5|

// RUN: echo "select * from symbols where name = 'baz';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'baz';" | sqlite3 {%t}
// CHECK: baz|{%s}|0|7|9|

// RUN: echo "select * from symbols where name = 'quz';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'quz';" | sqlite3 {%t}
// CHECK: quz|{%s}|0|8|5|
Expand Down
2 changes: 1 addition & 1 deletion test/cases/def-export.def
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ LIBRARY foo
EXPORTS bar @1

// RUN: clink --build-only --database={%t} --debug {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'bar' and line = 4;" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'bar' and symbols.line = 4;" | sqlite3 {%t}
// CHECK: bar|{%s}|2|4|9|
2 changes: 1 addition & 1 deletion test/cases/define-branch-0.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@

// XFAIL: version.parse(os.environ["LLVM_VERSION"]) < version.parse("10.0.0")
// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|5|9|
2 changes: 1 addition & 1 deletion test/cases/define-branch-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
#endif

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|4|9|
2 changes: 1 addition & 1 deletion test/cases/define-branch-elif-0.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@

// XFAIL: version.parse(os.environ["LLVM_VERSION"]) < version.parse("10.0.0")
// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|6|9|
2 changes: 1 addition & 1 deletion test/cases/define-branch-elif-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
#endif

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|5|9|
2 changes: 1 addition & 1 deletion test/cases/define-branch-else-0.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
#endif

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|5|9|
2 changes: 1 addition & 1 deletion test/cases/define-branch-else-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@

// XFAIL: version.parse(os.environ["LLVM_VERSION"]) < version.parse("10.0.0")
// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'FOO';" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'FOO';" | sqlite3 {%t}
// CHECK: FOO|{%s}|0|6|9|
2 changes: 1 addition & 1 deletion test/cases/definition-multiplication.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ void foo(void) {
}

// RUN: clink --build-only --database={%t} --debug --parse-c=clang {%s} >/dev/null
// RUN: echo "select * from symbols where name = 'y' and line = 10;" | sqlite3 {%t}
// RUN: echo "select symbols.name, records.path, symbols.category, symbols.line, symbols.col, symbols.parent from symbols inner join records on symbols.path = records.id where symbols.name = 'y' and symbols.line = 10;" | sqlite3 {%t}
// CHECK: y|{%s}|2|10|9|foo
Loading

0 comments on commit 5e1c86b

Please sign in to comment.