Skip to content

Commit

Permalink
sql: add schema support for vector indexing in CREATE TABLE
Browse files Browse the repository at this point in the history
Support usage of VECTOR INDEX in a CREATE TABLE statement, e.g.:

  CREATE TABLE simple (
    a INT PRIMARY KEY,
    vec1 VECTOR(3),
    VECTOR INDEX (vec1)
  )

Create the corresponding table and index schema objects for the vector
index. Check various error conditions, e.g. that the last column of a
vector index must be of type VECTOR. Add unit and logic tests.
CREATE VECTOR INDEX support will come in a future PR.

Epic: CRDB-42943

Release note: None

Co-authored-by: Drew Kimball <[email protected]>
  • Loading branch information
andy-kimball and Drew Kimball committed Jan 30, 2025
1 parent 8655f5d commit a38f721
Show file tree
Hide file tree
Showing 38 changed files with 587 additions and 158 deletions.
7 changes: 7 additions & 0 deletions pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/sql/catalog/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ go_library(
"//pkg/sql/sessiondatapb",
"//pkg/sql/sqlclustersettings",
"//pkg/sql/types",
"//pkg/sql/vecindex/vecpb",
"//pkg/util",
"//pkg/util/hlc",
"//pkg/util/intsets",
Expand Down
26 changes: 18 additions & 8 deletions pkg/sql/catalog/catformat/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,13 @@ func indexForDisplay(
if index.Unique {
f.WriteString("UNIQUE ")
}
if !f.HasFlags(tree.FmtPGCatalog) && index.Type == idxtype.INVERTED {
f.WriteString("INVERTED ")
if !f.HasFlags(tree.FmtPGCatalog) {
switch index.Type {
case idxtype.INVERTED:
f.WriteString("INVERTED ")
case idxtype.VECTOR:
f.WriteString("VECTOR ")
}
}
f.WriteString("INDEX ")
f.FormatNameP(&index.Name)
Expand All @@ -114,9 +119,12 @@ func indexForDisplay(

if f.HasFlags(tree.FmtPGCatalog) {
f.WriteString(" USING")
if index.Type == idxtype.INVERTED {
switch index.Type {
case idxtype.INVERTED:
f.WriteString(" gin")
} else {
case idxtype.VECTOR:
f.WriteString(" cspann")
default:
f.WriteString(" btree")
}
}
Expand Down Expand Up @@ -240,17 +248,19 @@ func FormatIndexElements(
} else {
f.FormatNameP(&index.KeyColumnNames[i])
}
// TODO(drewk): we might need to print something like "vector_l2_ops" for
// vector indexes.
if index.Type == idxtype.INVERTED &&
col.GetID() == index.InvertedColumnID() && len(index.InvertedColumnKinds) > 0 {
switch index.InvertedColumnKinds[0] {
case catpb.InvertedIndexColumnKind_TRIGRAM:
f.WriteString(" gin_trgm_ops")
}
}
// The last column of an inverted index cannot have a DESC direction.
// Since the default direction is ASC, we omit the direction entirely
// for inverted index columns.
if i < n-1 || index.Type != idxtype.INVERTED {
// The last column of an inverted or vector index cannot have a DESC
// direction. Since the default direction is ASC, we omit the direction
// entirely for inverted/vector index columns.
if i < n-1 || index.Type.AllowExplicitDirection() {
f.WriteByte(' ')
f.WriteString(index.KeyColumnDirections[i].String())
}
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/catalog/catformat/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ func TestIndexForDisplay(t *testing.T) {
ColumnNames: []string{"a"},
}

// VECTOR INDEX baz (a)
vectorIndex := baseIndex
vectorIndex.Type = idxtype.VECTOR
vectorIndex.KeyColumnNames = []string{"a"}
vectorIndex.KeyColumnIDs = descpb.ColumnIDs{1}

testData := []struct {
index descpb.IndexDescriptor
tableName tree.TableName
Expand Down Expand Up @@ -266,6 +272,22 @@ func TestIndexForDisplay(t *testing.T) {
expected: "CREATE INDEX baz ON foo.public.bar (a DESC) USING HASH WITH (bucket_count=8)",
pgExpected: "CREATE INDEX baz ON foo.public.bar USING btree (a DESC) USING HASH WITH (bucket_count=8)",
},
{
index: vectorIndex,
tableName: descpb.AnonymousTable,
partition: "",
displayMode: IndexDisplayDefOnly,
expected: "VECTOR INDEX baz (a)",
pgExpected: "INDEX baz USING cspann (a)",
},
{
index: vectorIndex,
tableName: tableName,
partition: "",
displayMode: IndexDisplayShowCreate,
expected: "CREATE VECTOR INDEX baz ON foo.public.bar (a)",
pgExpected: "CREATE INDEX baz ON foo.public.bar USING cspann (a)",
},
}

sd := &sessiondata.SessionData{}
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/catalog/colinfo/col_type_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ func ColumnTypeIsOnlyInvertedIndexable(t *types.T) bool {
return true
}

// ColumnTypeIsVectorIndexable reports whether a column of type t may be the
// indexed column of a vector index. Only types in the PGVector family
// qualify.
func ColumnTypeIsVectorIndexable(t *types.T) bool {
	switch t.Family() {
	case types.PGVectorFamily:
		return true
	default:
		return false
	}
}

// MustBeValueEncoded returns true if columns of the given kind can only be value
// encoded.
func MustBeValueEncoded(semanticType *types.T) bool {
Expand Down
22 changes: 21 additions & 1 deletion pkg/sql/catalog/descpb/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func (desc *IndexDescriptor) GetName() string {
}

// InvertedColumnID returns the ColumnID of the inverted column of the inverted
// index. This is always the last column in ColumnIDs. Panics if the index is
// index. This is always the last column in KeyColumnIDs. Panics if the index is
// not inverted.
func (desc *IndexDescriptor) InvertedColumnID() ColumnID {
if desc.Type != idxtype.INVERTED {
Expand Down Expand Up @@ -126,3 +126,23 @@ func (desc *IndexDescriptor) InvertedColumnKeyType() *types.T {
}
return types.EncodedKey
}

// VectorColumnID reports the ID of the column holding the vectors indexed by
// this vector index, which is the final entry of KeyColumnIDs. It panics with
// an assertion failure when the descriptor's Type is not idxtype.VECTOR.
func (desc *IndexDescriptor) VectorColumnID() ColumnID {
	if desc.Type != idxtype.VECTOR {
		panic(errors.AssertionFailedf("index is not a vector index"))
	}
	// The vector column is stored after any prefix columns, so take the tail.
	keyCols := desc.KeyColumnIDs
	last := len(keyCols) - 1
	return keyCols[last]
}

// VectorColumnName reports the name of the column holding the vectors indexed
// by this vector index, which is the final entry of KeyColumnNames. It panics
// with an assertion failure when the descriptor's Type is not idxtype.VECTOR.
func (desc *IndexDescriptor) VectorColumnName() string {
	if desc.Type != idxtype.VECTOR {
		panic(errors.AssertionFailedf("index is not a vector index"))
	}
	// The vector column is stored after any prefix columns, so take the tail.
	keyNames := desc.KeyColumnNames
	last := len(keyNames) - 1
	return keyNames[last]
}
14 changes: 14 additions & 0 deletions pkg/sql/catalog/table_elements.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/semenumpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecpb"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/intsets"
"github.com/cockroachdb/cockroach/pkg/util/iterutil"
Expand Down Expand Up @@ -174,6 +175,7 @@ type Index interface {
GetPredicate() string
GetType() idxtype.T
GetGeoConfig() geopb.Config
GetVecConfig() vecpb.Config
GetVersion() descpb.IndexDescriptorVersion
GetEncodingType() catenumpb.IndexDescriptorEncodingType

Expand Down Expand Up @@ -223,6 +225,18 @@ type Index interface {
// index.
InvertedColumnKind() catpb.InvertedIndexColumnKind

// VectorColumnName returns the name of the vector column of the vector
// index.
//
// Panics if the index is not a vector index.
VectorColumnName() string

// VectorColumnID returns the ColumnID of the vector column of the vector
// index.
//
// Panics if the index is not a vector index.
VectorColumnID() descpb.ColumnID

NumPrimaryStoredColumns() int
NumSecondaryStoredColumns() int
GetStoredColumnID(storedColumnOrdinal int) descpb.ColumnID
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/catalog/tabledesc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/clusterversion",
"//pkg/docs",
"//pkg/geo/geopb",
"//pkg/jobs/jobspb",
"//pkg/keys",
Expand Down Expand Up @@ -59,6 +58,7 @@ go_library(
"//pkg/sql/sem/volatility",
"//pkg/sql/sqlerrors",
"//pkg/sql/types",
"//pkg/sql/vecindex/vecpb",
"//pkg/util",
"//pkg/util/errorutil/unimplemented",
"//pkg/util/hlc",
Expand Down Expand Up @@ -122,6 +122,7 @@ go_test(
"//pkg/sql/sem/idxtype",
"//pkg/sql/sem/semenumpb",
"//pkg/sql/types",
"//pkg/sql/vecindex/vecpb",
"//pkg/testutils",
"//pkg/testutils/serverutils",
"//pkg/testutils/sqlutils",
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/catalog/tabledesc/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecpb"
"github.com/cockroachdb/cockroach/pkg/util/iterutil"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
Expand Down Expand Up @@ -223,6 +224,20 @@ func (w index) InvertedColumnKind() catpb.InvertedIndexColumnKind {
return w.desc.InvertedColumnKinds[0]
}

// VectorColumnID returns the ColumnID of the vector column of the vector index.
// This is always the last column in KeyColumnIDs. Panics if the index is not a
// vector index.
//
// It delegates to the VectorColumnID method of the wrapped index descriptor.
func (w index) VectorColumnID() descpb.ColumnID {
return w.desc.VectorColumnID()
}

// VectorColumnName returns the name of the vector column of the vector index.
// This is always the last column in KeyColumnNames. Panics if the index is not
// a vector index.
//
// It delegates to the VectorColumnName method of the wrapped index descriptor.
func (w index) VectorColumnName() string {
return w.desc.VectorColumnName()
}

// CollectKeyColumnIDs creates a new set containing the column IDs in the key
// of this index.
func (w index) CollectKeyColumnIDs() catalog.TableColSet {
Expand Down Expand Up @@ -265,6 +280,11 @@ func (w index) GetGeoConfig() geopb.Config {
return w.desc.GeoConfig
}

// GetVecConfig returns the vector index configuration (VecConfig) stored in
// the wrapped index descriptor. The config is returned by value.
func (w index) GetVecConfig() vecpb.Config {
return w.desc.VecConfig
}

// GetSharded returns the ShardedDescriptor in the index descriptor
func (w index) GetSharded() catpb.ShardedDescriptor {
return w.desc.Sharded
Expand Down
16 changes: 14 additions & 2 deletions pkg/sql/catalog/tabledesc/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/internal/validate"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecpb"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
Expand Down Expand Up @@ -57,17 +58,19 @@ func TestIndexInterface(t *testing.T) {
c5 VARCHAR,
c6 JSONB,
c7 GEOGRAPHY(GEOMETRY,4326) NULL,
c8 VECTOR(3),
CONSTRAINT pk PRIMARY KEY (c1 ASC, c2 ASC, c3 ASC),
INDEX s1 (c4 DESC, c5 DESC),
INVERTED INDEX s2 (c6),
INDEX s3 (c2, c3) STORING (c5, c6),
INDEX s4 (c5) USING HASH WITH (bucket_count=8),
UNIQUE INDEX s5 (c1, c4) WHERE c4 = 'x',
INVERTED INDEX s6 (c7) WITH (s2_level_mod=2)
INVERTED INDEX s6 (c7) WITH (s2_level_mod=2),
VECTOR INDEX s7 (c8)
);
`)

indexNames := []string{"pk", "s1", "s2", "s3", "s4", "s5", "s6"}
indexNames := []string{"pk", "s1", "s2", "s3", "s4", "s5", "s6", "s7"}
indexColumns := [][]string{
{"c1", "c2", "c3"},
{"c4", "c5"},
Expand All @@ -76,6 +79,7 @@ func TestIndexInterface(t *testing.T) {
{"crdb_internal_c5_shard_8", "c5"},
{"c1", "c4"},
{"c7"},
{"c8"},
}
extraColumnsAsPkColOrdinals := [][]int{
{},
Expand All @@ -85,6 +89,7 @@ func TestIndexInterface(t *testing.T) {
{0, 1, 2},
{1, 2},
{0, 1, 2},
{0, 1, 2},
}

immutable := desctestutils.TestingGetPublicTableDescriptor(db, s.Codec(), "d", "t")
Expand All @@ -110,6 +115,7 @@ func TestIndexInterface(t *testing.T) {
s4 := indexes[4]
s5 := indexes[5]
s6 := indexes[6]
s7 := indexes[7]

// Check that GetPrimaryIndex returns the primary index.
require.Equal(t, pk, tableI.GetPrimaryIndex())
Expand Down Expand Up @@ -265,6 +271,7 @@ func TestIndexInterface(t *testing.T) {
require.Equal(t, "c4 = 'x':::STRING", s5.GetPredicate())
require.Equal(t, "crdb_internal_c5_shard_8", s4.GetShardColumnName())
require.Equal(t, int32(2), s6.GetGeoConfig().S2Geography.S2Config.LevelMod)
require.Equal(t, int64(3), s7.GetVecConfig().Dims)
for _, idx := range indexes {
require.Equalf(t, idx == s5, idx.IsPartial(),
errMsgFmt, "IsPartial", idx.GetName())
Expand All @@ -284,6 +291,9 @@ func TestIndexInterface(t *testing.T) {
errMsgFmt, "GetSharded", idx.GetName())
require.Equalf(t, idx != s3, idx.NumSecondaryStoredColumns() == 0,
errMsgFmt, "NumSecondaryStoredColumns", idx.GetName())
vecConfig := idx.GetVecConfig()
require.Equal(t, idx == s7, !(&vecpb.Config{}).Equal(&vecConfig),
errMsgFmt, "GetVecConfig", idx.GetName())
}

// Check index columns.
Expand Down Expand Up @@ -326,6 +336,8 @@ func TestIndexInterface(t *testing.T) {
require.Equal(t, 2, s3.NumSecondaryStoredColumns())
require.Equal(t, "c5", s3.GetStoredColumnName(0))
require.Equal(t, "c6", s3.GetStoredColumnName(1))
require.Equal(t, s7.GetKeyColumnID(0), s7.VectorColumnID())
require.Equal(t, "c8", s7.VectorColumnName())
}

// TestIndexStrictColumnIDs tests that the index format version value
Expand Down
Loading

0 comments on commit a38f721

Please sign in to comment.