From 435dca8f3d3d218ae2f193a8aa35edd3cc92f539 Mon Sep 17 00:00:00 2001 From: dnwpark Date: Wed, 26 Feb 2025 17:54:31 -0500 Subject: [PATCH 1/2] Add a default value for embedding_model_max_batch_tokens. --- edb/buildmeta.py | 2 +- edb/lib/ext/ai.edgeql | 3 ++- tests/test_edgeql_data_migration.py | 41 +++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/edb/buildmeta.py b/edb/buildmeta.py index 996b1b0f421..647181562b3 100644 --- a/edb/buildmeta.py +++ b/edb/buildmeta.py @@ -60,7 +60,7 @@ # The merge conflict there is a nice reminder that you probably need # to write a patch in edb/pgsql/patches.py, and then you should preserve # the old value. -EDGEDB_CATALOG_VERSION = 2025_02_04_00_00 +EDGEDB_CATALOG_VERSION = 2025_02_26_00_00 EDGEDB_MAJOR_VERSION = 7 diff --git a/edb/lib/ext/ai.edgeql b/edb/lib/ext/ai.edgeql index 508f12b201a..c2be5628da6 100644 --- a/edb/lib/ext/ai.edgeql +++ b/edb/lib/ext/ai.edgeql @@ -179,8 +179,9 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' { { create annotation ext::ai::embedding_model_max_input_tokens := ""; + # for now, use the openai batch limit as the default. create annotation - ext::ai::embedding_model_max_batch_tokens := ""; + ext::ai::embedding_model_max_batch_tokens := "8191"; create annotation ext::ai::embedding_model_max_output_dimensions := ""; create annotation diff --git a/tests/test_edgeql_data_migration.py b/tests/test_edgeql_data_migration.py index e418522928c..e7a7f4f4a0c 100644 --- a/tests/test_edgeql_data_migration.py +++ b/tests/test_edgeql_data_migration.py @@ -12897,6 +12897,47 @@ async def test_edgeql_migration_ai_09(self): }; ''', explicit_modules=True) + async def test_edgeql_migration_ai_10(self): + # EmbeddingModel with default embedding_model_max_batch_tokens. + + await self.migrate(''' + using extension ai; + + module default { + type TestEmbeddingModel + extending ext::ai::EmbeddingModel + { + annotation ext::ai::model_name := "text-embedding-test"; + annotation ext::ai::model_provider := "custom::test"; + annotation ext::ai::embedding_model_max_input_tokens + := "8191"; + annotation ext::ai::embedding_model_max_output_dimensions + := "10"; + annotation ext::ai::embedding_model_supports_shortening + := "true"; + }; + }; + ''', explicit_modules=True) + + await self.assert_query_result( + r""" + with model := ( + select schema::ObjectType { + x := ( + select (.annotations, .annotations@value) + filter ( + .0.name + = 'ext::ai::embedding_model_max_batch_tokens' + ) + ) + } + filter .name = 'default::TestEmbeddingModel' + ) + select model.x.1 + """, + ['8191'], + ) + class EdgeQLMigrationRewriteTestCase(EdgeQLDataMigrationTestCase): DEFAULT_MODULE = 'default' From 6c39982b9f314036124f11b0b1db7a4044a536eb Mon Sep 17 00:00:00 2001 From: dnwpark Date: Wed, 26 Feb 2025 17:57:23 -0500 Subject: [PATCH 2/2] Update docs. --- docs/reference/ai/extai.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/reference/ai/extai.rst b/docs/reference/ai/extai.rst index aa2d0fba2b5..8f52dd36bc8 100644 --- a/docs/reference/ai/extai.rst +++ b/docs/reference/ai/extai.rst @@ -367,6 +367,7 @@ Anthropic (`documentation