Add default value for ai::EmbeddingModel::embedding_model_max_batch_tokens #8406

Merged 2 commits on Feb 27, 2025
7 changes: 6 additions & 1 deletion docs/reference/ai/extai.rst
@@ -367,6 +367,7 @@
Abstract base type for AI models.

Annotations:

* ``model_name`` - Model identifier
* ``model_provider`` - Provider identifier

@@ -377,8 +378,9 @@
Abstract type for embedding models.

Annotations:

* ``embedding_model_max_input_tokens`` - Maximum tokens per input
-* ``embedding_model_max_batch_tokens`` - Maximum tokens per batch
+* ``embedding_model_max_batch_tokens`` - Maximum tokens per batch. Default: ``'8191'``.
* ``embedding_model_max_output_dimensions`` - Maximum embedding dimensions
* ``embedding_model_supports_shortening`` - Input shortening support flag

@@ -391,6 +393,7 @@
Abstract type for text generation models.

Annotations:

* ``text_gen_model_context_window`` - Model's context window size


@@ -474,6 +477,7 @@ Example custom prompt configuration:
Type for chat prompt messages.

Properties:

* ``participant_role``: ChatParticipantRole (Required)
* ``participant_name``: str (Optional)
* ``content``: str (Required)
@@ -485,6 +489,7 @@ Example custom prompt configuration:
Type for chat prompt configuration.

Properties:

* ``name``: str (Required)
* ``messages``: set of ChatPromptMessage (Required)

2 changes: 1 addition & 1 deletion edb/buildmeta.py
@@ -60,7 +60,7 @@
# The merge conflict there is a nice reminder that you probably need
# to write a patch in edb/pgsql/patches.py, and then you should preserve
# the old value.
-EDGEDB_CATALOG_VERSION = 2025_02_04_00_00
+EDGEDB_CATALOG_VERSION = 2025_02_26_00_00
EDGEDB_MAJOR_VERSION = 7


3 changes: 2 additions & 1 deletion edb/lib/ext/ai.edgeql
@@ -179,8 +179,9 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
{
create annotation
ext::ai::embedding_model_max_input_tokens := "<must override>";
+        # for now, use the openai batch limit as the default.
        create annotation
-            ext::ai::embedding_model_max_batch_tokens := "<must override>";
+            ext::ai::embedding_model_max_batch_tokens := "8191";
create annotation
ext::ai::embedding_model_max_output_dimensions := "<must override>";
create annotation
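The effective annotation value on a concrete model type can be checked via schema reflection, much as the new migration test does. A query sketch, assuming a user-defined type `default::MyEmbeddingModel` extending `ext::ai::EmbeddingModel` exists (the type name is hypothetical):

```
select schema::ObjectType {
    name,
    # @value holds the annotation's string value; with the new
    # default, this yields '8191' unless the type overrides it.
    annotations: { name, @value }
    filter .name = 'ext::ai::embedding_model_max_batch_tokens'
}
filter .name = 'default::MyEmbeddingModel';
```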
41 changes: 41 additions & 0 deletions tests/test_edgeql_data_migration.py
@@ -12897,6 +12897,47 @@ async def test_edgeql_migration_ai_09(self):
};
''', explicit_modules=True)

async def test_edgeql_migration_ai_10(self):
# EmbeddingModel with default embedding_model_max_batch_tokens.

await self.migrate('''
using extension ai;

module default {
type TestEmbeddingModel
extending ext::ai::EmbeddingModel
{
annotation ext::ai::model_name := "text-embedding-test";
annotation ext::ai::model_provider := "custom::test";
annotation ext::ai::embedding_model_max_input_tokens
:= "8191";
annotation ext::ai::embedding_model_max_output_dimensions
:= "10";
annotation ext::ai::embedding_model_supports_shortening
:= "true";
};
};
''', explicit_modules=True)

await self.assert_query_result(
r"""
with model := (
select schema::ObjectType {
x := (
select (.annotations, .annotations@value)
filter (
.0.name
= 'ext::ai::embedding_model_max_batch_tokens'
)
)
}
filter .name = 'default::TestEmbeddingModel'
)
select model.x.1
""",
['8191'],
)


class EdgeQLMigrationRewriteTestCase(EdgeQLDataMigrationTestCase):
DEFAULT_MODULE = 'default'