diff --git a/migrations/2024-03-29-101252_add_ai_help_metadata_embedding_duration/down.sql b/migrations/2024-03-29-101252_add_ai_help_metadata_embedding_duration/down.sql
new file mode 100644
index 00000000..cd3829d6
--- /dev/null
+++ b/migrations/2024-03-29-101252_add_ai_help_metadata_embedding_duration/down.sql
@@ -0,0 +1,3 @@
+ALTER TABLE ai_help_message_meta
+DROP COLUMN embedding_duration,
+DROP COLUMN embedding_model;
diff --git a/migrations/2024-03-29-101252_add_ai_help_metadata_embedding_duration/up.sql b/migrations/2024-03-29-101252_add_ai_help_metadata_embedding_duration/up.sql
new file mode 100644
index 00000000..10a5106e
--- /dev/null
+++ b/migrations/2024-03-29-101252_add_ai_help_metadata_embedding_duration/up.sql
@@ -0,0 +1,3 @@
+ALTER TABLE ai_help_message_meta
+ADD COLUMN embedding_duration BIGINT DEFAULT NULL,
+ADD COLUMN embedding_model TEXT NOT NULL DEFAULT '';
diff --git a/src/ai/embeddings.rs b/src/ai/embeddings.rs
index eb49b406..d6d98db4 100644
--- a/src/ai/embeddings.rs
+++ b/src/ai/embeddings.rs
@@ -1,8 +1,10 @@
+use std::time::Instant;
+
 use async_openai::{config::OpenAIConfig, types::CreateEmbeddingRequestArgs, Client};
 use itertools::Itertools;

 use crate::{
-    ai::{constants::EMBEDDING_MODEL, error::AIError},
+    ai::{constants::EMBEDDING_MODEL, error::AIError, help::AIHelpRequestMeta},
     db::SupaPool,
 };

@@ -69,16 +71,22 @@ pub async fn get_related_macro_docs(
     client: &Client<OpenAIConfig>,
     pool: &SupaPool,
     prompt: String,
+    request_meta: &mut AIHelpRequestMeta,
 ) -> Result<Vec<RelatedDoc>, AIError> {
+    request_meta.embedding_model = Some(EMBEDDING_MODEL);
+
     let embedding_req = CreateEmbeddingRequestArgs::default()
         .model(EMBEDDING_MODEL)
         .input(prompt)
         .build()?;
+    let start = Instant::now();
     let embedding_res = client.embeddings().create(embedding_req).await?;
+    request_meta.embedding_duration = Some(start.elapsed());

     let embedding =
         pgvector::Vector::from(embedding_res.data.into_iter().next().unwrap().embedding);

+    let start = Instant::now();
     let mut docs: Vec<RelatedDoc> = sqlx::query_as(MACRO_DOCS_QUERY)
         .bind(embedding)
         .bind(MACRO_EMB_DISTANCE)
@@ -86,6 +94,7 @@ pub async fn get_related_macro_docs(
         .bind(MACRO_EMB_SEC_MIN_LENGTH)
         .fetch_all(pool)
         .await?;
+    request_meta.search_duration = Some(start.elapsed());

     let duplicate_titles: Vec<String> = docs
         .iter()
@@ -108,15 +117,21 @@ pub async fn get_related_full_docs(
     client: &Client<OpenAIConfig>,
     pool: &SupaPool,
     prompt: String,
+    request_meta: &mut AIHelpRequestMeta,
 ) -> Result<Vec<RelatedDoc>, AIError> {
+    request_meta.embedding_model = Some(EMBEDDING_MODEL);
+
     let embedding_req = CreateEmbeddingRequestArgs::default()
         .model(EMBEDDING_MODEL)
         .input(prompt)
         .build()?;
+    let start = Instant::now();
     let embedding_res = client.embeddings().create(embedding_req).await?;
+    request_meta.embedding_duration = Some(start.elapsed());
     let embedding =
         pgvector::Vector::from(embedding_res.data.into_iter().next().unwrap().embedding);

+    let start = Instant::now();
     let docs: Vec<RelatedDoc> = sqlx::query_as(FULL_DOCS_QUERY)
         .bind(embedding)
         .bind(FULL_EMB_DISTANCE)
@@ -124,6 +139,8 @@ pub async fn get_related_full_docs(
         .bind(FULL_EMB_SEC_MIN_LENGTH)
         .fetch_all(pool)
         .await?;
+    request_meta.search_duration = Some(start.elapsed());
+
     Ok(docs)
 }

@@ -131,15 +148,21 @@ pub async fn get_related_docs(
     client: &Client<OpenAIConfig>,
     pool: &SupaPool,
     prompt: String,
+    request_meta: &mut AIHelpRequestMeta,
 ) -> Result<Vec<RelatedDoc>, AIError> {
+    request_meta.embedding_model = Some(EMBEDDING_MODEL);
+
     let embedding_req = CreateEmbeddingRequestArgs::default()
         .model(EMBEDDING_MODEL)
         .input(prompt)
         .build()?;
+    let start = Instant::now();
     let embedding_res = client.embeddings().create(embedding_req).await?;
+    request_meta.embedding_duration = Some(start.elapsed());
     let embedding =
         pgvector::Vector::from(embedding_res.data.into_iter().next().unwrap().embedding);

+    let start = Instant::now();
     let docs: Vec<RelatedDoc> = sqlx::query_as(DEFAULT_QUERY)
         .bind(embedding)
         .bind(DEFAULT_EMB_DISTANCE)
@@ -147,5 +170,7 @@ pub async fn get_related_docs(
         .bind(DEFAULT_EMB_SEC_MIN_LENGTH)
         .fetch_all(pool)
         .await?;
+    request_meta.search_duration = Some(start.elapsed());
+
     Ok(docs)
 }
diff --git a/src/ai/help.rs b/src/ai/help.rs
index 34b46438..9504ccb1 100644
--- a/src/ai/help.rs
+++ b/src/ai/help.rs
@@ -1,4 +1,4 @@
-use std::time::{Duration, Instant};
+use std::time::Duration;

 use async_openai::{
     config::OpenAIConfig,
@@ -39,7 +39,9 @@ pub struct AIHelpRequest {
 pub struct AIHelpRequestMeta {
     pub query_len: Option<usize>,
     pub context_len: Option<usize>,
+    pub embedding_duration: Option<Duration>,
     pub search_duration: Option<Duration>,
+    pub embedding_model: Option<&'static str>,
     pub model: Option<&'static str>,
     pub sources: Option<Vec<RefDoc>>,
 }
@@ -95,13 +97,23 @@ pub async fn prepare_ai_help_req(
         .ok_or(AIError::NoUserPrompt)?;
     request_meta.query_len = Some(last_user_message.len());

-    let start = Instant::now();
     let related_docs = if config.full_doc {
-        get_related_macro_docs(client, pool, last_user_message.replace('\n', " ")).await?
+        get_related_macro_docs(
+            client,
+            pool,
+            last_user_message.replace('\n', " "),
+            request_meta,
+        )
+        .await?
     } else {
-        get_related_docs(client, pool, last_user_message.replace('\n', " ")).await?
+        get_related_docs(
+            client,
+            pool,
+            last_user_message.replace('\n', " "),
+            request_meta,
+        )
+        .await?
     };
-    request_meta.search_duration = Some(start.elapsed());

     let mut context = vec![];
     let mut refs = vec![];
diff --git a/src/api/ai_help.rs b/src/api/ai_help.rs
index ada3e413..9d0cbaa1 100644
--- a/src/api/ai_help.rs
+++ b/src/api/ai_help.rs
@@ -1,4 +1,7 @@
-use std::{future, time::Instant};
+use std::{
+    future,
+    time::{Duration, Instant},
+};

 use actix_identity::Identity;
 use actix_web::{
@@ -503,15 +506,13 @@ pub async fn ai_help(
                         message_id,
                         parent_id,
                         created_at: Some(created_at.naive_utc()),
-                        search_duration: default_meta_big_int(
-                            ai_help_req_meta.search_duration.map(|d| d.as_millis()),
-                        ),
-                        response_duration: default_meta_big_int(Some(
-                            response_duration.as_millis(),
-                        )),
+                        embedding_duration: default_meta_duration(ai_help_req_meta.embedding_duration),
+                        search_duration: default_meta_duration(ai_help_req_meta.search_duration),
+                        response_duration: default_meta_duration(Some(response_duration)),
                         query_len: default_meta_big_int(ai_help_req_meta.query_len),
                         context_len: default_meta_big_int(ai_help_req_meta.context_len),
                         response_len: default_meta_big_int(Some(context.len)),
+                        embedding_model: ai_help_req_meta.embedding_model.unwrap_or_default(),
                         model: ai_help_req_meta.model.unwrap_or(""),
                         status,
                         sources: ai_help_req_meta.sources.as_ref().map(|sources| {
@@ -537,11 +538,11 @@ pub async fn ai_help(
                     chat_id,
                     message_id,
                     parent_id,
-                    search_duration: default_meta_big_int(
-                        ai_help_req_meta.search_duration.map(|d| d.as_millis()),
-                    ),
+                    embedding_duration: default_meta_duration(ai_help_req_meta.embedding_duration),
+                    search_duration: default_meta_duration(ai_help_req_meta.search_duration),
                     query_len: default_meta_big_int(ai_help_req_meta.query_len),
                     context_len: default_meta_big_int(ai_help_req_meta.context_len),
+                    embedding_model: ai_help_req_meta.embedding_model.unwrap_or_default(),
                     model: ai_help_req_meta.model.unwrap_or(""),
                     status: (&e).into(),
                     sources: ai_help_req_meta
@@ -716,3 +717,7 @@ fn qa_check_for_error_trigger(
 fn default_meta_big_int(value: Option<impl TryInto<i64>>) -> Option<i64> {
     value.and_then(|v| v.try_into().ok())
 }
+
+fn default_meta_duration(duration: Option<Duration>) -> Option<i64> {
+    default_meta_big_int(duration.map(|d| d.as_millis()))
+}
diff --git a/src/db/model.rs b/src/db/model.rs
index e05afe66..41169a1b 100644
--- a/src/db/model.rs
+++ b/src/db/model.rs
@@ -327,7 +327,9 @@ pub struct AiHelpMessageMetaInsert<'a> {
     pub parent_id: Option<Uuid>,
     /// Timestamp at which the message failed or finished.
     pub created_at: Option<NaiveDateTime>,
-    /// Time it took to search related content in milliseconds.
+    /// Time it took to generate the embedding in milliseconds.
+    pub embedding_duration: Option<i64>,
+    /// Time it took to search using the embedding in milliseconds.
     pub search_duration: Option<i64>,
     /// Time it took to generate the answer in milliseconds.
     pub response_duration: Option<i64>,
@@ -337,6 +339,8 @@ pub struct AiHelpMessageMetaInsert<'a> {
     pub context_len: Option<i64>,
     /// Length of LLM's reply in bytes.
     pub response_len: Option<i64>,
+    /// Model used to generate the embedding.
+    pub embedding_model: &'a str,
     /// Model used to generate the answer.
     pub model: &'a str,
     /// Status of the message.
diff --git a/src/db/schema.rs b/src/db/schema.rs
index 535a792a..fd9baa87 100644
--- a/src/db/schema.rs
+++ b/src/db/schema.rs
@@ -126,6 +126,8 @@ diesel::table! {
         model -> Text,
         status -> AiHelpMessageStatus,
         sources -> Jsonb,
+        embedding_duration -> Nullable<Int8>,
+        embedding_model -> Text,
     }
 }
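Note: every change above applies one measurement pattern: bracket an external call with `Instant::now()`/`elapsed()`, record the result in an `Option<Duration>` field on the request metadata, and flatten it to `BIGINT` milliseconds via `default_meta_duration` when inserting the row. Below is a minimal, self-contained sketch of that pattern, not the patched code itself: `RequestMeta` is a hypothetical stand-in for `AIHelpRequestMeta`, and the `sleep` calls stand in for the OpenAI embeddings request and the vector-search query.

use std::time::{Duration, Instant};

// Hypothetical stand-in for the two fields this change adds to `AIHelpRequestMeta`.
#[derive(Debug, Default)]
struct RequestMeta {
    embedding_duration: Option<Duration>,
    search_duration: Option<Duration>,
}

// Mirrors `default_meta_duration` above: `as_millis()` yields a u128, which is
// narrowed to i64 for the BIGINT column; a value that overflows becomes None.
fn default_meta_duration(duration: Option<Duration>) -> Option<i64> {
    duration.and_then(|d| i64::try_from(d.as_millis()).ok())
}

fn main() {
    let mut meta = RequestMeta::default();

    // Bracket the embedding step, as the diff does around the OpenAI call.
    let start = Instant::now();
    std::thread::sleep(Duration::from_millis(5)); // stand-in for the embeddings request
    meta.embedding_duration = Some(start.elapsed());

    // Re-bind `start` and repeat the pattern for the vector search.
    let start = Instant::now();
    std::thread::sleep(Duration::from_millis(3)); // stand-in for the sqlx query
    meta.search_duration = Some(start.elapsed());

    println!(
        "embedding: {:?} ms, search: {:?} ms",
        default_meta_duration(meta.embedding_duration),
        default_meta_duration(meta.search_duration),
    );
}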