From 6d711779d2c635fe18965422838ae79417d07650 Mon Sep 17 00:00:00 2001
From: Florian Dieminger
Date: Tue, 30 Jan 2024 17:58:27 +0100
Subject: [PATCH] feat(ai-help): switch to markdown context (#410)

* chore(ai-help): wrap docs in tags for context

* chore(ai-help): use mdn_doc_macro.markdown column

Co-authored-by: Claas Augner
---
 src/ai/constants.rs  | 50 ++++++++++++++++++++++++++++++++++++++++++--
 src/ai/embeddings.rs |  4 ++--
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/src/ai/constants.rs b/src/ai/constants.rs
index c990915f..a4cf82a4 100644
--- a/src/ai/constants.rs
+++ b/src/ai/constants.rs
@@ -16,6 +16,13 @@ pub struct AIHelpConfig {
     pub make_context: fn(Vec<RelatedDoc>) -> String,
 }
 
+fn join_with_tags(related_docs: Vec<RelatedDoc>) -> String {
+    related_docs
+        .into_iter()
+        .flat_map(|d| ["<article>".to_string(), d.content, "</article>".to_string()])
+        .join("\n")
+}
+
 pub const AI_HELP_GPT3_5_FULL_DOC_NEW_PROMPT: AIHelpConfig = AIHelpConfig {
     name: "20230901-full_doc-new_prompt",
     model: "gpt-3.5-turbo-1106",
@@ -25,7 +32,7 @@ pub const AI_HELP_GPT3_5_FULL_DOC_NEW_PROMPT: AIHelpConfig = AIHelpConfig {
     token_limit: 16_384,
     context_limit: 12_000,
     max_completion_tokens: 2_048,
-    make_context: |related_docs| related_docs.into_iter().map(|d| d.content).join("\n"),
+    make_context: join_with_tags,
 };
 
 pub const AI_HELP_GPT4_FULL_DOC_NEW_PROMPT: AIHelpConfig = AIHelpConfig {
@@ -37,7 +44,7 @@ pub const AI_HELP_GPT4_FULL_DOC_NEW_PROMPT: AIHelpConfig = AIHelpConfig {
     token_limit: 32_768,
     context_limit: 20_000,
     max_completion_tokens: 4_096,
-    make_context: |related_docs| related_docs.into_iter().map(|d| d.content).join("\n"),
+    make_context: join_with_tags,
 };
 
 pub const MODEL: &str = "gpt-3.5-turbo";
@@ -69,3 +76,42 @@ outputted in markdown format.\
 ";
 
 pub const AI_EXPLAIN_VERSION: i64 = 1;
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_join() {
+        let related_docs = vec![
+            RelatedDoc {
+                url: "".into(),
+                title: "".into(),
+                content: "content1".into(),
+                similarity: 0f64,
+            },
+            RelatedDoc {
+                url: "".into(),
+                title: "".into(),
+                content: "content2".into(),
+                similarity: 0f64,
+            },
+            RelatedDoc {
+                url: "".into(),
+                title: "".into(),
+                content: "content3".into(),
+                similarity: 0f64,
+            },
+        ];
+        let expected = r#"<article>
+content1
+</article>
+<article>
+content2
+</article>
+<article>
+content3
+</article>"#;
+        assert_eq!(join_with_tags(related_docs), expected)
+    }
+}
diff --git a/src/ai/embeddings.rs b/src/ai/embeddings.rs
index 3f2fc7f7..d72ee5c6 100644
--- a/src/ai/embeddings.rs
+++ b/src/ai/embeddings.rs
@@ -44,10 +44,10 @@ const MACRO_EMB_DOC_LIMIT: i64 = 5;
 const MACRO_DOCS_QUERY: &str = "select
 mdn_doc_macro.mdn_url as url,
 mdn_doc_macro.title,
-mdn_doc_macro.html as content,
+mdn_doc_macro.markdown as content,
 mdn_doc_macro.embedding <=> $1 as similarity
 from mdn_doc_macro
-where length(mdn_doc_macro.html) >= $4
+where length(mdn_doc_macro.markdown) >= $4
 and (mdn_doc_macro.embedding <=> $1) < $2
 and mdn_doc_macro.mdn_url not like '/en-US/docs/MDN%'
 order by mdn_doc_macro.embedding <=> $1