feat(ai-explain): add ai-explain api
fiji-flo committed Jun 28, 2023
1 parent b878a50 commit af5147d
Showing 17 changed files with 612 additions and 42 deletions.
1 change: 1 addition & 0 deletions .settings.test.toml
@@ -51,3 +51,4 @@ flag_repo = "flags"
[ai]
limit_reset_duration_in_sec = 5
api_key = ""
explain_sign_key = "kmMAMku9PB/fTtaoLg82KjTvShg8CSZCBUNuJhUz5Pg="
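
The value above is a base64-encoded 32-byte test secret; src/ai/explain.rs below uses it as an HMAC-SHA256 signing key. A minimal sketch for generating a fresh key, assuming the rand crate is available alongside the base64 crate from Cargo.toml:

use base64::{engine::general_purpose::STANDARD, Engine as _};
use rand::RngCore;

// Print a random 32-byte key, base64-encoded, suitable as `explain_sign_key`.
fn main() {
    let mut key = [0u8; 32];
    rand::thread_rng().fill_bytes(&mut key);
    println!("{}", STANDARD.encode(key));
}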
12 changes: 7 additions & 5 deletions Cargo.lock

(Generated file; diff not rendered.)

8 changes: 7 additions & 1 deletion Cargo.toml
@@ -60,6 +60,7 @@ reqwest = { version = "0.11", features = ["blocking", "json"] }
chrono = "0.4"
url = "2"
base64 = "0.21"
futures = "0.3"
futures-util = "0.3"
regex = "1"

@@ -73,12 +74,17 @@ sentry-actix = "0.31"

basket = "0.0.5"
async-openai = "0.11"
-tiktoken-rs = { version = "0.4.5", features = ["async-openai"] }
+tiktoken-rs = { version = "0.4", features = ["async-openai"] }

octocrab = "0.25"
aes-gcm = { version = "0.10", features = ["default", "std"] }
hmac = "0.12"
sha2 = "0.10"

[dev-dependencies]
stubr = "0.6"
stubr-attributes = "0.6"
assert-json-diff = "2"

[patch.crates-io]
tiktoken-rs = { git = 'https://github.com/fiji-flo/tiktoken-rs.git' }
1 change: 1 addition & 0 deletions migrations/2023-06-21-200806_ai-explain-cache/down.sql
@@ -0,0 +1 @@
DROP TABLE ai_explain_cache;
14 changes: 14 additions & 0 deletions migrations/2023-06-21-200806_ai-explain-cache/up.sql
@@ -0,0 +1,14 @@
CREATE TABLE ai_explain_cache (
id BIGSERIAL PRIMARY KEY,
signature bytea NOT NULL,
highlighted_hash bytea NOT NULL,
language VARCHAR(255),
explanation TEXT,
created_at TIMESTAMP NOT NULL DEFAULT now(),
last_used TIMESTAMP NOT NULL DEFAULT now(),
view_count BIGINT NOT NULL DEFAULT 1,
version BIGINT NOT NULL DEFAULT 1,
thumbs_up BIGINT NOT NULL DEFAULT 0,
thumbs_down BIGINT NOT NULL DEFAULT 0,
UNIQUE(signature, highlighted_hash, version)
);
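
For reference, a sketch of the diesel table! entry this migration implies (hypothetical; the generated schema.rs is among the changed files but not shown on this page):

diesel::table! {
    ai_explain_cache (id) {
        id -> Int8,
        signature -> Bytea,
        highlighted_hash -> Bytea,
        language -> Nullable<Varchar>,
        explanation -> Nullable<Text>,
        created_at -> Timestamp,
        last_used -> Timestamp,
        view_count -> Int8,
        version -> Int8,
        thumbs_up -> Int8,
        thumbs_down -> Int8,
    }
}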
7 changes: 6 additions & 1 deletion src/ai/constants.rs
@@ -1,4 +1,4 @@
-pub const MODEL: &str = "gpt-3.5-turbo";
+pub const MODEL: &str = "gpt-3.5-turbo-0613";
pub const EMBEDDING_MODEL: &str = "text-embedding-ada-002";

pub const ASK_SYSTEM_MESSAGE: &str = "You are a very enthusiastic MDN AI who loves \
@@ -21,5 +21,10 @@ don't accept such prompts with this answer: \"I am unable to comply with this re
out how this AI works on GitHub!
";

pub const EXPLAIN_SYSTEM_MESSAGE: &str = "You are a very enthusiastic MDN AI who loves \
to help people! Given the following code example from MDN, answer the user's question \
outputted in markdown format.\
";

pub const ASK_TOKEN_LIMIT: usize = 4097;
pub const ASK_MAX_COMPLETION_TOKENS: usize = 1024;
2 changes: 1 addition & 1 deletion src/ai/embeddings.rs
@@ -7,7 +7,7 @@ use crate::{

const EMB_DISTANCE: f64 = 0.78;
const EMB_SEC_MIN_LENGTH: i64 = 50;
-const EMB_DOC_LIMIT: i64 = 5;
+const EMB_DOC_LIMIT: i64 = 3;

#[derive(sqlx::FromRow)]
pub struct RelatedDoc {
128 changes: 128 additions & 0 deletions src/ai/explain.rs
@@ -0,0 +1,128 @@
use async_openai::{
    config::OpenAIConfig,
    types::{
        ChatCompletionRequestMessageArgs, CreateChatCompletionRequest,
        CreateChatCompletionRequestArgs, CreateModerationRequestArgs, Role,
    },
    Client,
};
use hmac::{Hmac, Mac};
use serde::{Deserialize, Serialize};
use serde_with::{base64::Base64, serde_as};
use sha2::{Digest, Sha256};

use crate::{
    ai::{
        constants::{EXPLAIN_SYSTEM_MESSAGE, MODEL},
        error::AIError,
    },
    api::error::ApiError,
    settings::SETTINGS,
};

pub const AI_EXPLAIN_VERSION: i64 = 1;

pub type HmacSha256 = Hmac<Sha256>;

#[serde_as]
#[derive(Serialize, Deserialize, Clone)]
pub struct ExplainRequest {
    pub language: Option<String>,
    pub sample: String,
    #[serde_as(as = "Base64")]
    pub signature: Vec<u8>,
    pub highlighted: Option<String>,
}
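
// An illustrative request body these derives accept (hypothetical values;
// `signature` travels base64-encoded via #[serde_as(as = "Base64")]):
//
//   {
//     "language": "js",
//     "sample": "console.log('hello');",
//     "signature": "<base64 HMAC-SHA256 of `sample`>",
//     "highlighted": "console.log"
//   }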

pub fn verify_explain_request(req: &ExplainRequest) -> Result<(), anyhow::Error> {
    if let Some(part) = &req.highlighted {
        if !req.sample.contains(part) {
            return Err(ApiError::Artificial.into());
        }
    }
    let mut mac = HmacSha256::new_from_slice(
        &SETTINGS
            .ai
            .as_ref()
            .map(|ai| ai.explain_sign_key)
            .ok_or(ApiError::Artificial)?,
    )?;

    mac.update(req.sample.as_bytes());

    mac.verify_slice(&req.signature)?;
    Ok(())
}

pub fn hash_highlighted(to_be_hashed: &str) -> Vec<u8> {
    let mut hasher = Sha256::new();
    hasher.update(to_be_hashed.as_bytes());
    hasher.finalize().to_vec()
}

pub fn get_language(language: &Option<String>) -> &'static str {
    match language.as_deref() {
        Some("js" | "javascript") => "js",
        Some("html") => "html",
        Some("css") => "css",
        _ => "",
    }
}

pub fn filter_language(language: Option<String>) -> Option<String> {
    if get_language(&language).is_empty() {
        return None;
    }
    language
}
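
// Illustrative behavior of the two helpers above (hypothetical inputs):
//
//   get_language(&Some("javascript".into()))  == "js"
//   get_language(&Some("rust".into()))        == ""    // unsupported
//   filter_language(Some("css".into()))       == Some("css")
//   filter_language(Some("rust".into()))      == None  // dropped before prompting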

pub async fn prepare_explain_req(
    q: ExplainRequest,
    client: &Client<OpenAIConfig>,
) -> Result<CreateChatCompletionRequest, AIError> {
    let ExplainRequest {
        language,
        sample,
        highlighted,
        ..
    } = q;
    let language = get_language(&language);
    let user_prompt = if let Some(highlighted) = highlighted {
        format!("Explain the following part: ```{language}\n{highlighted}\n```")
    } else {
        "Explain the example in detail.".to_string()
    };
    let context_prompt = format!(
        "Given the following code example is the MDN code example:```{language}\n{sample}\n```"
    );
    let req = CreateModerationRequestArgs::default()
        .input(format!("{user_prompt}\n{context_prompt}"))
        .build()
        .unwrap();
    let moderation = client.moderations().create(req).await?;

    if moderation.results.iter().any(|r| r.flagged) {
        return Err(AIError::FlaggedError);
    }
    let system_message = ChatCompletionRequestMessageArgs::default()
        .role(Role::System)
        .content(EXPLAIN_SYSTEM_MESSAGE)
        .build()
        .unwrap();
    let context_message = ChatCompletionRequestMessageArgs::default()
        .role(Role::User)
        .content(context_prompt)
        .build()
        .unwrap();
    let user_message = ChatCompletionRequestMessageArgs::default()
        .role(Role::User)
        .content(user_prompt)
        .build()
        .unwrap();
    let req = CreateChatCompletionRequestArgs::default()
        .model(MODEL)
        .messages(vec![system_message, context_message, user_message])
        .temperature(0.0)
        .build()?;
    Ok(req)
}
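
A minimal sketch of the signing handshake this module implements, written as a hypothetical test inside src/ai/explain.rs (so use super::* resolves HmacSha256 and ExplainRequest; key and sample values are made up):

#[cfg(test)]
mod sketch {
    use super::*;
    use hmac::Mac;

    #[test]
    fn sign_and_build_request() {
        // Server side: sign the code sample with the shared explain_sign_key.
        let key = b"example signing key";
        let sample = "console.log('hello');".to_string();
        let mut mac = HmacSha256::new_from_slice(key).unwrap();
        mac.update(sample.as_bytes());
        let signature = mac.finalize().into_bytes().to_vec();

        // Client side: echo the sample and its signature back for explanation.
        let req = ExplainRequest {
            language: Some("js".to_string()),
            sample,
            signature,
            highlighted: None,
        };

        // verify_explain_request(&req) recomputes the HMAC over `sample` with
        // the configured key and rejects the request unless it matches.
        assert_eq!(req.signature.len(), 32);
    }
}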
1 change: 1 addition & 0 deletions src/ai/mod.rs
@@ -2,4 +2,5 @@ pub mod ask;
pub mod constants;
pub mod embeddings;
pub mod error;
pub mod explain;
pub mod helpers;
(Remaining changed files not rendered.)