From 92541a5af026d31868a3009ab2b35b618b2592cd Mon Sep 17 00:00:00 2001 From: Andi Pieper Date: Mon, 18 Mar 2024 17:48:07 +0100 Subject: [PATCH] feat(ai-help): delete ai history after six months (#437) --- .settings.dev.toml | 1 + .settings.local.toml | 7 ++- .settings.test.toml | 1 + src/api/admin.rs | 20 +++++++ src/db/ai_history.rs | 35 +++++++++++ src/db/mod.rs | 1 + src/settings.rs | 1 + tests/api/ai_help_history.rs | 109 ++++++++++++++++++++++++++++++++++- 8 files changed, 171 insertions(+), 4 deletions(-) create mode 100644 src/db/ai_history.rs diff --git a/.settings.dev.toml b/.settings.dev.toml index 84b465d9..bf3e8810 100644 --- a/.settings.dev.toml +++ b/.settings.dev.toml @@ -51,5 +51,6 @@ flag_repo = "flags" [ai] api_key = "" limit_reset_duration_in_sec = 3600 +history_deletion_period_in_sec = 15_778_476 trigger_error_for_search_term = "Please give me an error in the search phase of AI conversation" trigger_error_for_chat_term = "Please give me an error in the chat phase of the AI conversation" diff --git a/.settings.local.toml b/.settings.local.toml index 3d5f9962..15b70ed0 100644 --- a/.settings.local.toml +++ b/.settings.local.toml @@ -13,10 +13,10 @@ scopes = "openid profile email profile:subscriptions" auth_cookie_name = "auth-cookie" login_cookie_name = "login-cookie" auth_cookie_secure = false -client_id="TEST_CLIENT_ID" -client_secret="TEST_CLIENT_SECRET" +client_id = "TEST_CLIENT_ID" +client_secret = "TEST_CLIENT_SECRET" cookie_key = "DUwIFZuUYzRhHPlhOm6DwTHSDUSyR5SyvZHIeHdx4DIanxm5/GD/4dqXROLvn5vMofOYUq37HhhivjCyMCWP4w==" -admin_update_bearer_token="TEST_TOKEN" +admin_update_bearer_token = "TEST_TOKEN" [application] document_base_url = "https://developer.allizom.org" @@ -51,3 +51,4 @@ flag_repo = "flags" [ai] api_key = "" limit_reset_duration_in_sec = 3600 +history_deletion_period_in_sec = 15_778_476 diff --git a/.settings.test.toml b/.settings.test.toml index c2e4ee67..167f726c 100644 --- a/.settings.test.toml +++ b/.settings.test.toml @@ -52,5 +52,6 @@ flag_repo = "flags" limit_reset_duration_in_sec = 5 api_key = "" explain_sign_key = "kmMAMku9PB/fTtaoLg82KjTvShg8CSZCBUNuJhUz5Pg=" +history_deletion_period_in_sec = 15_778_476 trigger_error_for_search_term = "Please give me an error in the search phase of the AI conversation" trigger_error_for_chat_term = "Please give me an error in the chat phase of the AI conversation" diff --git a/src/api/admin.rs b/src/api/admin.rs index 4364742f..909f1dda 100644 --- a/src/api/admin.rs +++ b/src/api/admin.rs @@ -1,6 +1,11 @@ +use crate::db::ai_history::do_delete_old_ai_history; use crate::db::v2::synchronize_bcd_updates_db::update_bcd; +use crate::db::Pool; use crate::settings::SETTINGS; +use actix_rt::ArbiterHandle; use actix_web::dev::{HttpServiceFactory, ServiceRequest}; +use actix_web::web::Data; +use actix_web::HttpResponse; use actix_web::{web, Error}; use actix_web_httpauth::extractors::bearer::BearerAuth; use actix_web_httpauth::middleware::HttpAuthentication; @@ -22,4 +27,19 @@ pub fn admin_service() -> impl HttpServiceFactory { web::scope("/admin-api") .wrap(HttpAuthentication::bearer(validator)) .service(web::resource("/v2/updates/").route(web::post().to(update_bcd))) + .service(web::resource("/ai-history/").route(web::post().to(delete_old_ai_history))) +} + +pub async fn delete_old_ai_history( + pool: Data, + arbiter: Data, +) -> Result { + if !arbiter.spawn(async move { + if let Err(e) = do_delete_old_ai_history(pool).await { + error!("{}", e); + } + }) { + return Ok(HttpResponse::InternalServerError().finish()); + } + Ok(HttpResponse::Accepted().finish()) } diff --git a/src/db/ai_history.rs b/src/db/ai_history.rs new file mode 100644 index 00000000..a7903b5c --- /dev/null +++ b/src/db/ai_history.rs @@ -0,0 +1,35 @@ +use crate::db::schema::ai_help_history; +use crate::db::Pool; +use crate::diesel::QueryDsl; +use crate::{api::error::ApiError, settings::SETTINGS}; +use actix_web::web::Data; +use chrono::Utc; +use diesel::{ExpressionMethods, RunQueryDsl}; +use std::ops::Sub; +use std::time::Duration; + +/// This removes old AI history records from the database. It is meant to be called from a +/// cron job calling the respective endpoint in the admin API. +pub async fn do_delete_old_ai_history(pool: Data) -> Result<(), ApiError> { + let mut conn = pool.get()?; + let history_deletion_period_in_sec = SETTINGS + .ai + .as_ref() + .map(|ai| ai.history_deletion_period_in_sec) + .ok_or(ApiError::Generic( + "ai.history_deletion_period_in_sec missing from configuration".to_string(), + ))?; + + let oldest_timestamp = Utc::now() + .sub(Duration::from_secs(history_deletion_period_in_sec)) + .naive_utc(); + + let affected_rows = diesel::delete( + ai_help_history::table.filter(ai_help_history::updated_at.lt(oldest_timestamp)), + ) + .execute(&mut conn)?; + info!( + "Deleted old AI history before {oldest_timestamp}: {affected_rows} old record(s) deleted." + ); + Ok(()) +} diff --git a/src/db/mod.rs b/src/db/mod.rs index f4772964..764c7e6a 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,5 +1,6 @@ pub mod ai_explain; pub mod ai_help; +pub mod ai_history; pub mod documents; pub mod error; pub mod fxa_webhook; diff --git a/src/settings.rs b/src/settings.rs index 1f2e4dee..aee47086 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -83,6 +83,7 @@ pub struct AI { pub limit_reset_duration_in_sec: i64, #[serde_as(as = "Base64")] pub explain_sign_key: [u8; 32], + pub history_deletion_period_in_sec: u64, } #[serde_as] diff --git a/tests/api/ai_help_history.rs b/tests/api/ai_help_history.rs index c00f3f25..296e5a72 100644 --- a/tests/api/ai_help_history.rs +++ b/tests/api/ai_help_history.rs @@ -5,11 +5,19 @@ use actix_web::test; use anyhow::Error; use async_openai::types::ChatCompletionRequestMessage; use async_openai::types::Role::{Assistant, User}; +use chrono::{NaiveDateTime, Utc}; +use diesel::dsl::count; +use diesel::prelude::*; +use diesel::{insert_into, ExpressionMethods, RunQueryDsl}; use rumba::ai::help::RefDoc; use rumba::db::ai_help::{add_help_history, add_help_history_message}; -use rumba::db::model::{AIHelpHistoryMessageInsert, SettingsInsert}; +use rumba::db::model::{AIHelpHistoryInsert, AIHelpHistoryMessageInsert, SettingsInsert}; +use rumba::db::schema::ai_help_history; use rumba::db::settings::create_or_update_settings; +use rumba::settings::SETTINGS; use serde_json::Value::Null; +use std::ops::Sub; +use std::time::Duration; use uuid::Uuid; const CHAT_ID: Uuid = Uuid::nil(); @@ -95,6 +103,105 @@ async fn test_history() -> Result<(), Error> { Ok(()) } +#[actix_rt::test] +#[stubr::mock(port = 4321)] +async fn test_history_deletion() -> Result<(), Error> { + let pool = reset()?; + let app = test_app_with_login(&pool).await.unwrap(); + let service = test::init_service(app).await; + let mut _logged_in_client = TestHttpClient::new(&service).await; + let mut conn = pool.get()?; + + let history_deletion_period_in_sec = &SETTINGS + .ai + .as_ref() + .map(|ai| ai.history_deletion_period_in_sec) + .expect("ai.history_deletion_period_in_sec missing"); + + // Add an old chat history entry, double our configured period ago. + let ts = Utc::now() + .sub(Duration::from_secs(history_deletion_period_in_sec * 2)) + .naive_utc(); + let history = AIHelpHistoryInsert { + user_id: 1, + chat_id: Uuid::from_u128(1), + created_at: Some(ts), + updated_at: Some(ts), + label: "old entry".to_string(), + }; + insert_into(ai_help_history::table) + .values(history) + .execute(&mut conn)?; + + // Add a newer chat history entry, half of our configured period ago. + let ts = Utc::now() + // .checked_sub_months(Months::new(2)) + .sub(Duration::from_secs(history_deletion_period_in_sec / 2)) + .naive_utc(); + let history = AIHelpHistoryInsert { + user_id: 1, + chat_id: Uuid::from_u128(2), + created_at: Some(ts), + updated_at: Some(ts), + label: "new entry".to_string(), + }; + insert_into(ai_help_history::table) + .values(history) + .execute(&mut conn)?; + + // check the history count before we run the delete job + let rec_count = ai_help_history::table + .filter(ai_help_history::user_id.eq(1)) + .select(count(ai_help_history::user_id)) + .first::(&mut conn)?; + + assert_eq!(2, rec_count); + + // Now, run the delete job. + let req = test::TestRequest::post() + .uri("/admin-api/ai-history/") + .insert_header(( + "Authorization", + format!("Bearer {}", SETTINGS.auth.admin_update_bearer_token), + )) + .to_request(); + + let res = test::call_service(&service, req).await; + assert!(res.response().status().is_success()); + + // Check database that the old entry is gone. + // Loop until we see the old entry is gone because the + // delete job runs asynchonously. + let mut retry = 0; + const MAX_RETRIES: u32 = 40; + let mut records: Vec; + loop { + records = ai_help_history::table + .filter(ai_help_history::user_id.eq(1)) + .select(ai_help_history::updated_at) + .get_results(&mut conn)?; + if records.len() == 1 { + break; + } + + actix_rt::time::sleep(Duration::from_millis(5)).await; + retry += 1; + if retry > MAX_RETRIES { + break; + } + } + + assert_eq!(1, records.len()); + // we only compare up to millisecond precision + assert_eq!( + ts.timestamp_micros(), + records.get(0).unwrap().timestamp_micros() + ); + + drop_stubr(stubr).await; + Ok(()) +} + fn normalize_digits(s: &str) -> String { let mut result = String::new();