Implement LangSmith Tracing
zx8086 committed Feb 5, 2025
1 parent 448681c commit c926c57
Showing 6 changed files with 336 additions and 169 deletions.
178 changes: 124 additions & 54 deletions bun.lock

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion package.json
@@ -9,7 +9,7 @@
"build:docker": "bun run svelte-kit sync && DISABLE_OPENTELEMETRY=true NODE_ENV=production bun --bun vite build",
"preview": "bun --bun vite preview --host",
"build:no-telemetry": "LANG=C.UTF-8 LC_ALL=C.UTF-8 LC_CTYPE=C.UTF-8 DISABLE_OPENTELEMETRY=true VITE_BUILD_LEGACY=true VITE_BUILD_SSR=true VITE_BUILD_CJS=true NODE_ENV=production bun --bun vite build",
"start": "bun run --preload ./src/instrumentation.ts ./build/index.js",
"start": "NODE_DEBUG=http BUN_CONFIG_VERBOSE_FETCH=true bun run --preload ./src/instrumentation.ts ./build/index.js",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
"prepublish": "bun run snyk-protect",
@@ -63,6 +63,8 @@
"@azure/msal-node": "^3.0.1",
"@elastic/apm-rum": "^5.16.3",
"@elastic/ecs-winston-format": "^1.5.3",
"@langchain/core": "^0.3.38",
"@langchain/openai": "^0.4.2",
"@microsoft/microsoft-graph-client": "^3.0.7",
"@microsoft/msgraph-sdk": "^1.0.0-preview.30",
"@openreplay/tracker": "^15.0.3",
@@ -103,6 +105,8 @@
"graphql": "^16.10.0",
"graphql-request": "^7.1.2",
"help": "^3.0.2",
"langchain": "^0.3.15",
"langsmith": "^0.3.5",
"mode-watcher": "^0.5.0",
"net": "^1.0.2",
"node-cron": "^3.0.3",
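Note: the new LangSmith packages (langsmith, @langchain/core, @langchain/openai, langchain) only record traces when the LangSmith environment variables are present at runtime. The snippet below is a hypothetical startup guard, not part of this commit; the variable names follow the langsmith documentation (the older LANGCHAIN_* aliases are also accepted), so verify them against the pinned version.

// Hypothetical startup guard -- not part of this commit.
const tracingEnabled =
  Bun.env.LANGSMITH_TRACING === "true" ||
  Bun.env.LANGCHAIN_TRACING_V2 === "true";

if (tracingEnabled && !(Bun.env.LANGSMITH_API_KEY || Bun.env.LANGCHAIN_API_KEY)) {
  console.warn("LangSmith tracing is enabled but no API key is set; traces will not be recorded.");
}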
25 changes: 25 additions & 0 deletions src/otlp/MonitoredOTLPExporter.ts
@@ -78,6 +78,8 @@ export abstract class MonitoredOTLPExporter<T> {
this.logTimer = setInterval(() => {
this.logStatistics();
}, this.logIntervalMs);

this.startHealthChecks();
}

private async verifyDNSPrefetch(): Promise<void> {
@@ -257,4 +259,27 @@ export abstract class MonitoredOTLPExporter<T> {
items: T,
resultCallback: (result: ExportResult) => void,
): Promise<void>;

protected async validateEndpoint(): Promise<void> {
try {
// RequestInit has no `timeout` option; bound the request with an AbortSignal instead
const response = await fetch(this.url, {
method: 'OPTIONS',
signal: AbortSignal.timeout(5000)
});

if (!response.ok) {
warn(`OpenTelemetry endpoint ${this.url} returned status ${response.status}`);
}
} catch (error) {
err(`Failed to validate OpenTelemetry endpoint ${this.url}:`, error);
}
}

private startHealthChecks(): void {
setInterval(() => {
this.validateEndpoint();
this.checkNetworkConnectivity();
this.logSystemResources();
}, this.logIntervalMs);
}
}
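Note: startHealthChecks() starts a second interval alongside this.logTimer, and the diff does not show either timer being cleared. The fragment below is a sketch of one way to retain and stop both; the healthCheckTimer field and stopTimers() hook are assumed names, since the rest of the class is not shown in this commit.

// Sketch only -- healthCheckTimer and stopTimers are assumptions, not part of this commit.
private healthCheckTimer?: ReturnType<typeof setInterval>;

private startHealthChecks(): void {
  this.healthCheckTimer = setInterval(() => {
    this.validateEndpoint();
    this.checkNetworkConnectivity();
    this.logSystemResources();
  }, this.logIntervalMs);
}

// Call from whatever shutdown path the exporter already exposes.
protected stopTimers(): void {
  clearInterval(this.logTimer);
  if (this.healthCheckTimer) clearInterval(this.healthCheckTimer);
}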
27 changes: 27 additions & 0 deletions src/otlp/otlpConfig.ts
@@ -4,4 +4,31 @@ import { backendConfig } from "../backend-config";

export const otlpConfig = {
logIntervalMs: backendConfig.openTelemetry.SUMMARY_LOG_INTERVAL,
isEnabled: () => {
// Check enable flag first
const enableFlag = process.env.ENABLE_OPENTELEMETRY?.toLowerCase();
if (enableFlag === 'true') return true;

// Check disable flag second (for backwards compatibility)
const disableFlag = process.env.DISABLE_OPENTELEMETRY?.toLowerCase();
if (disableFlag === 'true') return false;

// Default to enabled if in production, disabled otherwise
return process.env.NODE_ENV === 'production';
},
validateConfig: () => {
const enabled = otlpConfig.isEnabled();

if (enabled) {
// Log the configuration state
console.debug('OpenTelemetry Configuration:', {
ENABLE_OPENTELEMETRY: process.env.ENABLE_OPENTELEMETRY,
DISABLE_OPENTELEMETRY: process.env.DISABLE_OPENTELEMETRY,
NODE_ENV: process.env.NODE_ENV,
isEnabled: enabled
});
}

return enabled;
}
};
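Note: isEnabled() gives ENABLE_OPENTELEMETRY precedence over DISABLE_OPENTELEMETRY and otherwise falls back to NODE_ENV. A minimal sketch of how the config might be consumed from the preloaded instrumentation entry point; the exact wiring in src/instrumentation.ts is an assumption, as that file is not part of this diff.

// Hypothetical usage in src/instrumentation.ts (not shown in this commit).
import { otlpConfig } from "./otlp/otlpConfig";

if (otlpConfig.validateConfig()) {
  // register the OpenTelemetry SDK and OTLP exporters here
} else {
  console.info("OpenTelemetry disabled; skipping SDK registration.");
}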
156 changes: 62 additions & 94 deletions src/routes/api/chat/+server.ts
@@ -2,15 +2,71 @@ import { json } from '@sveltejs/kit';
import { Pinecone } from "@pinecone-database/pinecone";
import OpenAI from 'openai';
import type { RequestHandler } from './$types';
import { traceable } from "langsmith/traceable";
import { wrapOpenAI } from "langsmith/wrappers";

// console.log('🔑 Environment Variables:', {
// OPENAI_API_KEY: Bun.env.OPENAI_API_KEY,
// PINECONE_API_KEY: Bun.env.PINECONE_API_KEY
// });

// Initialize OpenAI
const openai = new OpenAI({
// Initialize OpenAI with LangSmith tracing
const openai = wrapOpenAI(new OpenAI({
apiKey: Bun.env.OPENAI_API_KEY
}));

// Wrap the RAG pipeline with tracing
const ragPipeline = traceable(async (message: string) => {
// Initialize Pinecone
const pc = new Pinecone({
apiKey: Bun.env.PINECONE_API_KEY as string,
});

// Get index with specific namespace
const index = pc.index(Bun.env.PINECONE_INDEX_NAME as string)
.namespace(Bun.env.PINECONE_NAMESPACE as string);

// Generate embedding
console.log('🔄 Generating embedding...');
const embeddingResponse = await openai.embeddings.create({
model: "text-embedding-ada-002",
input: message
});

// Query Pinecone
const queryResponse = await index.query({
vector: embeddingResponse.data[0].embedding,
topK: 3,
includeMetadata: true
});

// Extract context
const context = queryResponse.matches
?.map(match => ({
text: match.metadata?.text,
filename: match.metadata?.filename || 'Unknown source'
}))
.filter(item => item.text);

// Generate completion
const stream = await openai.chat.completions.create({
model: "gpt-4-turbo-preview",
messages: [
{
role: "system",
content: "You are a helpful assistant. Use the following context to answer the user's question. Always end your response with '\n\nReferences:' followed by the source filenames. If you cannot answer the question based on the context, say so."
},
{
role: "user",
content: `Context: ${context?.map(c => c.text).join('\n\n---\n\n')}\n\nSource files: ${context?.map(c => c.filename).join(', ')}\n\nQuestion: ${message}`
}
],
temperature: 0.7,
max_tokens: 2000,
stream: true
});

return { stream, context };
});

// Keep existing GET handler
@@ -40,109 +96,23 @@ export const GET: RequestHandler = async () => {
}
};

// Add POST handler for chat
// Modify POST handler to use traced pipeline
export const POST: RequestHandler = async ({ request }) => {
const { message } = await request.json();

try {
// Initialize Pinecone with correct host URL
const pc = new Pinecone({
apiKey: Bun.env.PINECONE_API_KEY as string,
});

// Get index with specific namespace
const index = pc.index(Bun.env.PINECONE_INDEX_NAME as string).namespace(Bun.env.PINECONE_NAMESPACE as string);

// Generate embedding
console.log('🔄 Generating embedding...');
const embeddingResponse = await openai.embeddings.create({
model: "text-embedding-ada-002",
input: message
});
console.log('✅ Embedding details:', {
dimensions: embeddingResponse.data[0].embedding.length,
embedding: `${embeddingResponse.data[0].embedding.slice(0, 3)}...`
});

// Query within that namespace
console.log('🔄 Querying Pinecone...', {
indexName: "platform-engineering-rag",
namespace: "capella-document-search",
vectorDimensions: embeddingResponse.data[0].embedding.length
});

const queryResponse = await index.query({
vector: embeddingResponse.data[0].embedding,
topK: 3,
includeMetadata: true
});

console.log('📊 Pinecone matches:', {
totalMatches: queryResponse.matches?.length,
matches: queryResponse.matches?.map(m => ({
score: m.score,
metadata: m.metadata,
id: m.id
}))
});

if (!queryResponse.matches?.length) {
console.warn('⚠️ No matches found in Pinecone');
return json({
response: "I couldn't find any relevant information to answer your question."
});
}

// Extract context
const context = queryResponse.matches
.map(match => ({
text: match.metadata?.text,
filename: match.metadata?.filename || 'Unknown source'
}))
.filter(item => item.text);

console.log('📝 Context details:', {
contextLength: context.length,
matchCount: queryResponse.matches.length,
topMatchScores: queryResponse.matches.map(m => m.score)
});

console.log('🔍 Using RAG Context:', {
question: message,
retrievedContext: context.map(c => c.text).join('\n\n---\n\n'),
similarityScore: queryResponse.matches[0].score
});

const stream = await openai.chat.completions.create({
model: "gpt-4-turbo-preview",
messages: [
{
role: "system",
content: "You are a helpful assistant. Use the following context to answer the user's question. Always end your response with '\n\nReferences:' followed by the source filenames. If you cannot answer the question based on the context, say so."
},
{
role: "user",
content: `Context: ${context.map(c => c.text).join('\n\n---\n\n')}\n\nSource files: ${context.map(c => c.filename).join(', ')}\n\nQuestion: ${message}`
}
],
temperature: 0.7,
max_tokens: 2000,
stream: true
});

// After the response, append the references if they're not already included
let fullResponse = '';
const { stream, context } = await ragPipeline(message);

// Match the working implementation's response structure
return new Response(
new ReadableStream({
async start(controller) {
try {
let fullResponse = '';

for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content;
if (content) {
fullResponse += content;
// Send each chunk as a JSON string with newline delimiter
controller.enqueue(
new TextEncoder().encode(
JSON.stringify({ content }) + '\n'
@@ -151,7 +121,6 @@
}
}

// If response doesn't include references, append them
if (!fullResponse.includes('References:')) {
const references = `\n\nReferences:\n${context.map(c => c.filename).join('\n- ')}`;
controller.enqueue(
Expand All @@ -161,7 +130,6 @@ export const POST: RequestHandler = async ({ request }) => {
);
}

// Send done signal
controller.enqueue(
new TextEncoder().encode(
JSON.stringify({ done: true }) + '\n'
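Note: the POST handler streams newline-delimited JSON objects ({ "content": ... } chunks followed by { "done": true }). The sketch below shows one way a client could consume that stream; the /api/chat path is inferred from the route location, and the helper is illustrative, not part of this commit.

// Hypothetical client-side consumer for the NDJSON stream above.
async function streamChat(message: string, onChunk: (text: string) => void): Promise<void> {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ message }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    let newline: number;
    while ((newline = buffer.indexOf("\n")) !== -1) {
      const line = buffer.slice(0, newline).trim();
      buffer = buffer.slice(newline + 1);
      if (!line) continue;
      const parsed = JSON.parse(line);
      if (parsed.content) onChunk(parsed.content);
      if (parsed.done) return;
    }
  }
}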