Implement LangSmith Tracing
zx8086 committed Feb 5, 2025
1 parent 448681c commit c926c57
Showing 6 changed files with 336 additions and 169 deletions.
178 changes: 124 additions & 54 deletions bun.lock

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion package.json
@@ -9,7 +9,7 @@
"build:docker": "bun run svelte-kit sync && DISABLE_OPENTELEMETRY=true NODE_ENV=production bun --bun vite build",
"preview": "bun --bun vite preview --host",
"build:no-telemetry": "LANG=C.UTF-8 LC_ALL=C.UTF-8 LC_CTYPE=C.UTF-8 DISABLE_OPENTELEMETRY=true VITE_BUILD_LEGACY=true VITE_BUILD_SSR=true VITE_BUILD_CJS=true NODE_ENV=production bun --bun vite build",
"start": "bun run --preload ./src/instrumentation.ts ./build/index.js",
"start": "NODE_DEBUG=http BUN_CONFIG_VERBOSE_FETCH=true bun run --preload ./src/instrumentation.ts ./build/index.js",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
"prepublish": "bun run snyk-protect",
@@ -63,6 +63,8 @@
"@azure/msal-node": "^3.0.1",
"@elastic/apm-rum": "^5.16.3",
"@elastic/ecs-winston-format": "^1.5.3",
"@langchain/core": "^0.3.38",
"@langchain/openai": "^0.4.2",
"@microsoft/microsoft-graph-client": "^3.0.7",
"@microsoft/msgraph-sdk": "^1.0.0-preview.30",
"@openreplay/tracker": "^15.0.3",
@@ -103,6 +105,8 @@
"graphql": "^16.10.0",
"graphql-request": "^7.1.2",
"help": "^3.0.2",
"langchain": "^0.3.15",
"langsmith": "^0.3.5",
"mode-watcher": "^0.5.0",
"net": "^1.0.2",
"node-cron": "^3.0.3",
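Note: the new LangSmith packages (langsmith, @langchain/core, @langchain/openai, langchain) only record traces when the LangSmith environment variables are present at runtime. The snippet below is a hypothetical startup guard, not part of this commit; the variable names follow the langsmith documentation (the older LANGCHAIN_* aliases are also accepted), so verify them against the pinned version.

// Hypothetical startup guard -- not part of this commit.
const tracingEnabled =
  Bun.env.LANGSMITH_TRACING === "true" ||
  Bun.env.LANGCHAIN_TRACING_V2 === "true";

if (tracingEnabled && !(Bun.env.LANGSMITH_API_KEY || Bun.env.LANGCHAIN_API_KEY)) {
  console.warn("LangSmith tracing is enabled but no API key is set; traces will not be recorded.");
}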
25 changes: 25 additions & 0 deletions src/otlp/MonitoredOTLPExporter.ts
@@ -78,6 +78,8 @@ export abstract class MonitoredOTLPExporter<T> {
this.logTimer = setInterval(() => {
this.logStatistics();
}, this.logIntervalMs);

this.startHealthChecks();
}

private async verifyDNSPrefetch(): Promise<void> {
@@ -257,4 +259,27 @@ export abstract class MonitoredOTLPExporter<T> {
items: T,
resultCallback: (result: ExportResult) => void,
): Promise<void>;

protected async validateEndpoint(): Promise<void> {
try {
// RequestInit has no `timeout` option; bound the request with an AbortSignal instead
const response = await fetch(this.url, {
method: 'OPTIONS',
signal: AbortSignal.timeout(5000)
});

if (!response.ok) {
warn(`OpenTelemetry endpoint ${this.url} returned status ${response.status}`);
}
} catch (error) {
err(`Failed to validate OpenTelemetry endpoint ${this.url}:`, error);
}
}

private startHealthChecks(): void {
setInterval(() => {
this.validateEndpoint();
this.checkNetworkConnectivity();
this.logSystemResources();
}, this.logIntervalMs);
}
}
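Note: startHealthChecks() starts a second interval alongside this.logTimer, and the diff does not show either timer being cleared. The fragment below is a sketch of one way to retain and stop both; the healthCheckTimer field and stopTimers() hook are assumed names, since the rest of the class is not shown in this commit.

// Sketch only -- healthCheckTimer and stopTimers are assumptions, not part of this commit.
private healthCheckTimer?: ReturnType<typeof setInterval>;

private startHealthChecks(): void {
  this.healthCheckTimer = setInterval(() => {
    this.validateEndpoint();
    this.checkNetworkConnectivity();
    this.logSystemResources();
  }, this.logIntervalMs);
}

// Call from whatever shutdown path the exporter already exposes.
protected stopTimers(): void {
  clearInterval(this.logTimer);
  if (this.healthCheckTimer) clearInterval(this.healthCheckTimer);
}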
27 changes: 27 additions & 0 deletions src/otlp/otlpConfig.ts
@@ -4,4 +4,31 @@ import { backendConfig } from "../backend-config";

export const otlpConfig = {
logIntervalMs: backendConfig.openTelemetry.SUMMARY_LOG_INTERVAL,
isEnabled: () => {
// Check enable flag first
const enableFlag = process.env.ENABLE_OPENTELEMETRY?.toLowerCase();
if (enableFlag === 'true') return true;

// Check disable flag second (for backwards compatibility)
const disableFlag = process.env.DISABLE_OPENTELEMETRY?.toLowerCase();
if (disableFlag === 'true') return false;

// Default to enabled if in production, disabled otherwise
return process.env.NODE_ENV === 'production';
},
validateConfig: () => {
const enabled = otlpConfig.isEnabled();

if (enabled) {
// Log the configuration state
console.debug('OpenTelemetry Configuration:', {
ENABLE_OPENTELEMETRY: process.env.ENABLE_OPENTELEMETRY,
DISABLE_OPENTELEMETRY: process.env.DISABLE_OPENTELEMETRY,
NODE_ENV: process.env.NODE_ENV,
isEnabled: enabled
});
}

return enabled;
}
};
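Note: isEnabled() gives ENABLE_OPENTELEMETRY precedence over DISABLE_OPENTELEMETRY and otherwise falls back to NODE_ENV. A minimal sketch of how the config might be consumed from the preloaded instrumentation entry point; the exact wiring in src/instrumentation.ts is an assumption, as that file is not part of this diff.

// Hypothetical usage in src/instrumentation.ts (not shown in this commit).
import { otlpConfig } from "./otlp/otlpConfig";

if (otlpConfig.validateConfig()) {
  // register the OpenTelemetry SDK and OTLP exporters here
} else {
  console.info("OpenTelemetry disabled; skipping SDK registration.");
}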
156 changes: 62 additions & 94 deletions src/routes/api/chat/+server.ts
@@ -2,15 +2,71 @@ import { json } from '@sveltejs/kit';
import { Pinecone } from "@pinecone-database/pinecone";
import OpenAI from 'openai';
import type { RequestHandler } from './$types';
import { traceable } from "langsmith/traceable";
import { wrapOpenAI } from "langsmith/wrappers";

// console.log('🔑 Environment Variables:', {
// OPENAI_API_KEY: Bun.env.OPENAI_API_KEY,
// PINECONE_API_KEY: Bun.env.PINECONE_API_KEY
// });

// Initialize OpenAI
const openai = new OpenAI({
// Initialize OpenAI with LangSmith tracing
const openai = wrapOpenAI(new OpenAI({
apiKey: Bun.env.OPENAI_API_KEY
}));

// Wrap the RAG pipeline with tracing
const ragPipeline = traceable(async (message: string) => {
// Initialize Pinecone
const pc = new Pinecone({
apiKey: Bun.env.PINECONE_API_KEY as string,
});

// Get index with specific namespace
const index = pc.index(Bun.env.PINECONE_INDEX_NAME as string)
.namespace(Bun.env.PINECONE_NAMESPACE as string);

// Generate embedding
console.log('🔄 Generating embedding...');
const embeddingResponse = await openai.embeddings.create({
model: "text-embedding-ada-002",
input: message
});

// Query Pinecone
const queryResponse = await index.query({
vector: embeddingResponse.data[0].embedding,
topK: 3,
includeMetadata: true
});

// Extract context
const context = queryResponse.matches
?.map(match => ({
text: match.metadata?.text,
filename: match.metadata?.filename || 'Unknown source'
}))
.filter(item => item.text);

// Generate completion
const stream = await openai.chat.completions.create({
model: "gpt-4-turbo-preview",
messages: [
{
role: "system",
content: "You are a helpful assistant. Use the following context to answer the user's question. Always end your response with '\n\nReferences:' followed by the source filenames. If you cannot answer the question based on the context, say so."
},
{
role: "user",
content: `Context: ${context?.map(c => c.text).join('\n\n---\n\n')}\n\nSource files: ${context?.map(c => c.filename).join(', ')}\n\nQuestion: ${message}`
}
],
temperature: 0.7,
max_tokens: 2000,
stream: true
});

return { stream, context };
});

// Keep existing GET handler
@@ -40,109 +96,23 @@ export const GET: RequestHandler = async () => {
}
};

// Add POST handler for chat
// Modify POST handler to use traced pipeline
export const POST: RequestHandler = async ({ request }) => {
const { message } = await request.json();

try {
// Initialize Pinecone with correct host URL
const pc = new Pinecone({
apiKey: Bun.env.PINECONE_API_KEY as string,
});

// Get index with specific namespace
const index = pc.index(Bun.env.PINECONE_INDEX_NAME as string).namespace(Bun.env.PINECONE_NAMESPACE as string);

// Generate embedding
console.log('🔄 Generating embedding...');
const embeddingResponse = await openai.embeddings.create({
model: "text-embedding-ada-002",
input: message
});
console.log('✅ Embedding details:', {
dimensions: embeddingResponse.data[0].embedding.length,
embedding: `${embeddingResponse.data[0].embedding.slice(0, 3)}...`
});

// Query within that namespace
console.log('🔄 Querying Pinecone...', {
indexName: "platform-engineering-rag",
namespace: "capella-document-search",
vectorDimensions: embeddingResponse.data[0].embedding.length
});

const queryResponse = await index.query({
vector: embeddingResponse.data[0].embedding,
topK: 3,
includeMetadata: true
});

console.log('📊 Pinecone matches:', {
totalMatches: queryResponse.matches?.length,
matches: queryResponse.matches?.map(m => ({
score: m.score,
metadata: m.metadata,
id: m.id
}))
});

if (!queryResponse.matches?.length) {
console.warn('⚠️ No matches found in Pinecone');
return json({
response: "I couldn't find any relevant information to answer your question."
});
}

// Extract context
const context = queryResponse.matches
.map(match => ({
text: match.metadata?.text,
filename: match.metadata?.filename || 'Unknown source'
}))
.filter(item => item.text);

console.log('📝 Context details:', {
contextLength: context.length,
matchCount: queryResponse.matches.length,
topMatchScores: queryResponse.matches.map(m => m.score)
});

console.log('🔍 Using RAG Context:', {
question: message,
retrievedContext: context.map(c => c.text).join('\n\n---\n\n'),
similarityScore: queryResponse.matches[0].score
});

const stream = await openai.chat.completions.create({
model: "gpt-4-turbo-preview",
messages: [
{
role: "system",
content: "You are a helpful assistant. Use the following context to answer the user's question. Always end your response with '\n\nReferences:' followed by the source filenames. If you cannot answer the question based on the context, say so."
},
{
role: "user",
content: `Context: ${context.map(c => c.text).join('\n\n---\n\n')}\n\nSource files: ${context.map(c => c.filename).join(', ')}\n\nQuestion: ${message}`
}
],
temperature: 0.7,
max_tokens: 2000,
stream: true
});

// After the response, append the references if they're not already included
let fullResponse = '';
const { stream, context } = await ragPipeline(message);

// Match the working implementation's response structure
return new Response(
new ReadableStream({
async start(controller) {
try {
let fullResponse = '';

for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content;
if (content) {
fullResponse += content;
// Send each chunk as a JSON string with newline delimiter
controller.enqueue(
new TextEncoder().encode(
JSON.stringify({ content }) + '\n'
@@ -151,7 +121,6 @@
}
}

// If response doesn't include references, append them
if (!fullResponse.includes('References:')) {
const references = `\n\nReferences:\n${context.map(c => c.filename).join('\n- ')}`;
controller.enqueue(
Expand All @@ -161,7 +130,6 @@ export const POST: RequestHandler = async ({ request }) => {
);
}

// Send done signal
controller.enqueue(
new TextEncoder().encode(
JSON.stringify({ done: true }) + '\n'
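Note: the POST handler streams newline-delimited JSON objects ({ "content": ... } chunks followed by { "done": true }). The sketch below shows one way a client could consume that stream; the /api/chat path is inferred from the route location, and the helper is illustrative, not part of this commit.

// Hypothetical client-side consumer for the NDJSON stream above.
async function streamChat(message: string, onChunk: (text: string) => void): Promise<void> {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ message }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    let newline: number;
    while ((newline = buffer.indexOf("\n")) !== -1) {
      const line = buffer.slice(0, newline).trim();
      buffer = buffer.slice(newline + 1);
      if (!line) continue;
      const parsed = JSON.parse(line);
      if (parsed.content) onChunk(parsed.content);
      if (parsed.done) return;
    }
  }
}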