Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ability to retry completions #106

Merged
merged 18 commits
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ A minimal web-UI for talking to [Ollama](https://github.com/jmorganca/ollama/) s
- Large prompt fields with code editor features
- Markdown parsing w/syntax highlighting
- Easily copy markdown as raw text
- _"Knowledge"_ lets you add context to sessions
- Customizable system prompts
- Saves all changes on your browser's `localStorage`
- Responsive layout
- Dark mode
- Desktop & mobile friendly layout
- Light & dark themes
- Retryable completions
- Streams completions

### Live demo
Expand Down
29 changes: 4 additions & 25 deletions src/lib/ollama.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { get } from "svelte/store";
import type { Session } from "$lib/sessions";
import { settingsStore } from "$lib/store";

type OllamaCompletionRequest = {
context: number[];
export type OllamaCompletionRequest = {
prompt: string;
model: string;
context?: number[];
system?: string;
}

export type OllamaCompletionResponse = {
Expand Down Expand Up @@ -42,31 +42,10 @@ export type OllamaTagResponse = {
models: OllamaModel[];
};

export async function ollamaGenerate(session: Session, abortSignal: AbortSignal) {
export async function ollamaGenerate(payload: OllamaCompletionRequest, abortSignal: AbortSignal) {
const settings = get(settingsStore);
if (!settings) throw new Error('No Ollama server specified');

let payload: OllamaCompletionRequest = {
model: session.model,
context: session.context,
prompt: session.messages[session.messages.length - 1].content
};

const firstMessage = session.messages[0]
if (firstMessage.knowledge) {
payload.prompt = `
<CONTEXT
name="${firstMessage.knowledge.name}"
id="${firstMessage.knowledge.id}"
updatedAt="${firstMessage.knowledge.updatedAt}"
>
${firstMessage.knowledge.content}
</CONTEXT>

${payload.prompt}
`;
}

return await fetch(`${settings.ollamaServer}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'text/event-stream' },
Expand Down
4 changes: 2 additions & 2 deletions src/lib/sessions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ export interface Message {
role: 'user' | 'ai' | 'system';
content: string;
knowledge?: Knowledge;
context?: number[];
}

export interface Session {
id: string;
model: string;
messages: Message[];
context: number[];
updatedAt?: string;
knowledge?: Knowledge;
}
Expand All @@ -35,7 +35,7 @@ export const loadSession = (id: string): Session => {
const model = get(settingsStore)?.ollamaModel || '';

// Create a new session
session = { id, model, messages: [], context: [], updatedAt: new Date().toISOString() };
session = { id, model, messages: [], updatedAt: new Date().toISOString() };
}

return session;
Expand Down
148 changes: 94 additions & 54 deletions src/routes/sessions/[id]/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

import { loadKnowledge, type Knowledge } from '$lib/knowledge';
import { settingsStore, knowledgeStore } from '$lib/store';
import { ollamaGenerate, type OllamaCompletionResponse } from '$lib/ollama';
import {
ollamaGenerate,
type OllamaCompletionRequest,
type OllamaCompletionResponse
} from '$lib/ollama';
import {
saveSession,
type Message,
Expand Down Expand Up @@ -42,6 +46,7 @@
let prompt: string;
let promptCached: string;
let promptTextarea: HTMLTextAreaElement;
let tokenizedContext: number[];
let isPromptFullscreen = false;
let shouldFocusTextarea = false;

Expand All @@ -55,52 +60,6 @@
$: knowledge = knowledgeId ? loadKnowledge(knowledgeId) : null;
$: shouldFocusTextarea = !isPromptFullscreen;

afterUpdate(() => {
if (shouldFocusTextarea && promptTextarea) {
promptTextarea.focus();
shouldFocusTextarea = false;
}
});

async function scrollToBottom() {
if (!messageWindow) return;
await tick();
messageWindow.scrollTop = messageWindow.scrollHeight;
}

function handleError(error: Error) {
resetPrompt();

let content: string;
if (error.message === 'Failed to fetch') {
content = `Couldn't connect to Ollama. Is the [server running](/settings)?`;
} else {
content = `Sorry, something went wrong.\n\`\`\`\n${error}\n\`\`\``;
}

const message: Message = { role: 'system', content };
session.messages = [...session.messages, message];
}

async function handleCompletionDone(completion: string, context: number[]) {
const message: Message = { role: 'ai', content: completion };
session.messages = [...session.messages, message];
session.updatedAt = new Date().toISOString();

if (knowledge) {
session.knowledge = knowledge;

// Now that we used the knowledge, we no longer need an `id`
// This will prevent `knowledge` from being used again
knowledgeId = '';
}

completion = '';
promptCached = '';
shouldFocusTextarea = true;
saveSession({ ...session, context });
}

async function handleSubmit() {
if (!prompt) return;

Expand All @@ -117,16 +76,48 @@
}

const message: Message = { role: 'user', content: prompt };
abortController = new AbortController();
promptCached = prompt;
prompt = '';
completion = '';
session.messages = knowledgeContext
? [knowledgeContext, ...session.messages, message]
: [...session.messages, message];

const previousAiResponse = session.messages[session.messages.length - 2];
let payload = {
model: session.model,
context: previousAiResponse?.context,
prompt: session.messages[session.messages.length - 1].content,
system: previousAiResponse?.knowledge?.content
};

await handleCompletion(payload);
}

/**
 * Regenerates the AI response at `index` by discarding it (and everything
 * after it) and re-submitting the most recent user prompt.
 *
 * @param index position of the AI message being retried; all messages from
 *   this index onward are removed before re-generating.
 * @throws Error when no user message remains to retry.
 */
async function handleRetry(index: number) {
	// Remove all the messages after the index
	session.messages = session.messages.slice(0, index);

	// Walk back through what is left to find the prompt to re-send and the
	// last system/knowledge message (its knowledge content becomes `system`).
	const mostRecentUserMessage = session.messages.filter((m) => m.role === 'user').at(-1);
	const mostRecentSystemMessage = session.messages.filter((m) => m.role === 'system').at(-1);
	if (!mostRecentUserMessage) throw new Error('No user message to retry');

	let payload = {
		model: session.model,
		// NOTE(review): assumes messages alternate user/ai so that
		// `index - 2` is the AI response *before* the one being retried,
		// whose token context seeds the regeneration. A knowledge message at
		// the head of the session would shift this offset — TODO confirm.
		context: session.messages[index - 2]?.context, // Last AI response
		prompt: mostRecentUserMessage.content,
		system: mostRecentSystemMessage?.knowledge?.content
	};

	await handleCompletion(payload);
}

async function handleCompletion(payload: OllamaCompletionRequest) {
abortController = new AbortController();
completion = '';
tokenizedContext = [];

try {
const ollama = await ollamaGenerate(session, abortController.signal);
const ollama = await ollamaGenerate(payload, abortController.signal);

if (ollama && ollama.body) {
const reader = ollama.body.pipeThrough(new TextDecoderStream()).getReader();
Expand All @@ -137,7 +128,8 @@
if (!ollama.ok && value) throw new Error(JSON.parse(value).error);

if (done) {
handleCompletionDone(completion, session.context);
if (!tokenizedContext) throw new Error('Ollama response is missing context');
handleCompletionDone(completion, tokenizedContext);
break;
}

Expand All @@ -147,7 +139,7 @@
for (const line of jsonLines) {
const { response, context } = JSON.parse(line) as OllamaCompletionResponse;
completion += response;
session.context = context;
tokenizedContext = context;
}
}
}
Expand All @@ -157,6 +149,27 @@
}
}

/**
 * Finalizes a streamed completion: appends the AI message (with the token
 * `context` needed for follow-ups/retries), clears transient prompt state,
 * and persists the session.
 *
 * @param completion full accumulated completion text.
 * @param context tokenized context returned by Ollama for this response.
 */
async function handleCompletionDone(completion: string, context: number[]) {
	// Fresh controller so the previous (now-finished) request can no longer be aborted
	abortController = new AbortController();

	const message: Message = { role: 'ai', content: completion, context };
	session.messages = [...session.messages, message];
	session.updatedAt = new Date().toISOString();

	if (knowledge) {
		session.knowledge = knowledge;

		// Now that we used the knowledge, we no longer need an `id`
		// This will prevent `knowledge` from being used again
		knowledgeId = '';
	}

	// NOTE(review): the `completion` parameter shadows the component-level
	// `completion` variable, so this assignment only clears the local copy —
	// the streaming display state is presumably meant to be reset here; verify.
	completion = '';
	promptCached = '';
	shouldFocusTextarea = true;
	saveSession({ ...session });
}

function resetPrompt() {
// Reset the prompt to the last sent message
prompt = promptCached;
Expand All @@ -171,6 +184,33 @@
event.preventDefault();
handleSubmit();
}

/** Scrolls the message window to its bottom after the pending DOM update flushes. */
async function scrollToBottom() {
	if (messageWindow) {
		// Wait for Svelte to apply pending state changes before measuring height
		await tick();
		messageWindow.scrollTop = messageWindow.scrollHeight;
	}
}

/**
 * Restores the prompt field and surfaces the failure to the user as a
 * system message in the conversation.
 */
function handleError(error: Error) {
	resetPrompt();

	// "Failed to fetch" is the browser's generic network failure — treat it
	// as "server unreachable"; anything else is shown verbatim in a code fence.
	const isConnectionFailure = error.message === 'Failed to fetch';
	const content = isConnectionFailure
		? `Couldn't connect to Ollama. Is the [server running](/settings)?`
		: `Sorry, something went wrong.\n\`\`\`\n${error}\n\`\`\``;

	session.messages = [...session.messages, { role: 'system', content }];
}

// Re-focus the prompt textarea whenever a state change requested it
// (flag is consumed so focus only happens once per request).
afterUpdate(() => {
	if (!shouldFocusTextarea || !promptTextarea) return;
	promptTextarea.focus();
	shouldFocusTextarea = false;
});
</script>

<div class="session">
Expand Down Expand Up @@ -200,7 +240,7 @@

{#each session.messages as message, i (session.id + i)}
{#key message.role}
<Article {message} />
<Article {message} retryIndex={message.role === 'ai' ? i : undefined} {handleRetry} />
{/key}
{/each}

Expand All @@ -224,7 +264,7 @@
<FieldSelectModel />
<div class="prompt-editor__knowledge">
<FieldSelect
label="Knowledge"
label="System prompt"
name="knowledge"
disabled={!$knowledgeStore}
options={$knowledgeStore?.map((k) => ({ value: k.id, option: k.name }))}
Expand Down
Loading