Skip to content

Commit

Permalink
feat: add ability to retry completions (#106)
Browse files Browse the repository at this point in the history
Closes #9 
Closes #100
  • Loading branch information
fmaclen authored Jul 21, 2024
1 parent d0852cd commit c12ed26
Show file tree
Hide file tree
Showing 9 changed files with 219 additions and 117 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ A minimal web-UI for talking to [Ollama](https://github.com/jmorganca/ollama/) s
- Large prompt fields with code editor features
- Markdown parsing w/syntax highlighting
- Easily copy markdown as raw text
- _"Knowledge"_ lets you add context to sessions
- Customizable system prompts
- Saves all changes on your browser's `localStorage`
- Responsive layout
- Dark mode
- Desktop & mobile friendly layout
- Light & dark themes
- Retryable completions
- Streams completions

### Live demo
Expand Down
29 changes: 4 additions & 25 deletions src/lib/ollama.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { get } from "svelte/store";
import type { Session } from "$lib/sessions";
import { settingsStore } from "$lib/store";

type OllamaCompletionRequest = {
context: number[];
/**
 * Payload for Ollama's `POST /api/generate` endpoint.
 * Built by the caller (e.g. the session page) and passed to `ollamaGenerate`.
 */
export type OllamaCompletionRequest = {
	// The user prompt to complete
	prompt: string;
	// Name of the Ollama model to run the completion with
	model: string;
	// Tokenized context from a previous completion; omit to start fresh
	context?: number[];
	// Optional system prompt (e.g. "Knowledge" content) for this completion
	system?: string;
}

export type OllamaCompletionResponse = {
Expand Down Expand Up @@ -42,31 +42,10 @@ export type OllamaTagResponse = {
models: OllamaModel[];
};

export async function ollamaGenerate(session: Session, abortSignal: AbortSignal) {
export async function ollamaGenerate(payload: OllamaCompletionRequest, abortSignal: AbortSignal) {
const settings = get(settingsStore);
if (!settings) throw new Error('No Ollama server specified');

let payload: OllamaCompletionRequest = {
model: session.model,
context: session.context,
prompt: session.messages[session.messages.length - 1].content
};

const firstMessage = session.messages[0]
if (firstMessage.knowledge) {
payload.prompt = `
<CONTEXT
name="${firstMessage.knowledge.name}"
id="${firstMessage.knowledge.id}"
updatedAt="${firstMessage.knowledge.updatedAt}"
>
${firstMessage.knowledge.content}
</CONTEXT>
${payload.prompt}
`;
}

return await fetch(`${settings.ollamaServer}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'text/event-stream' },
Expand Down
4 changes: 2 additions & 2 deletions src/lib/sessions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ export interface Message {
role: 'user' | 'ai' | 'system';
content: string;
knowledge?: Knowledge;
context?: number[];
}

export interface Session {
id: string;
model: string;
messages: Message[];
context: number[];
updatedAt?: string;
knowledge?: Knowledge;
}
Expand All @@ -35,7 +35,7 @@ export const loadSession = (id: string): Session => {
const model = get(settingsStore)?.ollamaModel || '';

// Create a new session
session = { id, model, messages: [], context: [], updatedAt: new Date().toISOString() };
session = { id, model, messages: [], updatedAt: new Date().toISOString() };
}

return session;
Expand Down
148 changes: 94 additions & 54 deletions src/routes/sessions/[id]/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import { loadKnowledge, type Knowledge } from '$lib/knowledge';
import { settingsStore, knowledgeStore } from '$lib/store';
import { ollamaGenerate, type OllamaCompletionResponse } from '$lib/ollama';
import {
ollamaGenerate,
type OllamaCompletionRequest,
type OllamaCompletionResponse
} from '$lib/ollama';
import {
saveSession,
type Message,
Expand Down Expand Up @@ -42,6 +46,7 @@
let prompt: string;
let promptCached: string;
let promptTextarea: HTMLTextAreaElement;
let tokenizedContext: number[];
let isPromptFullscreen = false;
let shouldFocusTextarea = false;
Expand All @@ -55,52 +60,6 @@
$: knowledge = knowledgeId ? loadKnowledge(knowledgeId) : null;
$: shouldFocusTextarea = !isPromptFullscreen;
afterUpdate(() => {
if (shouldFocusTextarea && promptTextarea) {
promptTextarea.focus();
shouldFocusTextarea = false;
}
});
async function scrollToBottom() {
if (!messageWindow) return;
await tick();
messageWindow.scrollTop = messageWindow.scrollHeight;
}
function handleError(error: Error) {
resetPrompt();
let content: string;
if (error.message === 'Failed to fetch') {
content = `Couldn't connect to Ollama. Is the [server running](/settings)?`;
} else {
content = `Sorry, something went wrong.\n\`\`\`\n${error}\n\`\`\``;
}
const message: Message = { role: 'system', content };
session.messages = [...session.messages, message];
}
async function handleCompletionDone(completion: string, context: number[]) {
const message: Message = { role: 'ai', content: completion };
session.messages = [...session.messages, message];
session.updatedAt = new Date().toISOString();
if (knowledge) {
session.knowledge = knowledge;
// Now that we used the knowledge, we no longer need an `id`
// This will prevent `knowledge` from being used again
knowledgeId = '';
}
completion = '';
promptCached = '';
shouldFocusTextarea = true;
saveSession({ ...session, context });
}
async function handleSubmit() {
if (!prompt) return;
Expand All @@ -117,16 +76,48 @@
}
const message: Message = { role: 'user', content: prompt };
abortController = new AbortController();
promptCached = prompt;
prompt = '';
completion = '';
session.messages = knowledgeContext
? [knowledgeContext, ...session.messages, message]
: [...session.messages, message];
const previousAiResponse = session.messages[session.messages.length - 2];
let payload = {
model: session.model,
context: previousAiResponse?.context,
prompt: session.messages[session.messages.length - 1].content,
system: previousAiResponse?.knowledge?.content
};
await handleCompletion(payload);
}
/**
 * Re-runs the completion for the AI message at `index`.
 * Discards that message and everything after it, then resubmits the most
 * recent user prompt with the context of the last remaining AI response.
 *
 * @throws Error when no user message remains to retry.
 */
async function handleRetry(index: number) {
	// Remove the retried message and all the messages after it
	session.messages = session.messages.slice(0, index);

	const mostRecentUserMessage = session.messages.filter((m) => m.role === 'user').at(-1);
	const mostRecentSystemMessage = session.messages.filter((m) => m.role === 'system').at(-1);
	// Last surviving AI response; `session.messages[index - 2]` was fragile
	// because a `system` message (e.g. a prior error) between the user and AI
	// messages would shift the AI response away from that fixed offset.
	const mostRecentAiMessage = session.messages.filter((m) => m.role === 'ai').at(-1);

	if (!mostRecentUserMessage) throw new Error('No user message to retry');

	const payload = {
		model: session.model,
		context: mostRecentAiMessage?.context,
		prompt: mostRecentUserMessage.content,
		system: mostRecentSystemMessage?.knowledge?.content
	};

	await handleCompletion(payload);
}
async function handleCompletion(payload: OllamaCompletionRequest) {
abortController = new AbortController();
completion = '';
tokenizedContext = [];
try {
const ollama = await ollamaGenerate(session, abortController.signal);
const ollama = await ollamaGenerate(payload, abortController.signal);
if (ollama && ollama.body) {
const reader = ollama.body.pipeThrough(new TextDecoderStream()).getReader();
Expand All @@ -137,7 +128,8 @@
if (!ollama.ok && value) throw new Error(JSON.parse(value).error);
if (done) {
handleCompletionDone(completion, session.context);
if (!tokenizedContext) throw new Error('Ollama response is missing context');
handleCompletionDone(completion, tokenizedContext);
break;
}
Expand All @@ -147,7 +139,7 @@
for (const line of jsonLines) {
const { response, context } = JSON.parse(line) as OllamaCompletionResponse;
completion += response;
session.context = context;
tokenizedContext = context;
}
}
}
Expand All @@ -157,6 +149,27 @@
}
}
/**
 * Finalizes a streamed completion: appends it to the session as an `ai`
 * message (carrying the tokenized `context` for follow-up requests),
 * consumes any pending knowledge, and persists the session.
 *
 * @param completion - Full streamed response text.
 * @param context - Tokenized context returned by Ollama for this completion.
 */
async function handleCompletionDone(completion: string, context: number[]) {
	// Fresh controller so the next request isn't tied to the finished one's signal
	abortController = new AbortController();

	const message: Message = { role: 'ai', content: completion, context };
	session.messages = [...session.messages, message];
	session.updatedAt = new Date().toISOString();

	if (knowledge) {
		session.knowledge = knowledge;
		// Now that we used the knowledge, we no longer need an `id`
		// This will prevent `knowledge` from being used again
		knowledgeId = '';
	}

	// NOTE(review): `completion` here is the function parameter, which shadows
	// the component's `completion` state — this assignment only clears the
	// local copy, not the UI state; confirm whether the outer variable was the
	// intended target (handleCompletion resets it on the next run anyway).
	completion = '';
	promptCached = '';
	shouldFocusTextarea = true;

	saveSession({ ...session });
}
function resetPrompt() {
// Reset the prompt to the last sent message
prompt = promptCached;
Expand All @@ -171,6 +184,33 @@
event.preventDefault();
handleSubmit();
}
// Scrolls the message window to its latest content once pending DOM updates settle.
async function scrollToBottom() {
	if (messageWindow) {
		// Wait for Svelte to flush state changes so the new message is measurable
		await tick();
		messageWindow.scrollTop = messageWindow.scrollHeight;
	}
}
// Restores the unsent prompt and surfaces the failure as a `system` message.
function handleError(error: Error) {
	resetPrompt();

	// A fetch failure means the server is unreachable; anything else is unexpected
	const content: string =
		error.message === 'Failed to fetch'
			? `Couldn't connect to Ollama. Is the [server running](/settings)?`
			: `Sorry, something went wrong.\n\`\`\`\n${error}\n\`\`\``;

	const message: Message = { role: 'system', content };
	session.messages = [...session.messages, message];
}
// After each render, return focus to the prompt textarea when requested.
afterUpdate(() => {
	if (!shouldFocusTextarea || !promptTextarea) return;
	promptTextarea.focus();
	shouldFocusTextarea = false;
});
</script>

<div class="session">
Expand Down Expand Up @@ -200,7 +240,7 @@

{#each session.messages as message, i (session.id + i)}
{#key message.role}
<Article {message} />
<Article {message} retryIndex={message.role === 'ai' ? i : undefined} {handleRetry} />
{/key}
{/each}

Expand All @@ -224,7 +264,7 @@
<FieldSelectModel />
<div class="prompt-editor__knowledge">
<FieldSelect
label="Knowledge"
label="System prompt"
name="knowledge"
disabled={!$knowledgeStore}
options={$knowledgeStore?.map((k) => ({ value: k.id, option: k.name }))}
Expand Down
Loading

0 comments on commit c12ed26

Please sign in to comment.