Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip unsupported image formats #139

Merged
merged 5 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions src/pages/Options.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export const DEFAULT_TOOL_CONFIG: ToolConfig = {
};
// Base model names (the part before any ":tag") treated as multimodal.
// NOTE(review): presumably consulted by `isMultimodal` — confirm against its definition.
export const MULTIMODAL_MODELS = ["llava", "bakllava"];
// Base model names of embedding models; the model selector filters these out
// by comparing against the text before the ":" in each model name.
export const EMBEDDING_MODELS = ["nomic-embed-text", "all-minilm"];
// Image file extensions (lower-case, no leading dot) that are eligible for
// download; URLs with any other extension are skipped.
export const SUPPORTED_IMG_FORMATS = ["jpeg", "jpg", "png"];
// Lower and upper bounds for the chat container height.
// NOTE(review): units are presumably pixels — confirm where these are consumed.
export const CHAT_CONTAINER_HEIGHT_MIN = 200;
export const CHAT_CONTAINER_HEIGHT_MAX = 500;

Expand Down Expand Up @@ -295,11 +296,17 @@ const Options: React.FC = () => {
(model: string) =>
!EMBEDDING_MODELS.includes(model.split(":")[0]),
)
.map((modelName: string, index) => (
<MenuItem key={index} value={modelName}>
{`${modelName.split(":")[0]} (${modelName.split(":")[1]})`}
</MenuItem>
))}
.map((modelName) => {
const [model, tag] = modelName.split(":");
const isMulti = isMultimodal(model);
return (
<MenuItem key={modelName} value={modelName}>
{isMulti
? `${model} (${tag}, multimodal)`
: `${model} (${tag})`}
</MenuItem>
);
})}
</Select>
</FormControl>
<FormControl className="options-input" size="small">
Expand Down
78 changes: 46 additions & 32 deletions src/scripts/background.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
getLumosOptions,
isMultimodal,
LumosOptions,
SUPPORTED_IMG_FORMATS,
} from "../pages/Options";
import {
Calculator,
Expand Down Expand Up @@ -100,6 +101,49 @@ const classifyPrompt = async (
});
};

/**
 * Downloads images and returns their contents as base64 strings.
 *
 * URLs whose file extension is not listed in `SUPPORTED_IMG_FORMATS` are
 * dropped first; of the remaining URLs only the first 10 are fetched, one at
 * a time and in order. Any image that cannot be fetched, read or encoded is
 * logged and skipped, so a single bad image never rejects the whole call.
 *
 * @param imageURLs - Candidate image URLs.
 * @returns Base64 payloads (without the `data:*\/*;base64,` prefix) of the
 *   images that were downloaded successfully, in input order.
 */
const downloadImages = async (imageURLs: string[]): Promise<string[]> => {
  const base64EncodedImages: string[] = [];

  // filter out unsupported image formats
  const urls = imageURLs.filter((url) => {
    let path: string;
    try {
      // Look at the path only, so a query string or fragment
      // (e.g. `photo.png?w=200`) or a dotted host name is not mistaken for
      // the file extension.
      path = new URL(url).pathname;
    } catch {
      // Not an absolute URL: strip the query/fragment by hand.
      path = url.split(/[?#]/)[0] ?? "";
    }
    // Extensions are matched case-insensitively (`.PNG` is still a PNG).
    const extension = (path.split(".").pop() ?? "").toLowerCase();
    return SUPPORTED_IMG_FORMATS.includes(extension);
  });

  // only download the first 10 images
  for (const url of urls.slice(0, 10)) {
    console.log(`Downloading image: ${url}`);

    try {
      const response = await fetch(url);
      if (!response.ok) {
        console.log(`Failed to download image: ${url}`);
        continue;
      }

      const blob = await response.blob();
      const dataUrl = await new Promise<string>((resolve, reject) => {
        const reader = new FileReader();
        // `loadend` fires after success, error and abort alike; the result is
        // only a string when the read actually succeeded.
        reader.onloadend = () => {
          if (typeof reader.result === "string") {
            resolve(reader.result);
          } else {
            reject(reader.error ?? new Error("Failed to read image data"));
          }
        };
        reader.readAsDataURL(blob);
      });

      // remove leading data url prefix `data:*/*;base64,`
      const base64String = dataUrl.split(",")[1];
      if (base64String) {
        base64EncodedImages.push(base64String);
      } else {
        // Empty body or malformed data URL: nothing usable to send.
        console.log(`Failed to download image: ${url}`);
      }
    } catch {
      // Network failure, body-read failure or FileReader failure.
      console.log(`Failed to download image: ${url}`);
    }
  }

  return base64EncodedImages;
};

const getChatModel = (options: LumosOptions): ChatOllama => {
return new ChatOllama({
baseUrl: options.ollamaHost,
Expand Down Expand Up @@ -264,7 +308,7 @@ chrome.runtime.onMessage.addListener(async (request) => {
);

// define model bindings (e.g. images, functions)
const base64EncodedImages: string[] = [];
let base64EncodedImages: string[] = [];

// classify prompt and optionally execute tools
if (
Expand All @@ -277,37 +321,7 @@ chrome.runtime.onMessage.addListener(async (request) => {
CLS_IMG_TRIGGER,
))
) {
const urls: string[] = request.imageURLs;

// only download the first 10 images
for (const url of urls.slice(0, 10)) {
console.log(`Downloading image: ${url}`);
let response;

try {
response = await fetch(url);
} catch (error) {
console.log(`Failed to download image: ${url}`);
continue;
}

if (response.ok) {
const blob = await response.blob();
let base64String: string = await new Promise((resolve) => {
const reader = new FileReader();
reader.readAsDataURL(blob);
reader.onloadend = () => {
resolve(reader.result as string);
};
});

// remove leading data url prefix `data:*/*;base64,`
base64String = base64String.split(",")[1];
base64EncodedImages.push(base64String);
} else {
console.log(`Failed to download image: ${url}`);
}
}
base64EncodedImages = await downloadImages(request.imageURLs);
} else if (
options.toolConfig["Calculator"].enabled &&
(await classifyPrompt(
Expand Down
Loading