Release/0.4.12 to main #2808

Merged · 18 commits · Apr 25, 2024
4 changes: 2 additions & 2 deletions core/src/node/api/restful/helper/startStopModel.ts
@@ -63,11 +63,11 @@ const runModel = async (modelId: string, settingParams?: ModelSettingParams): Pr
 
   const nitroResourceProbe = await getSystemResourceInfo()
   const nitroModelSettings: NitroModelSettings = {
-    // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...modelMetadata.settings,
     ...settingParams,
     llama_model_path: modelBinaryPath,
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(modelMetadata.settings.mmproj && {
       mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
     }),
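Note that the reordering above changes behavior, not just style: in a JavaScript object literal, a later entry for a key overwrites an earlier one, so placing cpu_threads after the settings spreads means the probed physical-core count can no longer be clobbered by values persisted in model settings. A minimal sketch of the precedence rule (illustrative values, not the extension's real types):

```ts
// Object spread precedence: the last occurrence of a key wins.
const probedDefaults = { cpu_threads: 8 } // e.g. from getSystemResourceInfo()
const persistedSettings = { cpu_threads: 1, ctx_len: 2048 }

// Old order: persisted settings spread last, so they override the probe.
const before = { ...probedDefaults, ...persistedSettings }
// New order: the probed value spread last, so it always wins.
const after = { ...persistedSettings, ...probedDefaults }

console.log(before.cpu_threads) // 1
console.log(after.cpu_threads) // 8
```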
2 changes: 2 additions & 0 deletions extensions/assistant-extension/src/node/engine.ts
@@ -15,6 +15,8 @@ export const readEmbeddingEngine = (engineName: string) => {
   const settingDirectoryPath = path.join(
     getJanDataFolderPath(),
     'settings',
+    '@janhq',
+    // TODO: James - To be removed
     engineName === 'openai'
       ? 'inference-openai-extension'
       : 'inference-groq-extension',
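For context, the added '@janhq' segment moves the embedding-engine settings lookup under the scoped extension directory. A hedged sketch of the resulting directory path, with an illustrative base folder standing in for getJanDataFolderPath():

```ts
import path from 'path'

// Illustrative only: the real base comes from getJanDataFolderPath().
const janDataFolderPath = '/home/user/jan'

const settingDirectoryPath = path.join(
  janDataFolderPath,
  'settings',
  '@janhq',
  'inference-openai-extension' // the engineName === 'openai' branch
)
console.log(settingDirectoryPath)
// /home/user/jan/settings/@janhq/inference-openai-extension
```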
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.2",
+  "version": "1.0.4",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
@@ -8,7 +8,7 @@
   "id": "command-r-34b",
   "object": "model",
   "name": "Command-R v01 34B Q4",
-  "version": "1.2",
+  "version": "1.3",
   "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
   "format": "gguf",
   "settings": {
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "CohereAI",
-    "tags": ["34B", "Finetuned"],
+    "tags": ["34B", "Finetuned", "Featured"],
     "size": 21500000000
   },
   "engine": "nitro"
@@ -8,7 +8,7 @@
   "id": "hermes-pro-7b",
   "object": "model",
   "name": "Hermes Pro 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.",
   "format": "gguf",
   "settings": {
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "NousResearch",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"
@@ -8,7 +8,7 @@
   "id": "openhermes-neural-7b",
   "object": "model",
   "name": "OpenHermes Neural 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Intel, Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
   },
@@ -8,7 +8,7 @@
   "id": "stealth-v1.2-7b",
   "object": "model",
   "name": "Stealth 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"
@@ -8,7 +8,7 @@
   "id": "trinity-v1.2-7b",
   "object": "model",
   "name": "Trinity-v1.2 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
   },
3 changes: 2 additions & 1 deletion extensions/inference-nitro-extension/src/node/index.ts
@@ -131,10 +131,11 @@ async function loadModel(
   if (!llama_model_path) return Promise.reject('No GGUF model file found')
 
   currentSettings = {
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     // model.settings can override the default settings
     ...params.model.settings,
     llama_model_path,
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(params.model.settings.mmproj && {
       mmproj: path.isAbsolute(params.model.settings.mmproj)
         ? params.model.settings.mmproj
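The surrounding `...(params.model.settings.mmproj && { ... })` line relies on the conditional-spread idiom: spreading a falsy value contributes no keys, so mmproj is only attached when the model actually ships a projection file. A small sketch of the idiom with illustrative shapes:

```ts
// Conditional spread: `...(cond && { key: value })` adds the key only when
// `cond` is truthy; spreading `false` or `undefined` is a no-op.
type Settings = { mmproj?: string }

const build = (settings: Settings) => ({
  llama_model_path: '/models/example.gguf', // illustrative path
  ...(settings.mmproj && { mmproj: `/models/${settings.mmproj}` }),
})

console.log(build({})) // { llama_model_path: '/models/example.gguf' }
console.log(build({ mmproj: 'mmproj.bin' }))
// { llama_model_path: '/models/example.gguf', mmproj: '/models/mmproj.bin' }
```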
72 changes: 36 additions & 36 deletions extensions/monitoring-extension/src/node/logger.ts
@@ -67,54 +67,54 @@ export class FileLogger extends Logger {
     const size = maxFileSizeBytes ?? 1 * 1024 * 1024 // 1 MB
     const days = daysToKeep ?? 7 // 7 days
     const logDirectory = path.join(getJanDataFolderPath(), 'logs')
 
     // Perform log cleaning
     const currentDate = new Date()
-    fs.readdir(logDirectory, (err, files) => {
-      if (err) {
-        console.error('Error reading log directory:', err)
-        return
-      }
-
-      files.forEach((file) => {
-        const filePath = path.join(logDirectory, file)
-        fs.stat(filePath, (err, stats) => {
-          if (err) {
-            console.error('Error getting file stats:', err)
-            return
-          }
-
-          // Check size
-          if (stats.size > size) {
-            fs.unlink(filePath, (err) => {
-              if (err) {
-                console.error('Error deleting log file:', err)
-                return
-              }
-              console.debug(
-                `Deleted log file due to exceeding size limit: ${filePath}`
-              )
-            })
-          } else {
-            // Check age
-            const creationDate = new Date(stats.ctime)
-            const daysDifference = Math.floor(
-              (currentDate.getTime() - creationDate.getTime()) /
-                (1000 * 3600 * 24)
-            )
-            if (daysDifference > days) {
-              fs.unlink(filePath, (err) => {
-                if (err) {
-                  console.error('Error deleting log file:', err)
-                  return
-                }
-                console.debug(`Deleted old log file: ${filePath}`)
-              })
-            }
-          }
-        })
-      })
-    })
+    if (fs.existsSync(logDirectory))
+      fs.readdir(logDirectory, (err, files) => {
+        if (err) {
+          console.error('Error reading log directory:', err)
+          return
+        }
+
+        files.forEach((file) => {
+          const filePath = path.join(logDirectory, file)
+          fs.stat(filePath, (err, stats) => {
+            if (err) {
+              console.error('Error getting file stats:', err)
+              return
+            }
+
+            // Check size
+            if (stats.size > size) {
+              fs.unlink(filePath, (err) => {
+                if (err) {
+                  console.error('Error deleting log file:', err)
+                  return
+                }
+                console.debug(
+                  `Deleted log file due to exceeding size limit: ${filePath}`
+                )
+              })
+            } else {
+              // Check age
+              const creationDate = new Date(stats.ctime)
+              const daysDifference = Math.floor(
+                (currentDate.getTime() - creationDate.getTime()) /
+                  (1000 * 3600 * 24)
+              )
+              if (daysDifference > days) {
+                fs.unlink(filePath, (err) => {
+                  if (err) {
+                    console.error('Error deleting log file:', err)
+                    return
+                  }
+                  console.debug(`Deleted old log file: ${filePath}`)
+                })
+              }
+            }
+          })
+        })
+      })
 
     // Schedule the next execution with doubled delays
     this.timeout = setTimeout(
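The only functional change in this block is the new fs.existsSync guard (plus the re-indentation it forces): on a fresh install the logs directory may not exist yet, and fs.readdir on a missing path fails with ENOENT, so every scheduled cleanup tick previously logged "Error reading log directory". A minimal sketch of the guarded pattern, using an illustrative path in place of the Jan data folder:

```ts
import fs from 'fs'
import path from 'path'

const logDirectory = path.join('/tmp', 'jan-example', 'logs') // illustrative

// Without the existsSync check, the callback below would fire with an
// ENOENT error on every scheduled cleanup until the directory is created.
if (fs.existsSync(logDirectory))
  fs.readdir(logDirectory, (err, files) => {
    if (err) {
      console.error('Error reading log directory:', err)
      return
    }
    console.debug(`Inspecting ${files.length} log file(s) for cleanup`)
  })
```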
9 changes: 9 additions & 0 deletions web/containers/DropdownListSidebar/index.tsx
@@ -134,10 +134,19 @@ const DropdownListSidebar = ({
   }
 
   if (activeThread) {
+    // Default setting ctx_len for the model for a better onboarding experience
+    // TODO: When Cortex support hardware instructions, we should remove this
+    const overriddenSettings =
+      model?.settings.ctx_len && model.settings.ctx_len > 2048
+        ? { ctx_len: 2048 }
+        : {}
+
     const modelParams = {
       ...model?.parameters,
       ...model?.settings,
+      ...overriddenSettings,
     }
+
     // Update model parameter to the thread state
     setThreadModelParams(activeThread.id, modelParams)
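The override only ever lowers ctx_len: values above 2048 are capped, while smaller or missing values pass through untouched because the fallback spread is an empty object. A quick sketch of that behavior (ModelSettings is an illustrative shape, not the app's real type):

```ts
type ModelSettings = { ctx_len?: number }

const applyOnboardingClamp = (settings: ModelSettings): ModelSettings => {
  // Mirrors the overriddenSettings logic: cap ctx_len at 2048 when larger.
  const overridden =
    settings.ctx_len && settings.ctx_len > 2048 ? { ctx_len: 2048 } : {}
  return { ...settings, ...overridden }
}

console.log(applyOnboardingClamp({ ctx_len: 8192 })) // { ctx_len: 2048 }
console.log(applyOnboardingClamp({ ctx_len: 1024 })) // { ctx_len: 1024 }
console.log(applyOnboardingClamp({})) // {}
```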
12 changes: 12 additions & 0 deletions web/containers/Providers/EventHandler.tsx
@@ -20,6 +20,8 @@ import { ulid } from 'ulidx'
 
 import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
 
+import { toRuntimeParams } from '@/utils/modelParam'
+
 import { extensionManager } from '@/extension'
 import {
   getCurrentChatMessagesAtom,
@@ -32,6 +34,7 @@
   threadsAtom,
   isGeneratingResponseAtom,
   updateThreadAtom,
+  getActiveThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'
 
 const maxWordForThreadTitle = 10
@@ -54,6 +57,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const updateThread = useSetAtom(updateThreadAtom)
   const messagesRef = useRef(messages)
   const activeModelRef = useRef(activeModel)
+  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
+  const activeModelParamsRef = useRef(activeModelParams)
 
   useEffect(() => {
     threadsRef.current = threads
@@ -71,6 +76,10 @@
     activeModelRef.current = activeModel
   }, [activeModel])
 
+  useEffect(() => {
+    activeModelParamsRef.current = activeModelParams
+  }, [activeModelParams])
+
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       if (message.type === MessageRequestType.Thread) {
@@ -247,6 +256,8 @@
       },
     ]
 
+    const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
+
     const messageRequest: MessageRequest = {
       id: msgId,
       threadId: message.thread_id,
@@ -255,6 +266,7 @@
       model: {
         ...activeModelRef.current,
         parameters: {
+          ...runtimeParams,
           stream: false,
         },
      },
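The activeModelParamsRef mirroring follows the same pattern already used for messages and activeModel above: the message handlers are registered once, so reading atom values directly inside them would capture the snapshot from the first render, while a ref always exposes the latest value. toRuntimeParams (presumably filtering the thread's model params down to inference-time keys) then reads from that ref when the request is assembled. A generic sketch of the ref-mirroring pattern:

```tsx
import { useEffect, useRef } from 'react'

// Keep a ref in sync with a value so long-lived callbacks (event handlers
// registered once on mount) can read the latest value instead of a stale
// closure capture.
function useLatestRef<T>(value: T) {
  const ref = useRef(value)
  useEffect(() => {
    ref.current = value
  }, [value])
  return ref
}

// Usage inside a handler registered on mount: read `paramsRef.current`
// at call time rather than capturing `params` when the handler is created.
```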
2 changes: 1 addition & 1 deletion web/containers/ServerLogs/index.tsx
@@ -97,7 +97,7 @@ const ServerLogs = (props: ServerLogsProps) => {
         </div>
       </div>
       <div className="overflow-hidden">
-        {logs.length > 1 ? (
+        {logs.length > 0 ? (
           <div className="h-full overflow-auto">
             <code className="inline-block whitespace-pre-line text-xs">
               {logs.slice(-limit).map((log, i) => {
24 changes: 16 additions & 8 deletions web/hooks/useActiveModel.ts
@@ -25,30 +25,31 @@ export const stateModelAtom = atom<ModelState>({
   model: undefined,
 })
 
-export let loadModelController: AbortController | undefined
+const pendingModelLoadAtom = atom<boolean>(false)
 
 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
   const activeThread = useAtomValue(activeThreadAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
+  const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
 
   const downloadedModelsRef = useRef<Model[]>([])
 
   useEffect(() => {
     downloadedModelsRef.current = downloadedModels
   }, [downloadedModels])
 
-  const startModel = async (modelId: string) => {
+  const startModel = async (modelId: string, abortable: boolean = true) => {
     if (
       (activeModel && activeModel.id === modelId) ||
       (stateModel.model?.id === modelId && stateModel.loading)
     ) {
       console.debug(`Model ${modelId} is already initialized. Ignore..`)
       return Promise.resolve()
     }
-    loadModelController = new AbortController()
+    setPendingModelLoad(true)
 
     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
 
@@ -107,15 +108,16 @@
         })
       })
       .catch((error) => {
-        if (loadModelController?.signal.aborted)
-          return Promise.reject(new Error('aborted'))
-
         setStateModel(() => ({
           state: 'start',
           loading: false,
           model,
         }))
 
+        if (!pendingModelLoad && abortable) {
+          return Promise.reject(new Error('aborted'))
+        }
+
         toaster({
           title: 'Failed!',
           description: `Model ${model.id} failed to start.`,
@@ -139,9 +141,15 @@
       .then(() => {
         setActiveModel(undefined)
         setStateModel({ state: 'start', loading: false, model: undefined })
-        loadModelController?.abort()
+        setPendingModelLoad(false)
       })
-  }, [activeModel, setActiveModel, setStateModel, stateModel])
+  }, [
+    activeModel,
+    setActiveModel,
+    setStateModel,
+    setPendingModelLoad,
+    stateModel,
+  ])
 
   const stopInference = useCallback(async () => {
     // Loading model
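Here the AbortController is replaced by a pendingModelLoad flag: stopModel() flips the flag to false, and a start failure that arrives afterwards is classified as a user-initiated abort rather than a real error, unless the caller passed abortable: false. A sketch of the resulting decision, outside of React state (names mirror the hook; values are illustrative):

```ts
// pendingModelLoad stays true while a start is genuinely in flight;
// stopModel() sets it to false before the start promise settles.
const classifyStartFailure = (
  pendingModelLoad: boolean,
  abortable: boolean
): 'aborted' | 'failed' =>
  !pendingModelLoad && abortable ? 'aborted' : 'failed'

console.log(classifyStartFailure(false, true)) // 'aborted': user stopped the load
console.log(classifyStartFailure(true, true)) // 'failed': surface the toast
console.log(classifyStartFailure(false, false)) // 'failed': abort opted out
```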