Skip to content

Commit 7df5e86

Browse files
committed
new: reporting tokens usage
1 parent f42fb2a commit 7df5e86

13 files changed

+130
-78
lines changed

Cargo.lock

-33
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nerve-core/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ duration-string = { version = "0.4.0", optional = true }
3232
rayon = "1.10.0"
3333
glob = "0.3.1"
3434
human_bytes = "0.4.3"
35-
memory-stats = "1.1.0"
3635
sha256 = "1.5.0"
3736
bitcode = { version = "0.6.0", features = ["serde"] }
3837
intertrait = "0.2.2"

nerve-core/src/agent/generator/anthropic.rs

+14-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
use std::collections::HashMap;
22

3-
use crate::agent::{state::SharedState, Invocation};
3+
use crate::agent::{
4+
generator::{ChatResponse, Usage},
5+
state::SharedState,
6+
Invocation,
7+
};
48
use anyhow::Result;
59
use async_trait::async_trait;
610
use clust::messages::{
@@ -134,7 +138,7 @@ impl Client for AnthropicClient {
134138
&self,
135139
state: SharedState,
136140
options: &ChatOptions,
137-
) -> anyhow::Result<(String, Vec<Invocation>)> {
141+
) -> anyhow::Result<ChatResponse> {
138142
let mut messages = vec![Message::user(options.prompt.trim().to_string())];
139143
let max_tokens = MaxTokens::new(4096, self.model)?;
140144

@@ -249,7 +253,14 @@ impl Client for AnthropicClient {
249253
log::warn!("empty tool calls and content in response: {:?}", response);
250254
}
251255

252-
Ok((content.to_string(), invocations))
256+
Ok(ChatResponse {
257+
content: content.to_string(),
258+
invocations,
259+
usage: Some(Usage {
260+
input_tokens: response.usage.input_tokens,
261+
output_tokens: response.usage.output_tokens,
262+
}),
263+
})
253264
}
254265
}
255266

nerve-core/src/agent/generator/fireworks.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use anyhow::Result;
22
use async_trait::async_trait;
33

4-
use crate::agent::{state::SharedState, Invocation};
4+
use crate::agent::state::SharedState;
55

6-
use super::{openai::OpenAIClient, ChatOptions, Client};
6+
use super::{openai::OpenAIClient, ChatOptions, ChatResponse, Client};
77

88
pub struct FireworksClient {
99
client: OpenAIClient,
@@ -28,7 +28,7 @@ impl Client for FireworksClient {
2828
&self,
2929
state: SharedState,
3030
options: &ChatOptions,
31-
) -> anyhow::Result<(String, Vec<Invocation>)> {
31+
) -> anyhow::Result<ChatResponse> {
3232
self.client.chat(state, options).await
3333
}
3434
}

nerve-core/src/agent/generator/groq.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ use lazy_static::lazy_static;
1111
use regex::Regex;
1212
use serde::{Deserialize, Serialize};
1313

14-
use crate::agent::{generator::Message, state::SharedState, Invocation};
14+
use crate::agent::{
15+
generator::{ChatResponse, Message},
16+
state::SharedState,
17+
Invocation,
18+
};
1519

1620
use super::{ChatOptions, Client};
1721

@@ -109,7 +113,7 @@ impl Client for GroqClient {
109113
&self,
110114
state: SharedState,
111115
options: &ChatOptions,
112-
) -> anyhow::Result<(String, Vec<Invocation>)> {
116+
) -> anyhow::Result<ChatResponse> {
113117
let mut chat_history = vec![
114118
groq_api_rs::completion::message::Message::SystemMessage {
115119
role: Some("system".to_string()),
@@ -247,9 +251,9 @@ impl Client for GroqClient {
247251
return Err(error);
248252
}
249253

250-
let choice = match resp.unwrap() {
254+
let (response, choice) = match resp.unwrap() {
251255
groq_api_rs::completion::client::CompletionOption::NonStream(resp) => {
252-
resp.choices.first().unwrap().to_owned()
256+
(resp.clone(), resp.choices.first().unwrap().to_owned())
253257
}
254258
groq_api_rs::completion::client::CompletionOption::Stream(_) => {
255259
return Err(anyhow!("Groq streaming is not supported yet, if this happens please open an issue on GitHub"));
@@ -298,7 +302,14 @@ impl Client for GroqClient {
298302
}
299303
}
300304

301-
Ok((content, invocations))
305+
Ok(ChatResponse {
306+
content,
307+
invocations,
308+
usage: Some(super::Usage {
309+
input_tokens: response.usage.prompt_tokens,
310+
output_tokens: response.usage.completion_tokens,
311+
}),
312+
})
302313
}
303314
}
304315

nerve-core/src/agent/generator/huggingface.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use anyhow::Result;
22
use async_trait::async_trait;
33

4-
use crate::agent::{state::SharedState, Invocation};
4+
use crate::agent::state::SharedState;
55

6-
use super::{openai::OpenAIClient, ChatOptions, Client};
6+
use super::{openai::OpenAIClient, ChatOptions, ChatResponse, Client};
77

88
pub struct HuggingfaceMessageClient {
99
client: OpenAIClient,
@@ -27,7 +27,7 @@ impl Client for HuggingfaceMessageClient {
2727
&self,
2828
state: SharedState,
2929
options: &ChatOptions,
30-
) -> anyhow::Result<(String, Vec<Invocation>)> {
30+
) -> anyhow::Result<ChatResponse> {
3131
self.client.chat(state, options).await
3232
}
3333
}

nerve-core/src/agent/generator/mod.rs

+14-5
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,26 @@ impl Display for Message {
7272
}
7373
}
7474

75+
/// Token counts for a single chat call, as reported by the provider.
///
/// Each generator copies these from its own response type: Anthropic from
/// `usage.input_tokens` / `usage.output_tokens`, Groq and OpenAI from
/// `usage.prompt_tokens` / `usage.completion_tokens`, and Ollama from
/// `final_data.prompt_eval_count` / `final_data.eval_count`.
pub struct Usage {
76+
/// The number of input tokens which were used (what some providers call
/// prompt tokens).
77+
pub input_tokens: u32,
78+
/// The number of output tokens which were used (what some providers call
/// completion tokens).
79+
pub output_tokens: u32,
80+
}
81+
82+
/// Value returned by `Client::chat`, replacing the previous
/// `(String, Vec<Invocation>)` tuple and adding optional token usage.
pub struct ChatResponse {
83+
/// Text content of the response. The Ollama client sets this to an empty
/// string when the model returns an empty message.
pub content: String,
84+
/// Invocations returned alongside the content; empty when there are none
/// (for example on Ollama's empty-message path).
pub invocations: Vec<Invocation>,
85+
/// Token usage for this call, or `None` when the provider response does not
/// carry it (Ollama without `final_data`, OpenAI without `prompt_tokens`).
pub usage: Option<Usage>,
86+
}
87+
7588
#[async_trait]
7689
pub trait Client: mini_rag::Embedder + Send + Sync {
7790
fn new(url: &str, port: u16, model_name: &str, context_window: u32) -> Result<Self>
7891
where
7992
Self: Sized;
8093

81-
async fn chat(
82-
&self,
83-
state: SharedState,
84-
options: &ChatOptions,
85-
) -> Result<(String, Vec<Invocation>)>;
94+
async fn chat(&self, state: SharedState, options: &ChatOptions) -> Result<ChatResponse>;
8695

8796
async fn check_native_tools_support(&self) -> Result<bool> {
8897
Ok(false)

nerve-core/src/agent/generator/novita.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use anyhow::Result;
22
use async_trait::async_trait;
33

4-
use crate::agent::{state::SharedState, Invocation};
4+
use crate::agent::state::SharedState;
55

6-
use super::{openai::OpenAIClient, Client, ChatOptions};
6+
use super::{openai::OpenAIClient, ChatOptions, ChatResponse, Client};
77

88
pub struct NovitaClient {
99
client: OpenAIClient,
@@ -28,7 +28,7 @@ impl Client for NovitaClient {
2828
&self,
2929
state: SharedState,
3030
options: &ChatOptions,
31-
) -> anyhow::Result<(String, Vec<Invocation>)> {
31+
) -> anyhow::Result<ChatResponse> {
3232
self.client.chat(state, options).await
3333
}
3434
}

nerve-core/src/agent/generator/ollama.rs

+24-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use ollama_rs::{
1818

1919
use crate::agent::{state::SharedState, Invocation};
2020

21-
use super::{ChatOptions, Client, Message};
21+
use super::{ChatOptions, ChatResponse, Client, Message};
2222

2323
pub struct OllamaClient {
2424
model: String,
@@ -92,7 +92,7 @@ impl Client for OllamaClient {
9292
&self,
9393
state: SharedState,
9494
options: &ChatOptions,
95-
) -> anyhow::Result<(String, Vec<Invocation>)> {
95+
) -> anyhow::Result<ChatResponse> {
9696
// TODO: images for multimodal (see todo for screenshot action)
9797

9898
// build chat history:
@@ -225,10 +225,30 @@ impl Client for OllamaClient {
225225
}
226226

227227
log::debug!("ollama.invocations = {:?}", &invocations);
228-
Ok((content, invocations))
228+
Ok(ChatResponse {
229+
content,
230+
invocations,
231+
usage: match res.final_data {
232+
Some(final_data) => Some(super::Usage {
233+
input_tokens: final_data.prompt_eval_count as u32,
234+
output_tokens: final_data.eval_count as u32,
235+
}),
236+
None => None,
237+
},
238+
})
229239
} else {
230240
log::warn!("model returned an empty message.");
231-
Ok(("".to_string(), vec![]))
241+
Ok(ChatResponse {
242+
content: "".to_string(),
243+
invocations: vec![],
244+
usage: match res.final_data {
245+
Some(final_data) => Some(super::Usage {
246+
input_tokens: final_data.prompt_eval_count as u32,
247+
output_tokens: final_data.eval_count as u32,
248+
}),
249+
None => None,
250+
},
251+
})
232252
}
233253
}
234254
}

nerve-core/src/agent/generator/openai.rs

+13-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
99

1010
use crate::agent::{state::SharedState, Invocation};
1111

12-
use super::{ChatOptions, Client, Message};
12+
use super::{ChatOptions, ChatResponse, Client, Message};
1313

1414
#[derive(Debug, Clone, Serialize, Deserialize)]
1515
pub struct OpenAiToolFunctionParameterProperty {
@@ -190,7 +190,7 @@ impl Client for OpenAIClient {
190190
&self,
191191
state: SharedState,
192192
options: &ChatOptions,
193-
) -> anyhow::Result<(String, Vec<Invocation>)> {
193+
) -> anyhow::Result<ChatResponse> {
194194
let mut chat_history = vec![
195195
openai_api_rust::Message {
196196
role: Role::System,
@@ -304,7 +304,17 @@ impl Client for OpenAIClient {
304304
}
305305
}
306306

307-
Ok((content, invocations))
307+
Ok(ChatResponse {
308+
content: content.to_string(),
309+
invocations,
310+
usage: match resp.usage.prompt_tokens {
311+
Some(prompt_tokens) => Some(super::Usage {
312+
input_tokens: prompt_tokens,
313+
output_tokens: resp.usage.completion_tokens.unwrap_or(0),
314+
}),
315+
None => None,
316+
},
317+
})
308318
}
309319
}
310320

nerve-core/src/agent/generator/openai_compatible.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use anyhow::Result;
22
use async_trait::async_trait;
33

4-
use crate::agent::{state::SharedState, Invocation};
4+
use crate::agent::state::SharedState;
55

6-
use super::{openai::OpenAIClient, ChatOptions, Client};
6+
use super::{openai::OpenAIClient, ChatOptions, ChatResponse, Client};
77

88
pub struct OpenAiCompatibleClient {
99
client: OpenAIClient,
@@ -34,7 +34,7 @@ impl Client for OpenAiCompatibleClient {
3434
&self,
3535
state: SharedState,
3636
options: &ChatOptions,
37-
) -> anyhow::Result<(String, Vec<Invocation>)> {
37+
) -> anyhow::Result<ChatResponse> {
3838
self.client.chat(state, options).await
3939
}
4040
}

0 commit comments

Comments
 (0)