diff --git a/README.md b/README.md
index d410e21..67a02f7 100644
--- a/README.md
+++ b/README.md
@@ -951,7 +951,7 @@ The available endpoints for the LLaMA Box server mode are:
 ## Tools
 
 It was so hard to find a Chat UI that was directly compatible with OpenAI,
-that mean, no installation required (I can live with `docker run`),
+that is, no installation required (we can live with `docker run`),
 no tokens (or optional), no [Ollama](https://github.com/ollama/ollama) required,
 just a simple RESTful API.
 So we are inspired by
@@ -984,6 +984,12 @@ $ # interactive image editing
 $ IMAGE=/path/to/image.png ./llama-box/tools/image_generate.sh
 ```
 
+We also provide a tool for batch testing against the LLaMA Box server.
+
+```shell
+$ ./llama-box/tools/batch.sh
+```
+
 ## License
 
 MIT
diff --git a/llama-box/tools/batch.sh b/llama-box/tools/batch.sh
new file mode 100755
index 0000000..5fbbb02
--- /dev/null
+++ b/llama-box/tools/batch.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+set -e
+
+user_contents=(
+    "Introduce China in at least 500 words."
+    "Hello, please introduce yourself in at least 100 words."
+    "Write a simple number guessing game in Python."
+    "How to make an apple juice? Please write down the steps in detail."
+    "Long long ago, there was a girl and a boy ... Now, tell me a story about a cat and a dog in at least 300 words."
+    "I want to travel to Hong Kong. Are there any recommended attractions? Also, I live in New York. How can I get there?"
+    "I want to use thread pools in Java programming, what issues do I need to pay attention to?"
+    "Detailed analysis of the three major attention mechanisms in the Transformer architecture used by large models."
+)
+api_server="${API_SERVER:-http://127.0.0.1:8080}"
+temp=${TEMP:-1}
+top_p=${TOP_P:-0.95}
+min_p=${MIN_P:-0.05}
+top_k=${TOP_K:-40}
+max_tokens=${MAX_TOKENS:-1024}
+seed=${SEED:-$(date +%s)}
+
+function request() {
+    rm -rf /tmp/response_*.json
+
+    cc=${1:-1}
+
+    # start
+    if command -v gdate >/dev/null 2>&1; then
+        start_time=$(gdate +%s%N)
+    else
+        start_time=$(date +%s%N)
+    fi
+
+    # requesting
+    for ((i = 0; i < cc; i++)); do
+        # NOTE: the original request payload did not survive in this copy of the
+        # patch; the call below is a reconstruction that sends one non-streaming
+        # chat completion per concurrency slot.
+        content="${user_contents[$((i % ${#user_contents[@]}))]}"
+        curl -s -X POST "${api_server}/v1/chat/completions" \
+            -H "Content-Type: application/json" \
+            -d "{\"messages\":[{\"role\":\"user\",\"content\":\"${content}\"}],\"temperature\":${temp},\"top_p\":${top_p},\"min_p\":${min_p},\"top_k\":${top_k},\"max_tokens\":${max_tokens},\"seed\":${seed}}" > "/tmp/response_$i.json" &
+    done
+    wait
+
+    # end
+    if command -v gdate >/dev/null 2>&1; then
+        end_time=$(gdate +%s%N)
+    else
+        end_time=$(date +%s%N)
+    fi
+    tt=$((($end_time - $start_time) / 1000000))
+
+    # observe
+    oks=$cc
+    ppss=0
+    dpss=0
+    pts=0
+    dts=0
+    for ((i = 0; i < cc; i++)); do
+        # NOTE: the original aggregation did not survive in this copy of the patch;
+        # the reconstruction below sums the OpenAI-compatible usage counters.
+        if ! jq -e '.choices[0]' "/tmp/response_$i.json" >/dev/null 2>&1; then
+            oks=$((oks - 1))
+            continue
+        fi
+        pt=$(jq -r '.usage.prompt_tokens // 0' "/tmp/response_$i.json")
+        dt=$(jq -r '.usage.completion_tokens // 0' "/tmp/response_$i.json")
+        pts=$((pts + pt))
+        dts=$((dts + dt))
+    done
+    if [[ ${tt} -gt 0 ]]; then
+        ppss=$((pts * 1000 / tt))
+        dpss=$((dts * 1000 / tt))
+    fi
+
+    # report
+    echo "concurrency ${cc}: ok ${oks}/${cc}, elapsed ${tt} ms, prompt ${pts} tokens (~${ppss} tps), decode ${dts} tokens (~${dpss} tps)"
+}
+
+# NOTE: the original driver section did not survive in this copy of the patch;
+# the loop below is a reconstruction that runs the benchmark once per requested
+# concurrency level.
+batchs=(${BATCHS:-1 2 4 8})
+if [[ ${#batchs[@]} -gt 0 ]]; then
+for ((j = 0; j < ${#batchs[@]}; j++)); do
+    if ! [[ "${batchs[$j]}" =~ ^[1-9][0-9]*$ ]]; then
+        echo "skipping invalid concurrency value: ${batchs[$j]}" >/dev/null 2>&1
+        continue;
+    fi
+    request "${batchs[$j]}"
+done
+fi
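
For reference, the connection and sampling defaults in `batch.sh` are plain environment variables (`API_SERVER`, `TEMP`, `TOP_P`, `MIN_P`, `TOP_K`, `MAX_TOKENS`, `SEED`), so a run against a non-default server can be sketched as below; the host address and values are illustrative, not part of the patch.

```shell
$ # point the benchmark at a remote LLaMA Box instance, cap the output length, and pin the seed
$ API_SERVER=http://192.168.1.10:8080 MAX_TOKENS=512 SEED=42 \
    ./llama-box/tools/batch.sh
```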
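
The script prefers `gdate` over `date` when measuring elapsed time because BSD `date` (the macOS default) does not support the `%N` nanosecond format. On macOS the GNU variant can be installed via Homebrew, for example:

```shell
$ brew install coreutils   # provides gdate used by batch.sh on macOS
```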
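
Each concurrent request writes its raw response to `/tmp/response_<i>.json`, so individual answers can be inspected after a run. Assuming the server returns the usual OpenAI-compatible chat completion shape (this field layout is an assumption, and `jq` is required), something like the following works:

```shell
$ # print the generated text and token usage of the first response
$ jq -r '.choices[0].message.content, (.usage | tostring)' /tmp/response_0.json
```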