Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Video Input testcase #869

Merged
merged 3 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 74 additions & 32 deletions offline_inferece.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from vllm import SamplingParams
from vllm.assets.image import ImageAsset
import PIL
import cv2
from PIL import Image

import argparse

Expand All @@ -10,6 +12,7 @@
# Add arguments
parser.add_argument("-m", "--model", help="model name or path")
parser.add_argument("-i", "--image", help="type of image")
parser.add_argument("-v", "--video", help="Video Input")
parser.add_argument(
"--multiple_prompts", action="store_true", help="to run with multiple prompts"
)
Expand All @@ -18,7 +21,25 @@
# Parse the arguments
args = parser.parse_args()

# Load the image
# Process video input and select specified number of evenly distributed frames

def sample_frames(path, num_frames):
    """Return up to ``num_frames`` evenly spaced frames from a video file.

    The video is read sequentially and every ``interval``-th frame is kept,
    where ``interval`` is the reported frame count divided by ``num_frames``
    (never less than 1).  Each kept frame is converted from BGR to RGB,
    wrapped in a PIL image and resized to 256x256.

    Args:
        path: Path of the video, passed straight to ``cv2.VideoCapture``.
        num_frames: Maximum number of frames to return; must be positive.

    Returns:
        A list of at most ``num_frames`` PIL images.  The list is shorter
        when fewer frames could be decoded, and empty when none could.

    Raises:
        ValueError: If ``num_frames`` is not positive.
    """
    if num_frames <= 0:
        raise ValueError(f"num_frames must be positive, got {num_frames}")

    video = cv2.VideoCapture(path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        # Clamp the stride to 1: a clip with fewer frames than requested
        # would otherwise give interval == 0 and a ZeroDivisionError below.
        interval = max(total_frames // num_frames, 1)
        frames = []
        for i in range(total_frames):
            ret, frame = video.read()
            # Check the read status before touching the frame: on a failed
            # read there is no image to convert.
            if not ret:
                continue
            if i % interval != 0:
                continue
            # Only sampled frames pay for colour conversion and resizing.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(rgb).resize((256, 256)))
            if len(frames) == num_frames:
                break
    finally:
        # Release the capture even if decoding or conversion raised.
        video.release()
    return frames

# Load the image / Video

if args.image == "synthetic":
image = ImageAsset("stop_sign").pil_image
Expand All @@ -27,11 +48,13 @@
elif args.image == "snowscat":
filename = "/tmp/snowscat-H3oXiq7_bII-unsplash.jpg"
image = PIL.Image.open(filename)
elif args.video:
video = sample_frames(args.video, 50)
else:
print(f"unknow image {args.image}")
print(f"unknow image/Video Input {args.image} {args.video}")
# Abort with a failure status. A bare `exit` only names the builtin without
# calling it, so the script would carry on with no image or video loaded.
raise SystemExit(1)

sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

Check failure on line 57 in offline_inferece.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (B018)

offline_inferece.py:57:5: B018 Found useless expression. Either assign it to a variable or remove it.

tested_models = [
"meta-llama/Llama-3.2-11B-Vision-Instruct",
Expand All @@ -41,44 +64,63 @@
mdl = args.model
multiple_prompt = args.multiple_prompts

question = "Describe this image."
# Prompt example: https://docs.vllm.ai/en/v0.6.2/getting_started/examples/offline_inference_vision_language.html
if "Qwen2" in mdl:
llm = LLM(
model=mdl,
enforce_eager=False,
max_model_len=32768,
max_num_seqs=5,
limit_mm_per_prompt={"image": 1},
)
prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{question}g<|im_end|>\n<|im_start|>assistant\n"
if args.video:
question = "Describe this video."
llm = LLM(model=mdl,
enforce_eager=True,
dtype='bfloat16' ,
gpu_memory_utilization=0.6)

prompt = f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|video_pad|><|vision_end|>{question}<|im_end|>\n<|im_start|>assistant\n'

if multiple_prompt:

Check failure on line 76 in offline_inferece.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

offline_inferece.py:76:81: E501 Line too long (182 > 80)
batch_data = [
{"prompt": prompt, "multi_modal_data": {"image": image}},
{"prompt": prompt, "multi_modal_data": {"video": video}},
{
"prompt": "<|im_start|>system\nYou are a nice person.<|im_end|>\n<|im_start|>user\nTell me about you.<|im_end|>\n<|im_start|>assistant\n"
"prompt": "<|im_start|>system\nYou are a nice person.<|im_end|>\n<|im_start|>user\ntell me about future of global warming.<|im_end|>\n<|im_start|>assistant\n"
},
{"prompt": prompt, "multi_modal_data": {"image": image}},
{"prompt": prompt, "multi_modal_data": {"video": video}},
]
else:
batch_data = {"prompt": prompt, "multi_modal_data": {"image": image}}

elif "Llama-3.2" in mdl:
llm = LLM(
model=mdl,
max_model_len=2048,
max_num_seqs=64,
tensor_parallel_size=1,
num_scheduler_steps=32,
max_num_prefill_seqs=4,
)
from vllm import TextPrompt
batch_data = TextPrompt(prompt=f"<|image|><|begin_of_text|>{question}")
batch_data["multi_modal_data"] = {"image": image}
else:
print(f"{mdl} is not known model?")
batch_data = {"prompt": prompt, "multi_modal_data": {"video": video}}

else :
question = "Describe this image."

Check failure on line 88 in offline_inferece.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

offline_inferece.py:88:81: E501 Line too long (164 > 80)
# Prompt example: https://docs.vllm.ai/en/v0.6.2/getting_started/examples/offline_inference_vision_language.html
if "Qwen2" in mdl:
llm = LLM(
model=mdl,
enforce_eager=False,
max_model_len=32768,
max_num_seqs=5,
limit_mm_per_prompt={"image": 1},
)
prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{question}g<|im_end|>\n<|im_start|>assistant\n"

if multiple_prompt:
batch_data = [
{"prompt": prompt, "multi_modal_data": {"image": image}},
{
"prompt": "<|im_start|>system\nYou are a nice person.<|im_end|>\n<|im_start|>user\nTell me about you.<|im_end|>\n<|im_start|>assistant\n"
},
{"prompt": prompt, "multi_modal_data": {"image": image}},
]
else:
batch_data = {"prompt": prompt, "multi_modal_data": {"image": image}}

elif "Llama-3.2" in mdl:

Check failure on line 111 in offline_inferece.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

offline_inferece.py:111:81: E501 Line too long (187 > 80)
llm = LLM(
model=mdl,
max_model_len=2048,
max_num_seqs=64,
tensor_parallel_size=1,
num_scheduler_steps=32,
max_num_prefill_seqs=4,
)
prompt = f"<|image|><|begin_of_text|>{question}"
else:
print(f"{mdl} is not known model?")

Check failure on line 123 in offline_inferece.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

offline_inferece.py:123:81: E501 Line too long (147 > 80)
for i in range(int(args.iter)):
print(f"==ITER : [{i}]")
outputs = llm.generate(batch_data)
Expand Down
28 changes: 26 additions & 2 deletions run_offline.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
set -ex
# set -ex

# Print the command-line help for this script and exit with status 1.
usage() {
    echo "Usage: $0"
    echo "Options:"
    echo " --model|-m Model path lub model stub"
    echo " --image_type|-i Type model: snowscat synthetic"
    # The option parser also accepts -v, and the URL may be left out, in
    # which case a default video URL is used.
    echo " --video|-v URL of the video file (optional; a default video is used if omitted)"
    echo " --iter number of iterations(Default:1)"
    exit 1
}
Expand All @@ -31,6 +32,15 @@ while [[ $# -gt 0 ]]; do
ImageType=$2
shift 2
;;
--video | -v)
if [[ -n "$2" && ! "$2" =~ ^- ]]; then # Assign URL if provided
video=$2
shift 2
else
video="https://huggingface.co/spaces/merve/llava-interleave/resolve/main/cats_1.mp4" # video set to default video URL file
shift 1
fi
;;
--iter)
iter=$2
shift 2
Expand All @@ -44,6 +54,7 @@ done
#Set Default values
iter=${iter:-1}
ImageType=${ImageType:-"snowscat"}
video=${video}

if [[ -n $HELP ]]; then
usage
Expand Down Expand Up @@ -71,6 +82,13 @@ if [[ $ImageType == "snowscat" ]] && [[ ! $(md5sum /tmp/snowscat-H3oXiq7_bII-uns
'wb').write(r.content); image = Image.open(f'./{filename}'); image = image.resize((1200, 600)); image.save(f'/tmp/{filename}')"
fi

# Download the requested video into /tmp and record the local path in
# $videofile.
if [[ -n "$video" ]]; then
    filename=$(basename "$video")
    videofile="/tmp/$filename"
    echo "Downloading Video $filename from $video"
    # Check wget's status explicitly instead of relying on `set -e`: a failed
    # download would otherwise leave an empty or partial file behind.
    if ! wget -O "$videofile" "$video"; then
        echo "Failed to download $video" >&2
        rm -f "$videofile"
        exit 1
    fi
fi

if [[ -n $InstallVLLM ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git -b sarkar/qwen2
cd vllm-fork
Expand All @@ -97,5 +115,11 @@ if [[ "$model" == *"Qwen2"* ]]; then
export WORKAROUND=1
fi

ARGS="-m $model -i $ImageType --iter $iter $EXTRAARGS"
# Build the argument string for offline_inferece.py: pass the video file when
# a video was requested, otherwise pass the image type.
case "$video" in
    "") ARGS="-m $model -i $ImageType --iter $iter $EXTRAARGS" ;;
    *)  ARGS="-m $model -v $videofile --iter $iter $EXTRAARGS" ;;
esac


python offline_inferece.py $ARGS
4 changes: 4 additions & 0 deletions test_suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ bash run_offline.sh -m $ModelName -i snowscat --multiple_prompts --skip_warmup
bash run_offline.sh -m $ModelName -i snowscat --skip_warmup
bash run_offline.sh -m $ModelName -i synthetic --multiple_prompts --skip_warmup
bash run_offline.sh -m $ModelName -i synthetic --skip_warmup
bash run_offline.sh -m $ModelName -v --skip_warmup
bash run_offline.sh -m $ModelName -v --multiple_prompts --skip_warmup
# with warmups
bash run_offline.sh -m $ModelName -i snowscat --multiple_prompts
bash run_offline.sh -m $ModelName -i snowscat
bash run_offline.sh -m $ModelName -i synthetic --multiple_prompts
bash run_offline.sh -m $ModelName -i synthetic
bash run_offline.sh -m $ModelName -v
bash run_offline.sh -m $ModelName -v --multiple_prompts
Loading