From 6405a39b178726c6fdfb8bb69367e5190bb79828 Mon Sep 17 00:00:00 2001 From: sgolebiewski-intel Date: Tue, 24 Sep 2024 15:08:18 +0200 Subject: [PATCH] Updating Interactive Tutorials section --- docs/nbdoc/consts.py | 2 +- .../3D-pose-estimation-with-output.rst | 261 +-- ...-segmentation-point-clouds-with-output.rst | 5 +- .../action-recognition-webcam-with-output.rst | 1 + ...on-recognition-webcam-with-output_22_0.png | 4 +- docs/notebooks/all_notebooks_paths.txt | 5 +- ...-lightweight-text-to-image-with-output.rst | 289 ++-- ...tweight-text-to-image-with-output_28_0.jpg | 3 - ...tweight-text-to-image-with-output_28_0.png | 3 - ...tweight-text-to-image-with-output_29_0.jpg | 3 + ...tweight-text-to-image-with-output_29_0.png | 3 + ...tweight-text-to-image-with-output_37_2.jpg | 3 - ...tweight-text-to-image-with-output_37_2.png | 3 - ...tweight-text-to-image-with-output_38_2.jpg | 3 + ...tweight-text-to-image-with-output_38_2.png | 3 + ...tweight-text-to-image-with-output_7_0.jpg} | 0 ...tweight-text-to-image-with-output_7_0.png} | 0 docs/notebooks/animate-anyone-with-output.rst | 60 +- docs/notebooks/async-api-with-output.rst | 7 +- .../async-api-with-output_23_0.png | 4 +- docs/notebooks/auto-device-with-output.rst | 189 +-- .../auto-device-with-output_27_0.png | 4 +- .../auto-device-with-output_28_0.png | 4 +- .../bark-text-to-audio-with-output.rst | 22 +- ...visual-language-processing-with-output.rst | 3 + ...clip-language-saliency-map-with-output.rst | 162 +- ...p-zero-shot-classification-with-output.rst | 25 +- ...ontrolnet-stable-diffusion-with-output.rst | 39 +- .../convert-to-openvino-with-output.rst | 17 +- .../convnext-classification-with-output.rst | 58 +- ...ss-lingual-books-alignment-with-output.rst | 31 +- ...segmentation-quantize-nncf-with-output.rst | 207 +-- ...ntation-quantize-nncf-with-output_37_1.png | 4 +- ...ddcolor-image-colorization-with-output.rst | 192 +-- ...or-image-colorization-with-output_25_0.jpg | 4 +- ...or-image-colorization-with-output_25_0.png | 4 +- .../depth-anything-v2-with-output.rst | 51 +- .../depth-anything-v2-with-output_43_0.png | 4 +- docs/notebooks/depth-anything-with-output.rst | 43 +- .../depth-anything-with-output_44_0.png | 4 +- .../detectron2-to-openvino-with-output.rst | 25 +- ...etectron2-to-openvino-with-output_22_0.jpg | 4 +- ...etectron2-to-openvino-with-output_22_0.png | 4 +- ...etectron2-to-openvino-with-output_32_0.jpg | 4 +- ...etectron2-to-openvino-with-output_32_0.png | 4 +- .../distil-whisper-asr-with-output.rst | 31 +- ...rt-sequence-classification-with-output.rst | 59 +- ...ly-2-instruction-following-with-output.rst | 30 +- ...micrafter-animating-images-with-output.rst | 662 ++++---- docs/notebooks/efficient-sam-with-output.rst | 358 ++--- .../efficient-sam-with-output_17_1.png | 4 +- .../efficient-sam-with-output_25_1.png | 4 +- .../efficient-sam-with-output_36_1.png | 4 +- .../encodec-audio-compression-with-output.rst | 21 +- .../explainable-ai-1-basic-with-output.rst | 48 +- ...explainable-ai-2-deep-dive-with-output.rst | 107 +- ...le-ai-3-map-interpretation-with-output.rst | 122 +- .../fast-segment-anything-with-output.rst | 201 +-- docs/notebooks/film-slowmo-with-output.rst | 18 +- docs/notebooks/florence2-with-output.rst | 37 +- .../florence2-with-output_18_0.png | 4 +- .../flux.1-image-generation-with-output.rst | 25 +- ...ux.1-image-generation-with-output_20_1.jpg | 4 +- ...ux.1-image-generation-with-output_20_1.png | 4 +- .../freevc-voice-conversion-with-output.rst | 213 ++- docs/notebooks/gpu-device-with-output.rst 
| 50 +- .../grammar-correction-with-output.rst | 31 +- .../grounded-segment-anything-with-output.rst | 27 +- .../notebooks/handwritten-ocr-with-output.rst | 49 +- .../notebooks/hello-detection-with-output.rst | 45 +- docs/notebooks/hello-npu-with-output.rst | 47 +- .../hello-segmentation-with-output.rst | 55 +- docs/notebooks/hello-world-with-output.rst | 39 +- .../hugging-face-hub-with-output.rst | 149 +- ...nyuan-dit-image-generation-with-output.rst | 205 ++- docs/notebooks/image-bind-with-output.rst | 19 +- ...lassification-quantization-with-output.rst | 68 +- docs/notebooks/instant-id-with-output.rst | 32 +- ...ruct-pix2pix-image-editing-with-output.rst | 289 ++-- docs/notebooks/internvl2-with-output.rst | 556 +++++++ .../internvl2-with-output_16_0.jpg | 3 + .../internvl2-with-output_16_0.png | 3 + docs/notebooks/jina-clip-with-output.rst | 13 +- .../jina-clip-with-output_39_0.png | 4 +- .../knowledge-graphs-conve-with-output.rst | 31 +- ...modal-large-language-model-with-output.rst | 587 +++++-- ...-large-language-model-with-output_29_1.jpg | 4 +- ...-large-language-model-with-output_29_1.png | 4 +- ...-large-language-model-with-output_48_1.png | 4 +- ...l-large-language-model-with-output_8_0.jpg | 4 +- ...l-large-language-model-with-output_8_0.png | 4 +- .../language-quantize-bert-with-output.rst | 87 +- ...cy-models-image-generation-with-output.rst | 22 +- ...stency-models-optimum-demo-with-output.rst | 165 +- ...y-models-optimum-demo-with-output_15_1.jpg | 4 +- ...y-models-optimum-demo-with-output_15_1.png | 4 +- ...cy-models-optimum-demo-with-output_8_1.jpg | 4 +- ...cy-models-optimum-demo-with-output_8_1.png | 4 +- .../lcm-lora-controlnet-with-output.rst | 300 ++-- .../llava-multimodal-chatbot-with-output.rst | 21 +- ...va-next-multimodal-chatbot-with-output.rst | 33 +- ...lm-agent-functioncall-qwen-with-output.rst | 235 +-- .../llm-agent-rag-llamaindex-with-output.rst | 395 ++--- .../llm-agent-react-langchain-with-output.rst | 142 +- .../llm-chatbot-generate-api-with-output.rst | 135 +- docs/notebooks/llm-chatbot-with-output.rst | 316 ++-- .../llm-question-answering-with-output.rst | 204 +-- .../llm-rag-langchain-with-output.rst | 647 +++----- .../llm-rag-llamaindex-with-output.rst | 533 ++---- ...a-content-type-recognition-with-output.rst | 84 +- docs/notebooks/meter-reader-with-output.rst | 193 +-- ...nicpm-v-multimodal-chatbot-with-output.rst | 407 ++++- ...sively-multilingual-speech-with-output.rst | 101 +- .../mobileclip-video-search-with-output.rst | 15 +- ...bilevlm-language-assistant-with-output.rst | 60 +- docs/notebooks/model-server-with-output.rst | 1 + .../music-generation-with-output.rst | 142 +- .../named-entity-recognition-with-output.rst | 47 +- ...o-llava-multimodal-chatbot-with-output.rst | 108 +- ...tract-structure-extraction-with-output.rst | 89 +- .../object-detection-with-output.rst | 4 +- .../object-detection-with-output_19_0.png | 4 +- .../oneformer-segmentation-with-output.rst | 65 +- docs/notebooks/openvino-api-with-output.rst | 130 +- .../openvino-tokenizers-with-output.rst | 263 +-- docs/notebooks/openvoice-with-output.rst | 537 +++---- ...ical-character-recognition-with-output.rst | 306 ++-- .../optimize-preprocessing-with-output.rst | 11 +- .../paddle-ocr-webcam-with-output.rst | 5 +- .../paddle-ocr-webcam-with-output_30_0.png | 4 +- ...to-openvino-classification-with-output.rst | 24 +- .../paint-by-example-with-output.rst | 95 +- .../parler-tts-text-to-speech-with-output.rst | 182 +-- .../notebooks/person-counting-with-output.rst | 269 ++++ 
.../person-counting-with-output_14_0.png | 3 + .../notebooks/person-tracking-with-output.rst | 1 + .../person-tracking-with-output_25_0.png | 4 +- docs/notebooks/phi-3-vision-with-output.rst | 142 +- ....jpg => phi-3-vision-with-output_14_1.jpg} | 0 ....png => phi-3-vision-with-output_14_1.png} | 0 docs/notebooks/photo-maker-with-output.rst | 153 +- .../photo-maker-with-output_33_0.png | 4 +- .../pix2struct-docvqa-with-output.rst | 63 +- docs/notebooks/pixart-with-output.rst | 1422 ++++++++++++++++- .../pixart-with-output_26_0.jpg | 3 - .../pixart-with-output_26_0.png | 3 - .../pixart-with-output_27_0.jpg | 3 + .../pixart-with-output_27_0.png | 3 + .../pixart-with-output_40_2.png | 3 + ...put_5_0.jpg => pixart-with-output_6_0.jpg} | 0 ...put_5_0.png => pixart-with-output_6_0.png} | 0 .../notebooks/pose-estimation-with-output.rst | 8 +- .../pose-estimation-with-output_22_0.png | 4 +- .../pytorch-onnx-to-openvino-with-output.rst | 12 +- ...training-quantization-nncf-with-output.rst | 110 +- ...uantization-aware-training-with-output.rst | 82 +- ...on-sparsity-aware-training-with-output.rst | 360 ++--- .../pytorch-to-openvino-with-output.rst | 29 +- docs/notebooks/qrcode-monster-with-output.rst | 92 +- docs/notebooks/qwen2-audio-with-output.rst | 604 +++++++ docs/notebooks/qwen2-vl-with-output.rst | 606 +++++++ .../qwen2-vl-with-output_16_0.jpg | 3 + .../qwen2-vl-with-output_16_0.png | 3 + .../riffusion-text-to-music-with-output.rst | 75 +- .../rmbg-background-removal-with-output.rst | 148 +- ...ce-text-to-video-retrieval-with-output.rst | 115 +- docs/notebooks/sdxl-turbo-with-output.rst | 194 +-- .../segment-anything-with-output.rst | 257 +-- docs/notebooks/segmind-vegart-with-output.rst | 51 +- ...-shot-image-classification-with-output.rst | 31 +- ...-image-classification-with-output_24_1.png | 4 +- ...tch-to-image-pix2pix-turbo-with-output.rst | 504 ++---- ...o-image-pix2pix-turbo-with-output_18_0.jpg | 4 +- ...o-image-pix2pix-turbo-with-output_18_0.png | 4 +- .../softvc-voice-conversion-with-output.rst | 82 +- ...sound-generation-audioldm2-with-output.rst | 174 +- .../sparsity-optimization-with-output.rst | 72 +- .../speculative-sampling-with-output.rst | 144 +- ...tion-quantization-wav2vec2-with-output.rst | 172 +- ...hbrain-emotion-recognition-with-output.rst | 116 +- docs/notebooks/stable-audio-with-output.rst | 71 +- ...e-cascade-image-generation-with-output.rst | 150 +- ...cade-image-generation-with-output_29_2.jpg | 4 +- ...cade-image-generation-with-output_29_2.png | 4 +- ...table-diffusion-ip-adapter-with-output.rst | 78 +- ...-diffusion-ip-adapter-with-output_22_1.png | 4 +- ...-diffusion-ip-adapter-with-output_25_0.png | 4 +- ...-diffusion-ip-adapter-with-output_28_0.png | 4 +- .../stable-diffusion-keras-cv-with-output.rst | 325 ++-- ...le-diffusion-text-to-image-with-output.rst | 17 +- ...fusion-torchdynamo-backend-with-output.rst | 109 +- ...n-torchdynamo-backend-with-output_14_1.jpg | 3 + ...n-torchdynamo-backend-with-output_14_1.png | 3 + ...n-torchdynamo-backend-with-output_14_2.jpg | 3 - ...n-torchdynamo-backend-with-output_14_2.png | 3 - ...diffusion-v2-infinite-zoom-with-output.rst | 106 +- ...-diffusion-v2-optimum-demo-with-output.rst | 19 +- ...sion-v2-text-to-image-demo-with-output.rst | 26 +- ...diffusion-v2-text-to-image-with-output.rst | 272 ++-- .../stable-diffusion-v3-with-output.rst | 98 +- .../stable-diffusion-xl-with-output.rst | 22 +- docs/notebooks/stable-fast-3d-with-output.rst | 319 ++++ .../stable-video-diffusion-with-output.rst | 170 +- 
docs/notebooks/style-transfer-with-output.rst | 67 +- .../style-transfer-with-output_25_0.png | 4 +- ...-line-level-text-detection-with-output.rst | 191 +-- .../table-question-answering-with-output.rst | 142 +- ...fication-nncf-quantization-with-output.rst | 35 +- ...ion-nncf-quantization-with-output_10_1.png | 3 + ...tion-nncf-quantization-with-output_9_1.png | 3 + ...classification-to-openvino-with-output.rst | 43 +- docs/notebooks/tensorflow-hub-with-output.rst | 68 +- ...e-segmentation-to-openvino-with-output.rst | 106 +- ...mentation-to-openvino-with-output_39_0.png | 4 +- ...ject-detection-to-openvino-with-output.rst | 110 +- ...detection-to-openvino-with-output_38_0.png | 4 +- ...uantization-aware-training-with-output.rst | 80 +- ...tflite-selfie-segmentation-with-output.rst | 5 +- ...e-selfie-segmentation-with-output_33_0.png | 4 +- .../tflite-to-openvino-with-output.rst | 61 +- .../tiny-sd-image-generation-with-output.rst | 103 +- .../triposr-3d-reconstruction-with-output.rst | 288 ++-- docs/notebooks/typo-detector-with-output.rst | 50 +- ...-detection-and-recognition-with-output.rst | 1 + ...eollava-multimodal-chatbot-with-output.rst | 17 +- .../vision-background-removal-with-output.rst | 7 +- .../vision-monodepth-with-output.rst | 94 +- ...isper-subtitles-generation-with-output.rst | 17 +- ...uerstchen-image-generation-with-output.rst | 50 +- .../yolov10-optimization-with-output.rst | 74 +- .../yolov7-optimization-with-output.rst | 1299 --------------- .../yolov7-optimization-with-output_10_0.jpg | 3 - .../yolov7-optimization-with-output_10_0.png | 3 - .../yolov7-optimization-with-output_27_0.jpg | 3 - .../yolov7-optimization-with-output_27_0.png | 3 - .../yolov7-optimization-with-output_44_0.jpg | 3 - .../yolov7-optimization-with-output_44_0.png | 3 - ...lov8-instance-segmentation-with-output.rst | 240 ++- ...instance-segmentation-with-output_18_1.jpg | 3 - ...instance-segmentation-with-output_18_1.png | 4 +- ...instance-segmentation-with-output_46_1.jpg | 3 - ...instance-segmentation-with-output_46_1.png | 4 +- ...instance-segmentation-with-output_62_0.png | 4 +- ...-instance-segmentation-with-output_9_3.jpg | 3 - ...-instance-segmentation-with-output_9_3.png | 4 +- .../yolov8-keypoint-detection-with-output.rst | 378 +++-- ...v8-keypoint-detection-with-output_16_1.jpg | 3 - ...v8-keypoint-detection-with-output_16_1.png | 4 +- ...v8-keypoint-detection-with-output_44_1.jpg | 3 - ...v8-keypoint-detection-with-output_44_1.png | 4 +- ...v8-keypoint-detection-with-output_60_0.png | 4 +- ...ov8-keypoint-detection-with-output_9_3.jpg | 3 - ...ov8-keypoint-detection-with-output_9_3.png | 4 +- docs/notebooks/yolov8-obb-with-output.rst | 3 + .../yolov8-object-detection-with-output.rst | 459 +++++- ...lov8-object-detection-with-output_16_1.jpg | 3 - ...lov8-object-detection-with-output_16_1.png | 4 +- ...lov8-object-detection-with-output_43_1.jpg | 3 - ...lov8-object-detection-with-output_43_1.png | 4 +- ...lov8-object-detection-with-output_70_0.jpg | 3 - ...lov8-object-detection-with-output_70_0.png | 4 +- ...lov8-object-detection-with-output_76_0.png | 4 +- ...olov8-object-detection-with-output_9_3.jpg | 3 - ...olov8-object-detection-with-output_9_3.png | 4 +- ...tion-with-accuracy-control-with-output.rst | 1 + .../yolov9-optimization-with-output.rst | 68 +- .../yolov9-optimization-with-output_36_0.png | 4 +- .../zeroscope-text2video-with-output.rst | 53 +- 268 files changed, 13193 insertions(+), 11169 deletions(-) delete mode 100644 
docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.jpg delete mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.png create mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.jpg create mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.png delete mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.jpg delete mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.png create mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.jpg create mode 100644 docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.png rename docs/notebooks/amused-lightweight-text-to-image-with-output_files/{amused-lightweight-text-to-image-with-output_6_0.jpg => amused-lightweight-text-to-image-with-output_7_0.jpg} (100%) rename docs/notebooks/amused-lightweight-text-to-image-with-output_files/{amused-lightweight-text-to-image-with-output_6_0.png => amused-lightweight-text-to-image-with-output_7_0.png} (100%) create mode 100644 docs/notebooks/internvl2-with-output.rst create mode 100644 docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.jpg create mode 100644 docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.png create mode 100644 docs/notebooks/person-counting-with-output.rst create mode 100644 docs/notebooks/person-counting-with-output_files/person-counting-with-output_14_0.png rename docs/notebooks/phi-3-vision-with-output_files/{phi-3-vision-with-output_12_1.jpg => phi-3-vision-with-output_14_1.jpg} (100%) rename docs/notebooks/phi-3-vision-with-output_files/{phi-3-vision-with-output_12_1.png => phi-3-vision-with-output_14_1.png} (100%) delete mode 100644 docs/notebooks/pixart-with-output_files/pixart-with-output_26_0.jpg delete mode 100644 docs/notebooks/pixart-with-output_files/pixart-with-output_26_0.png create mode 100644 docs/notebooks/pixart-with-output_files/pixart-with-output_27_0.jpg create mode 100644 docs/notebooks/pixart-with-output_files/pixart-with-output_27_0.png create mode 100644 docs/notebooks/pixart-with-output_files/pixart-with-output_40_2.png rename docs/notebooks/pixart-with-output_files/{pixart-with-output_5_0.jpg => pixart-with-output_6_0.jpg} (100%) rename docs/notebooks/pixart-with-output_files/{pixart-with-output_5_0.png => pixart-with-output_6_0.png} (100%) create mode 100644 docs/notebooks/qwen2-audio-with-output.rst create mode 100644 docs/notebooks/qwen2-vl-with-output.rst create mode 100644 docs/notebooks/qwen2-vl-with-output_files/qwen2-vl-with-output_16_0.jpg create mode 100644 docs/notebooks/qwen2-vl-with-output_files/qwen2-vl-with-output_16_0.png create mode 100644 docs/notebooks/stable-diffusion-torchdynamo-backend-with-output_files/stable-diffusion-torchdynamo-backend-with-output_14_1.jpg create mode 100644 docs/notebooks/stable-diffusion-torchdynamo-backend-with-output_files/stable-diffusion-torchdynamo-backend-with-output_14_1.png delete mode 100644 
docs/notebooks/stable-diffusion-torchdynamo-backend-with-output_files/stable-diffusion-torchdynamo-backend-with-output_14_2.jpg delete mode 100644 docs/notebooks/stable-diffusion-torchdynamo-backend-with-output_files/stable-diffusion-torchdynamo-backend-with-output_14_2.png create mode 100644 docs/notebooks/stable-fast-3d-with-output.rst create mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png create mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png delete mode 100644 docs/notebooks/yolov7-optimization-with-output.rst delete mode 100644 docs/notebooks/yolov7-optimization-with-output_files/yolov7-optimization-with-output_10_0.jpg delete mode 100644 docs/notebooks/yolov7-optimization-with-output_files/yolov7-optimization-with-output_10_0.png delete mode 100644 docs/notebooks/yolov7-optimization-with-output_files/yolov7-optimization-with-output_27_0.jpg delete mode 100644 docs/notebooks/yolov7-optimization-with-output_files/yolov7-optimization-with-output_27_0.png delete mode 100644 docs/notebooks/yolov7-optimization-with-output_files/yolov7-optimization-with-output_44_0.jpg delete mode 100644 docs/notebooks/yolov7-optimization-with-output_files/yolov7-optimization-with-output_44_0.png delete mode 100644 docs/notebooks/yolov8-instance-segmentation-with-output_files/yolov8-instance-segmentation-with-output_18_1.jpg delete mode 100644 docs/notebooks/yolov8-instance-segmentation-with-output_files/yolov8-instance-segmentation-with-output_46_1.jpg delete mode 100644 docs/notebooks/yolov8-instance-segmentation-with-output_files/yolov8-instance-segmentation-with-output_9_3.jpg delete mode 100644 docs/notebooks/yolov8-keypoint-detection-with-output_files/yolov8-keypoint-detection-with-output_16_1.jpg delete mode 100644 docs/notebooks/yolov8-keypoint-detection-with-output_files/yolov8-keypoint-detection-with-output_44_1.jpg delete mode 100644 docs/notebooks/yolov8-keypoint-detection-with-output_files/yolov8-keypoint-detection-with-output_9_3.jpg delete mode 100644 docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_16_1.jpg delete mode 100644 docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_43_1.jpg delete mode 100644 docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_70_0.jpg delete mode 100644 docs/notebooks/yolov8-object-detection-with-output_files/yolov8-object-detection-with-output_9_3.jpg diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 76f783052ba445..70670e40a91790 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20240827220813/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20240923220849/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst 
b/docs/notebooks/3D-pose-estimation-with-output.rst index 298b27841fbbb6..dd85261aab6e8d 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -11,14 +11,14 @@ of this notebook, you will see live inference results from your webcam out the algorithms. **Make sure you have properly installed the**\ `Jupyter extension `__\ **and -been using JupyterLab to run the demo as suggested in the -``README.md``** +been using JupyterLab to run the demo as suggested in the** +``README.md`` **NOTE**: *To use a webcam, you must run this Jupyter notebook on a computer with a webcam. If you run on a remote server, the webcam will not work. However, you can still do inference on a video file in - the final step. This demo utilizes the Python interface in - ``Three.js`` integrated with WebGL to process data from the model + the final step. This demo utilizes the Python interface in* + ``Three.js`` *integrated with WebGL to process data from the model inference. These results are processed and displayed in the notebook.* @@ -26,6 +26,7 @@ been using JupyterLab to run the demo as suggested in the recommended browser on one of the following operating systems:* *Ubuntu, Windows: Chrome* *macOS: Safari* + **Table of contents:** @@ -86,101 +87,13 @@ Prerequisites -**The ``pythreejs`` extension may not display properly when using a +**The** ``pythreejs`` **extension may not display properly when using a Jupyter Notebook release. Therefore, it is recommended to use Jupyter Lab instead.** .. code:: ipython3 - %pip install pythreejs "openvino-dev>=2024.0.0" "opencv-python" "torch" "onnx" --extra-index-url https://download.pytorch.org/whl/cpu - - -.. parsed-literal:: - - Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Collecting pythreejs - Using cached pythreejs-2.4.2-py3-none-any.whl.metadata (5.4 kB) - Collecting openvino-dev>=2024.0.0 - Using cached openvino_dev-2024.3.0-16041-py3-none-any.whl.metadata (16 kB) - Collecting opencv-python - Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB) - Collecting torch - Using cached https://download.pytorch.org/whl/cpu/torch-2.4.0%2Bcpu-cp38-cp38-linux_x86_64.whl (195.0 MB) - Collecting onnx - Using cached onnx-1.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) - Collecting ipydatawidgets>=1.1.1 (from pythreejs) - Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) - Collecting numpy (from pythreejs) - Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) - Requirement already satisfied: defusedxml>=0.7.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (0.7.1) - Collecting networkx<=3.1.0 (from openvino-dev>=2024.0.0) - Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) - Collecting openvino-telemetry>=2023.2.1 (from openvino-dev>=2024.0.0) - Using cached 
openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (24.1) - Requirement already satisfied: pyyaml>=5.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (6.0.2) - Requirement already satisfied: requests>=2.25.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino-dev>=2024.0.0) (2.32.0) - Collecting openvino==2024.3.0 (from openvino-dev>=2024.0.0) - Using cached openvino-2024.3.0-16041-cp38-cp38-manylinux2014_x86_64.whl.metadata (8.9 kB) - Collecting filelock (from torch) - Using cached filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) - Collecting sympy (from torch) - Using cached sympy-1.13.2-py3-none-any.whl.metadata (12 kB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) - Collecting fsspec (from torch) - Using cached fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB) - Collecting protobuf>=3.20.2 (from onnx) - Using cached protobuf-5.27.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) - Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) - Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (3.8) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2.2.2) - Requirement already satisfied: certifi>=2017.4.17 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.25.1->openvino-dev>=2024.0.0) (2024.7.4) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) - Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) - Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.47) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.0.1) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) - Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) - Using cached openvino_dev-2024.3.0-16041-py3-none-any.whl (4.7 MB) - Using cached openvino-2024.3.0-16041-cp38-cp38-manylinux2014_x86_64.whl (40.5 MB) - Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) - Using cached onnx-1.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB) - Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) - Using cached networkx-3.1-py3-none-any.whl (2.1 MB) - Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) - Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) - Using cached protobuf-5.27.3-cp38-abi3-manylinux2014_x86_64.whl (309 kB) - Using cached filelock-3.15.4-py3-none-any.whl (16 kB) - Using cached fsspec-2024.6.1-py3-none-any.whl (177 kB) - Using cached sympy-1.13.2-py3-none-any.whl (6.2 MB) - Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) - Installing collected packages: openvino-telemetry, mpmath, traittypes, sympy, protobuf, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, onnx, openvino-dev, ipydatawidgets, pythreejs - Successfully installed filelock-3.15.4 fsspec-2024.6.1 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 onnx-1.16.2 opencv-python-4.10.0.84 openvino-2024.3.0 openvino-dev-2024.3.0 openvino-telemetry-2024.1.0 protobuf-5.27.3 pythreejs-2.4.2 sympy-1.13.2 torch-2.4.0+cpu traittypes-0.2.1 - Note: you may need to restart the kernel to use updated packages. - + %pip install pythreejs "openvino-dev>=2024.0.0" "opencv-python" "torch" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu Imports ------- @@ -192,28 +105,28 @@ Imports import collections import time from pathlib import Path - + import cv2 import ipywidgets as widgets import numpy as np from IPython.display import clear_output, display import openvino as ov - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) with open("notebook_utils.py", "w") as f: f.write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", ) with open("engine3js.py", "w") as f: f.write(r.text) - + import notebook_utils as utils import engine3js as engine @@ -235,35 +148,23 @@ directory structure and downloads the selected model. 
# directory where model will be downloaded base_model_dir = "model" - + # model name as named in Open Model Zoo model_name = "human-pose-estimation-3d-0001" # selected precision (FP32, FP16) precision = "FP32" - + BASE_MODEL_NAME = f"{base_model_dir}/public/{model_name}/{model_name}" model_path = Path(BASE_MODEL_NAME).with_suffix(".pth") onnx_path = Path(BASE_MODEL_NAME).with_suffix(".onnx") - - ir_model_path = f"model/public/{model_name}/{precision}/{model_name}.xml" - model_weights_path = f"model/public/{model_name}/{precision}/{model_name}.bin" - + + ir_model_path = Path(f"model/public/{model_name}/{precision}/{model_name}.xml") + model_weights_path = Path(f"model/public/{model_name}/{precision}/{model_name}.bin") + if not model_path.exists(): download_command = f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}" ! $download_command - -.. parsed-literal:: - - ################|| Downloading human-pose-estimation-3d-0001 ||################ - - ========== Downloading model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - - - ========== Unpacking model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.tar.gz - - - Convert Model to OpenVINO IR format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -282,28 +183,6 @@ IR format. ) ! $convert_command - -.. parsed-literal:: - - ========== Converting human-pose-estimation-3d-0001 to ONNX - Conversion to ONNX command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py --model-path=model/public/human-pose-estimation-3d-0001 --model-name=PoseEstimationWithMobileNet --model-param=is_convertible_by_mo=True --import-module=model --weights=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.pth --input-shape=1,3,256,448 --input-names=data --output-names=features,heatmaps,pafs --output-file=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/omz_tools/internal_scripts/pytorch_to_onnx.py:147: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - model.load_state_dict(torch.load(weights, map_location='cpu')) - ONNX check passed successfully. 
- - ========== Converting human-pose-estimation-3d-0001 to IR (FP32) - Conversion command: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/bin/python -- /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/bin/mo --framework=onnx --output_dir=model/public/human-pose-estimation-3d-0001/FP32 --model_name=human-pose-estimation-3d-0001 --input=data '--mean_values=data[128.0,128.0,128.0]' '--scale_values=data[255.0,255.0,255.0]' --output=features,heatmaps,pafs --input_model=model/public/human-pose-estimation-3d-0001/human-pose-estimation-3d-0001.onnx '--layout=data(NCHW)' '--input_shape=[1, 3, 256, 448]' --compress_to_fp16=False - - [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. - In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. - Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html - [ SUCCESS ] Generated IR version 11 model. - [ SUCCESS ] XML file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml - [ SUCCESS ] BIN file: /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/3D-pose-estimation-webcam/model/public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.bin - - - Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -314,17 +193,8 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 device = utils.device_widget() - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - + device Load the model ~~~~~~~~~~~~~~ @@ -349,7 +219,7 @@ created to infer the compiled model. compiled_model = core.compile_model(model=model, device_name=device.value) infer_request = compiled_model.create_infer_request() input_tensor_name = model.inputs[0].get_any_name() - + # get input and output names of nodes input_layer = compiled_model.input(0) output_layers = list(compiled_model.outputs) @@ -361,15 +231,6 @@ heat maps, PAF (part affinity fields) and features. input_layer.any_name, [o.any_name for o in output_layers] - - - -.. parsed-literal:: - - ('data', ['features', 'heatmaps', 'pafs']) - - - Processing ---------- @@ -389,25 +250,25 @@ input for the 3D model. 
This is how you obtain the output heat maps, PAF def model_infer(scaled_img, stride): """ Run model inference on the input image - + Parameters: scaled_img: resized image according to the input size of the model stride: int, the stride of the window """ - + # Remove excess space from the picture img = scaled_img[ 0 : scaled_img.shape[0] - (scaled_img.shape[0] % stride), 0 : scaled_img.shape[1] - (scaled_img.shape[1] % stride), ] - + img = np.transpose(img, (2, 0, 1))[None,] infer_request.infer({input_tensor_name: img}) # A set of three inference results is obtained results = {name: infer_request.get_tensor(name).data[:] for name in {"features", "heatmaps", "pafs"}} # Get the results results = (results["features"][0], results["heatmaps"][0], results["pafs"][0]) - + return results Draw 2D Pose Overlays @@ -447,8 +308,8 @@ from Open Model Zoo. [13, 14], # neck - r_hip - r_knee - r_ankle ] ) - - + + body_edges_2d = np.array( [ [0, 1], # neck - nose @@ -470,25 +331,25 @@ from Open Model Zoo. [13, 14], # neck - r_hip - r_knee - r_ankle ] ) - - + + def draw_poses(frame, poses_2d, scaled_img, use_popup): """ Draw 2D pose overlays on the image to visualize estimated poses. Joints are drawn as circles and limbs are drawn as lines. - + :param frame: the input image :param poses_2d: array of human joint pairs """ for pose in poses_2d: pose = np.array(pose[0:-1]).reshape((-1, 3)).transpose() was_found = pose[2] > 0 - + pose[0], pose[1] = ( pose[0] * frame.shape[1] / scaled_img.shape[1], pose[1] * frame.shape[0] / scaled_img.shape[0], ) - + # Draw joints. for edge in body_edges_2d: if was_found[edge[0]] and was_found[edge[1]]: @@ -511,7 +372,7 @@ from Open Model Zoo. -1, cv2.LINE_AA, ) - + return frame Main Processing Function @@ -528,18 +389,18 @@ webcam feed or a video file. """ 2D image as input, using OpenVINO as inference backend, get joints 3D coordinates, and draw 3D human skeleton in the scene - + :param source: The webcam number to feed the video stream with primary webcam set to "0", or the video path. :param flip: To be used by VideoPlayer function for flipping capture image. :param use_popup: False for showing encoded frames over this notebook, True for creating a popup window. :param skip_frames: Number of frames to skip at the beginning of the video. """ - + focal_length = -1 # default stride = 8 player = None skeleton_set = None - + try: # create video player to play with target fps video_path # get the frame from camera @@ -547,16 +408,16 @@ webcam feed or a video file. player = utils.VideoPlayer(source, flip=flip, fps=30, skip_first_frames=skip_frames) # start capturing player.start() - + input_image = player.next() # set the window size resize_scale = 450 / input_image.shape[1] windows_width = int(input_image.shape[1] * resize_scale) windows_height = int(input_image.shape[0] * resize_scale) - + # use visualization library engine3D = engine.Engine3js(grid=True, axis=True, view_width=windows_width, view_height=windows_height) - + if use_popup: # display the 3D human pose in this notebook, and origin frame in popup window display(engine3D.renderer) @@ -566,43 +427,43 @@ webcam feed or a video file. 
# set the 2D image box, show both human pose and image in the notebook imgbox = widgets.Image(format="jpg", height=windows_height, width=windows_width) display(widgets.HBox([engine3D.renderer, imgbox])) - + skeleton = engine.Skeleton(body_edges=body_edges) - + processing_times = collections.deque() - + while True: # grab the frame frame = player.next() if frame is None: print("Source ended") break - + # resize image and change dims to fit neural network input # (see https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/human-pose-estimation-3d-0001) scaled_img = cv2.resize(frame, dsize=(model.inputs[0].shape[3], model.inputs[0].shape[2])) - + if focal_length < 0: # Focal length is unknown focal_length = np.float32(0.8 * scaled_img.shape[1]) - + # inference start start_time = time.time() # get results inference_result = model_infer(scaled_img, stride) - + # inference stop stop_time = time.time() processing_times.append(stop_time - start_time) # Process the point to point coordinates of the data poses_3d, poses_2d = engine.parse_poses(inference_result, 1, stride, focal_length, True) - + # use processing times from last 200 frames if len(processing_times) > 200: processing_times.popleft() - + processing_time = np.mean(processing_times) * 1000 fps = 1000 / processing_time - + if len(poses_3d) > 0: # From here, you can rotate the 3D point positions using the function "draw_poses", # or you can directly make the correct mapping below to properly display the object image on the screen @@ -615,28 +476,28 @@ webcam feed or a video file. -y + np.ones(poses_3d[:, 2::4].shape) * 100, -x, ) - + poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3] people = skeleton(poses_3d=poses_3d) - + try: engine3D.scene_remove(skeleton_set) except Exception: pass - + engine3D.scene_add(people) skeleton_set = people - + # draw 2D frame = draw_poses(frame, poses_2d, scaled_img, use_popup) - + else: try: engine3D.scene_remove(skeleton_set) skeleton_set = None except Exception: pass - + cv2.putText( frame, f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", @@ -647,7 +508,7 @@ webcam feed or a video file. 1, cv2.LINE_AA, ) - + if use_popup: cv2.imshow(title, frame) key = cv2.waitKey(1) @@ -661,9 +522,9 @@ webcam feed or a video file. frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90], )[1].tobytes() - + engine3D.renderer.render(engine3D.scene, engine3D.cam) - + except KeyboardInterrupt: print("Interrupted") except RuntimeError as e: @@ -710,10 +571,10 @@ picture on the left to interact. .. code:: ipython3 USE_WEBCAM = False - + cam_id = 0 - video_path = "https://github.com/intel-iot-devkit/sample-videos/raw/master/face-demographics-walking.mp4" - + video_path = "https://storage.openvinotoolkit.org/data/test_data/videos/face-demographics-walking.mp4" + source = cam_id if USE_WEBCAM else video_path - + run_pose_estimation(source=source, flip=isinstance(source, int), use_popup=False) diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index 838bae8fecd1f4..9c441fdf0d11c1 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -22,6 +22,7 @@ segmentation, to scene semantic parsing. It is highly efficient and effective, showing strong performance on par or even better than state of the art. + **Table of contents:** @@ -226,7 +227,7 @@ chair for example. .. 
parsed-literal:: - /tmp/ipykernel_56852/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_60730/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -320,7 +321,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_56852/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_60730/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output.rst b/docs/notebooks/action-recognition-webcam-with-output.rst index 946e0c42d0c086..6f9ff9a062ace7 100644 --- a/docs/notebooks/action-recognition-webcam-with-output.rst +++ b/docs/notebooks/action-recognition-webcam-with-output.rst @@ -35,6 +35,7 @@ Transformer and `ResNet34 `__. + **Table of contents:** diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png index 2840e4943d977c..76fe35e4bad0db 100644 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e39c1aea1fd6a34829f65cb9ce4fa87b8e9dcb718c27a71c1dfb9b3ec606469 -size 68359 +oid sha256:3abf9ccb9dfa3a1f55db22dbb1bbdd3ea307c9495ae9b756acbc071a036b86b5 +size 68096 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index e6a67c78a2bc1a..a1d92044cc9bf5 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -46,6 +46,7 @@ notebooks/image-bind/image-bind.ipynb notebooks/image-classification-quantization/image-classification-quantization.ipynb notebooks/instant-id/instant-id.ipynb notebooks/instruct-pix2pix-image-editing/instruct-pix2pix-image-editing.ipynb +notebooks/internvl2/internvl2.ipynb notebooks/jina-clip/jina-clip.ipynb notebooks/knowledge-graphs-conve/knowledge-graphs-conve.ipynb notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb @@ -101,6 +102,8 @@ notebooks/pytorch-to-openvino/pytorch-to-openvino.ipynb notebooks/qrcode-monster/qrcode-monster.ipynb notebooks/quantizing-model-with-accuracy-control/speech-recognition-quantization-wav2vec2.ipynb notebooks/quantizing-model-with-accuracy-control/yolov8-quantization-with-accuracy-control.ipynb +notebooks/qwen2-audio/qwen2-audio.ipynb +notebooks/qwen2-vl/qwen2-vl.ipynb notebooks/riffusion-text-to-music/riffusion-text-to-music.ipynb notebooks/rmbg-background-removal/rmbg-background-removal.ipynb notebooks/s3d-mil-nce-text-to-video-retrieval/s3d-mil-nce-text-to-video-retrieval.ipynb @@ -127,6 +130,7 @@ notebooks/stable-diffusion-v2/stable-diffusion-v2-text-to-image.ipynb notebooks/stable-diffusion-v3/stable-diffusion-v3.ipynb notebooks/stable-diffusion-xl/segmind-vegart.ipynb notebooks/stable-diffusion-xl/stable-diffusion-xl.ipynb +notebooks/stable-fast-3d/stable-fast-3d.ipynb 
notebooks/stable-video-diffusion/stable-video-diffusion.ipynb notebooks/style-transfer-webcam/style-transfer.ipynb notebooks/surya-line-level-text-detection/surya-line-level-text-detection.ipynb @@ -148,7 +152,6 @@ notebooks/vision-monodepth/vision-monodepth.ipynb notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb notebooks/wuerstchen-image-generation/wuerstchen-image-generation.ipynb notebooks/yolov10-optimization/yolov10-optimization.ipynb -notebooks/yolov7-optimization/yolov7-optimization.ipynb notebooks/yolov8-optimization/yolov8-instance-segmentation.ipynb notebooks/yolov8-optimization/yolov8-keypoint-detection.ipynb notebooks/yolov8-optimization/yolov8-obb.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst index 5d0105e469f6e0..7ea11af6e2eee3 100644 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst @@ -14,6 +14,7 @@ small parameter count and few forward pass generation process, amused can generate many images quickly. This benefit is seen particularly at larger batch sizes. + **Table of contents:** @@ -66,6 +67,25 @@ Prerequisites Note: you may need to restart the kernel to use updated packages. +.. code:: ipython3 + + # Fetch the notebook utils script from the openvino_notebooks repo + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + + + +.. parsed-literal:: + + 24692 + + + Load and run the original pipeline ---------------------------------- @@ -75,12 +95,12 @@ Load and run the original pipeline import torch from diffusers import AmusedPipeline - - + + pipe = AmusedPipeline.from_pretrained( "amused/amused-256", ) - + prompt = "kind smiling ghost" image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] image.save("text2image_256.png") @@ -105,7 +125,7 @@ Load and run the original pipeline -.. image:: amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_6_0.png +.. image:: amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_7_0.png @@ -134,8 +154,8 @@ Define paths for converted models: .. code:: ipython3 from pathlib import Path - - + + TRANSFORMER_OV_PATH = Path("models/transformer_ir.xml") TEXT_ENCODER_OV_PATH = Path("models/text_encoder_ir.xml") VQVAE_OV_PATH = Path("models/vqvae_ir.xml") @@ -148,10 +168,10 @@ file. .. code:: ipython3 import torch - + import openvino as ov - - + + def convert(model: torch.nn.Module, xml_path: str, example_input): xml_path = Path(xml_path) if not xml_path.exists(): @@ -159,7 +179,7 @@ file. 
with torch.no_grad(): converted_model = ov.convert_model(model, example_input=example_input) ov.save_model(converted_model, xml_path, compress_to_fp16=False) - + # cleanup memory torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() @@ -176,17 +196,17 @@ Convert the Text Encoder def __init__(self, text_encoder): super().__init__() self.text_encoder = text_encoder - + def forward(self, input_ids=None, return_dict=None, output_hidden_states=None): outputs = self.text_encoder( input_ids=input_ids, return_dict=return_dict, output_hidden_states=output_hidden_states, ) - + return outputs.text_embeds, outputs.last_hidden_state, outputs.hidden_states - - + + input_ids = pipe.tokenizer( prompt, return_tensors="pt", @@ -194,35 +214,35 @@ Convert the Text Encoder truncation=True, max_length=pipe.tokenizer.model_max_length, ) - + input_example = { "input_ids": input_ids.input_ids, "return_dict": torch.tensor(True), "output_hidden_states": torch.tensor(True), } - + convert(TextEncoderWrapper(pipe.text_encoder), TEXT_ENCODER_OV_PATH, input_example) .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:86: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:797: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:797: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:802: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:825: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:825: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:828: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:828: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:924: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:924: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1415: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1415: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not return_dict: @@ -237,7 +257,7 @@ Convert the U-ViT transformer def __init__(self, transformer): super().__init__() self.transformer = transformer - + def forward( self, latents=None, @@ -251,21 +271,21 @@ Convert the U-ViT transformer pooled_text_emb=pooled_text_emb, encoder_hidden_states=encoder_hidden_states, ) - - + + shape = (1, 16, 16) latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) latents = torch.cat([latents] * 2) - - + + example_input = { "latents": latents, "micro_conds": torch.rand([2, 5], dtype=torch.float32), "pooled_text_emb": torch.rand([2, 768], dtype=torch.float32), "encoder_hidden_states": torch.rand([2, 77, 768], dtype=torch.float32), } - - + + pipe.transformer.eval() w_transformer = TransformerWrapper(pipe.transformer) convert(w_transformer, TRANSFORMER_OV_PATH, example_input) @@ -284,7 +304,7 @@ suitable. This function repeats part of ``AmusedPipeline``. shape = (1, 16, 16) latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) model_input = torch.cat([latents] * 2) - + model_output = pipe.transformer( model_input, micro_conds=torch.rand([2, 5], dtype=torch.float32), @@ -294,50 +314,50 @@ suitable. This function repeats part of ``AmusedPipeline``. 
guidance_scale = 10.0 uncond_logits, cond_logits = model_output.chunk(2) model_output = uncond_logits + guidance_scale * (cond_logits - uncond_logits) - + latents = pipe.scheduler.step( model_output=model_output, timestep=torch.tensor(0), sample=latents, ).prev_sample - + return latents - - + + class VQVAEWrapper(torch.nn.Module): def __init__(self, vqvae): super().__init__() self.vqvae = vqvae - + def forward(self, latents=None, force_not_quantize=True, shape=None): outputs = self.vqvae.decode( latents, force_not_quantize=force_not_quantize, shape=shape.tolist(), ) - + return outputs - - + + latents = get_latents() example_vqvae_input = { "latents": latents, "force_not_quantize": torch.tensor(True), "shape": torch.tensor((1, 16, 16, 64)), } - + convert(VQVAEWrapper(pipe.vqvae), VQVAE_OV_PATH, example_vqvae_input) .. parsed-literal:: - /tmp/ipykernel_57648/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /tmp/ipykernel_61545/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -350,17 +370,10 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 - import ipywidgets as widgets - - - core = ov.Core() - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) - + from notebook_utils import device_widget + + device = device_widget() + device @@ -374,6 +387,8 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 + core = ov.Core() + ov_text_encoder = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) ov_transformer = core.compile_model(TRANSFORMER_OV_PATH, device.value) ov_vqvae = core.compile_model(VQVAE_OV_PATH, device.value) @@ -385,29 +400,29 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. .. code:: ipython3 from collections import namedtuple - - + + class ConvTextEncoderWrapper(torch.nn.Module): def __init__(self, text_encoder, config): super().__init__() self.config = config self.text_encoder = text_encoder - + def forward(self, input_ids=None, return_dict=None, output_hidden_states=None): inputs = { "input_ids": input_ids, "return_dict": return_dict, "output_hidden_states": output_hidden_states, } - + outs = self.text_encoder(inputs) - + outputs = namedtuple("CLIPTextModelOutput", ("text_embeds", "last_hidden_state", "hidden_states")) - + text_embeds = torch.from_numpy(outs[0]) last_hidden_state = torch.from_numpy(outs[1]) hidden_states = list(torch.from_numpy(out) for out in outs.values())[2:] - + return outputs(text_embeds, last_hidden_state, hidden_states) .. code:: ipython3 @@ -417,7 +432,7 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. super().__init__() self.config = config self.transformer = transformer - + def forward(self, latents=None, micro_conds=None, pooled_text_emb=None, encoder_hidden_states=None, **kwargs): outputs = self.transformer( { @@ -428,7 +443,7 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. }, share_inputs=False, ) - + return torch.from_numpy(outputs[0]) .. code:: ipython3 @@ -439,17 +454,17 @@ wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. self.vqvae = vqvae self.dtype = dtype self.config = config - + def decode(self, latents=None, force_not_quantize=True, shape=None): inputs = { "latents": latents, "force_not_quantize": force_not_quantize, "shape": torch.tensor(shape), } - + outs = self.vqvae(inputs) outs = namedtuple("VQVAE", "sample")(torch.from_numpy(outs[0])) - + return outs And insert wrappers instances in the pipeline: @@ -457,25 +472,25 @@ And insert wrappers instances in the pipeline: .. 
code:: ipython3 prompt = "kind smiling ghost" - + transformer = pipe.transformer vqvae = pipe.vqvae text_encoder = pipe.text_encoder - + pipe.__dict__["_internal_dict"]["_execution_device"] = pipe._execution_device # this is to avoid some problem that can occur in the pipeline pipe.register_modules( text_encoder=ConvTextEncoderWrapper(ov_text_encoder, text_encoder.config), transformer=ConvTransformerWrapper(ov_transformer, transformer.config), vqvae=ConvVQVAEWrapper(ov_vqvae, vqvae.dtype, vqvae.config), ) - + image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] image.save("text2image_256.png") .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -492,7 +507,7 @@ And insert wrappers instances in the pipeline: -.. image:: amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.png +.. image:: amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.png @@ -537,10 +552,12 @@ improve model inference speed. .. code:: ipython3 + from notebook_utils import quantization_widget + QUANTIZED_TRANSFORMER_OV_PATH = Path(str(TRANSFORMER_OV_PATH).replace(".xml", "_quantized.xml")) - + skip_for_device = "GPU" in device.value - to_quantize = widgets.Checkbox(value=not skip_for_device, description="Quantization", disabled=skip_for_device) + to_quantize = quantization_widget(not skip_for_device) to_quantize @@ -554,13 +571,11 @@ improve model inference speed. .. code:: ipython3 - import requests - r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Prepare calibration dataset @@ -576,42 +591,42 @@ model inputs for calibration we customize ``CompiledModel``. .. 
code:: ipython3 %%skip not $to_quantize.value - + import datasets from tqdm.auto import tqdm from typing import Any, Dict, List import pickle import numpy as np - - + + def disable_progress_bar(pipeline, disable=True): if not hasattr(pipeline, "_progress_bar_config"): pipeline._progress_bar_config = {'disable': disable} else: pipeline._progress_bar_config['disable'] = disable - - + + class CompiledModelDecorator(ov.CompiledModel): def __init__(self, compiled_model: ov.CompiledModel, data_cache: List[Any] = None, keep_prob: float = 0.5): super().__init__(compiled_model) self.data_cache = data_cache if data_cache is not None else [] self.keep_prob = keep_prob - + def __call__(self, *args, **kwargs): if np.random.rand() <= self.keep_prob: self.data_cache.append(*args) return super().__call__(*args, **kwargs) - - + + def collect_calibration_data(ov_transformer_model, calibration_dataset_size: int) -> List[Dict]: calibration_dataset_filepath = Path(f"calibration_data/{calibration_dataset_size}.pkl") if not calibration_dataset_filepath.exists(): calibration_data = [] pipe.transformer.transformer = CompiledModelDecorator(ov_transformer_model, calibration_data, keep_prob=1.0) disable_progress_bar(pipe) - + dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) - + # Run inference for data collection pbar = tqdm(total=calibration_dataset_size) for batch in dataset: @@ -622,14 +637,14 @@ model inputs for calibration we customize ``CompiledModel``. pbar.update(len(calibration_data) - pbar.n) if pbar.n >= calibration_dataset_size: break - + pipe.transformer.transformer = ov_transformer_model disable_progress_bar(pipe, disable=False) - + calibration_dataset_filepath.parent.mkdir(exist_ok=True, parents=True) with open(calibration_dataset_filepath, 'wb') as f: pickle.dump(calibration_data, f) - + with open(calibration_dataset_filepath, 'rb') as f: calibration_data = pickle.load(f) return calibration_data @@ -645,14 +660,14 @@ model. .. code:: ipython3 %%skip not $to_quantize.value - + from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters from nncf.quantization.range_estimator import RangeEstimatorParameters, StatisticsCollectorParameters, StatisticsType, \ AggregatorType import nncf - + CALIBRATION_DATASET_SIZE = 12 * 25 - + if not QUANTIZED_TRANSFORMER_OV_PATH.exists(): calibration_data = collect_calibration_data(ov_transformer, CALIBRATION_DATASET_SIZE) quantized_model = nncf.quantize( @@ -692,10 +707,8 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/storage.py:414: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - return torch.load(io.BytesIO(b)) @@ -727,7 +740,6 @@ model. .. parsed-literal:: INFO:nncf:3 ignored nodes were found by types in the NNCFGraph - INFO:nncf:182 ignored nodes were found by names in the NNCFGraph INFO:nncf:Not adding activation input quantizer for operation: 53 __module.transformer.embed.conv/aten::_convolution/Convolution INFO:nncf:Not adding activation input quantizer for operation: 1986 __module.transformer.mlm_layer.conv1/aten::_convolution/Convolution INFO:nncf:Not adding activation input quantizer for operation: 2927 __module.transformer.mlm_layer.conv2/aten::_convolution/Convolution @@ -748,17 +760,17 @@ model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: 
RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/tensor/tensor.py:100: RuntimeWarning: invalid value encountered in multiply return Tensor(self.data * unwrap_tensor_data(other)) @@ -768,21 +780,21 @@ Demo generation with quantized pipeline .. code:: ipython3 %%skip not $to_quantize.value - + original_ov_transformer_model = pipe.transformer.transformer pipe.transformer.transformer = core.compile_model(QUANTIZED_TRANSFORMER_OV_PATH, device.value) - + image = pipe(prompt, generator=torch.Generator('cpu').manual_seed(8)).images[0] image.save('text2image_256_quantized.png') - + pipe.transformer.transformer = original_ov_transformer_model - + display(image) .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -793,7 +805,7 @@ Demo generation with quantized pipeline -.. image:: amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.png +.. image:: amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.png Compute Inception Scores and inference time @@ -813,24 +825,24 @@ a rough estimate of generation quality. .. 
code:: ipython3 %%skip not $to_quantize.value - + from torchmetrics.image.inception import InceptionScore from torchvision import transforms as transforms from itertools import islice import time - + VALIDATION_DATASET_SIZE = 100 - + def compute_inception_score(ov_transformer_model_path, validation_set_size, batch_size=100): original_ov_transformer_model = pipe.transformer.transformer pipe.transformer.transformer = core.compile_model(ov_transformer_model_path, device.value) - + disable_progress_bar(pipe) dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", "unlabeled", split="validation", trust_remote_code=True).shuffle(seed=42) dataset = islice(dataset, validation_set_size) - + inception_score = InceptionScore(normalize=True, splits=1) - + images = [] infer_times = [] for batch in tqdm(dataset, total=validation_set_size, desc="Computing Inception Score"): @@ -842,21 +854,21 @@ a rough estimate of generation quality. infer_times.append(time.perf_counter() - start_time) image = transforms.ToTensor()(image) images.append(image) - + mean_perf_time = sum(infer_times) / len(infer_times) - + while len(images) > 0: images_batch = torch.stack(images[-batch_size:]) images = images[:-batch_size] inception_score.update(images_batch) kl_mean, kl_std = inception_score.compute() - + pipe.transformer.transformer = original_ov_transformer_model disable_progress_bar(pipe, disable=False) - + return kl_mean, mean_perf_time - - + + original_inception_score, original_time = compute_inception_score(TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) print(f"Original pipeline Inception Score: {original_inception_score}") quantized_inception_score, quantized_time = compute_inception_score(QUANTIZED_TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) @@ -866,7 +878,7 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. warnings.warn(\*args, \*\*kwargs) # noqa: B028 @@ -878,15 +890,13 @@ a rough estimate of generation quality. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. - deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:176: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/image/inception.py:175: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1808.) return kl.mean(), kl.std() .. parsed-literal:: - Original pipeline Inception Score: 11.146076202392578 + Original pipeline Inception Score: 11.875359535217285 @@ -897,8 +907,8 @@ a rough estimate of generation quality. .. parsed-literal:: - Quantized pipeline Inception Score: 9.630990028381348 - Quantization speed-up: 2.08x + Quantized pipeline Inception Score: 11.0730562210083 + Quantization speed-up: 2.06x Interactive inference @@ -910,14 +920,16 @@ Below you can select which pipeline to run: original or quantized. .. code:: ipython3 + import ipywidgets as widgets + quantized_model_present = QUANTIZED_TRANSFORMER_OV_PATH.exists() - + use_quantized_model = widgets.Checkbox( value=True if quantized_model_present else False, description="Use quantized pipeline", disabled=not quantized_model_present, ) - + use_quantized_model @@ -931,24 +943,23 @@ Below you can select which pipeline to run: original or quantized. .. code:: ipython3 - import requests from pathlib import Path - + if not Path("gradio_helper.py").exists(): r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/amused-lightweight-text-to-image/gradio_helper.py" ) open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + pipe.transformer.transformer = core.compile_model( QUANTIZED_TRANSFORMER_OV_PATH if use_quantized_model.value else TRANSFORMER_OV_PATH, device.value, ) - + demo = make_demo(pipe) - + try: demo.queue().launch(debug=False) except Exception: @@ -961,7 +972,7 @@ Below you can select which pipeline to run: original or quantized. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. 
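The quality and latency comparison above can be complemented with a quick look at the on-disk footprint of the two transformer models. The sketch below is illustrative only: it assumes ``TRANSFORMER_OV_PATH`` and ``QUANTIZED_TRANSFORMER_OV_PATH`` from the earlier cells exist and that each ``.xml`` topology file has its ``.bin`` weights file next to it.

.. code:: ipython3

    # Illustrative on-disk footprint comparison; assumes the IR files produced
    # by the conversion and quantization cells above are present.
    def ir_size_mb(xml_path: Path) -> float:
        bin_path = xml_path.with_suffix(".bin")
        return (xml_path.stat().st_size + bin_path.stat().st_size) / 2**20

    if QUANTIZED_TRANSFORMER_OV_PATH.exists():
        fp_size = ir_size_mb(TRANSFORMER_OV_PATH)
        int8_size = ir_size_mb(QUANTIZED_TRANSFORMER_OV_PATH)
        print(f"Original transformer IR:  {fp_size:.1f} MB")
        print(f"Quantized transformer IR: {int8_size:.1f} MB")
        print(f"Compression ratio:        {fp_size / int8_size:.2f}x")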
diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.jpg b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.jpg deleted file mode 100644 index 2f80635f8c137d..00000000000000 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0ff16c3e16aa46d8734fca3c7bd1e1571222c5e95bfa9a8e464e980c38dc1b4 -size 5523 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.png b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.png deleted file mode 100644 index b7cd3bcc370215..00000000000000 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_28_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b874aa2efb3b7a438526ed7d8ba3f20fc4b164c6d25d70abd3b1d65dafa8673e -size 81854 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.jpg b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.jpg new file mode 100644 index 00000000000000..e58796ba05efb0 --- /dev/null +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ff9b8e08053e9085a87d478b11840166d75f914439548030623f6dcdeff6e1 +size 5463 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.png b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.png new file mode 100644 index 00000000000000..a26ff93feb8363 --- /dev/null +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_29_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bbad4fe44f716d12763c49716a30e7efc1e309b326763247f54d435b0fb2b78 +size 81866 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.jpg b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.jpg deleted file mode 100644 index 944ca035340458..00000000000000 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7f3abc24fb5cb6c674974109aeb1e98eb87dfa188eaf36ffd314e0ed5d370824 -size 5343 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.png b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.png deleted file mode 100644 index 74c10d39f3f9af..00000000000000 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_37_2.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:c76a4f5e4a5d2f65626843461cf1b637efdef7c8da3cba9ff63baf9602845e70 -size 78975 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.jpg b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.jpg new file mode 100644 index 00000000000000..8375676b065729 --- /dev/null +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d91d865dc9c27626eb7f4f75cde57d608404036d7df640cefbb8b15498cb73c +size 6085 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.png b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.png new file mode 100644 index 00000000000000..72abe7f263481a --- /dev/null +++ b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_38_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf53ee0112359fb6f61c06be7a8f14ac4832397d3c710f5d090680c8124c979f +size 86521 diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_6_0.jpg b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_7_0.jpg similarity index 100% rename from docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_6_0.jpg rename to docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_7_0.jpg diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_6_0.png b/docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_7_0.png similarity index 100% rename from docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_6_0.png rename to docs/notebooks/amused-lightweight-text-to-image-with-output_files/amused-lightweight-text-to-image-with-output_7_0.png diff --git a/docs/notebooks/animate-anyone-with-output.rst b/docs/notebooks/animate-anyone-with-output.rst index cb2397700d155f..1c5e5bd322fead 100644 --- a/docs/notebooks/animate-anyone-with-output.rst +++ b/docs/notebooks/animate-anyone-with-output.rst @@ -36,7 +36,7 @@ repo `__ and .. warning:: - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. + This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. **Table of contents:** @@ -60,6 +60,8 @@ repo `__ and - `Video post-processing <#video-post-processing>`__ - `Interactive inference <#interactive-inference>`__ + + This is a self-contained example that relies solely on its own code. We recommend running the notebook in a virtual environment. You only
You only @@ -92,6 +94,12 @@ Prerequisites url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + %load_ext skip_kernel_extension @@ -145,7 +153,6 @@ Note that we clone a fork of original repo with tweaked forward methods. from diffusers.image_processor import VaeImageProcessor from transformers import CLIPImageProcessor import torch - import ipywidgets as widgets from src.pipelines.pipeline_pose2vid_long import Pose2VideoPipeline from src.utils.util import get_fps, read_frames @@ -155,11 +162,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( @@ -201,7 +208,7 @@ Prepare base model if saved_path.exists(): continue hf_hub_download( - repo_id="runwayml/stable-diffusion-v1-5", + repo_id="botp/stable-diffusion-v1-5", subfolder=PurePosixPath(saved_path.parent.name), filename=PurePosixPath(saved_path.name), local_dir=local_dir, @@ -274,25 +281,25 @@ Download weights .. parsed-literal:: - diffusion_pytorch_model.bin: 0%| | 0.00/335M [00:00 - + Your browser does not support the video tag. 
diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 70df1a3e4e2400..74704ad6cc7fea 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -11,6 +11,7 @@ device is busy with inference, the application can perform other tasks in parallel (for example, populating inputs or scheduling other requests) rather than wait for the current inference to complete first. + **Table of contents:** @@ -354,7 +355,7 @@ Test performance in Sync Mode .. parsed-literal:: Source ended - average throuput in sync mode: 60.84 fps + average throuput in sync mode: 62.96 fps Async Mode @@ -493,7 +494,7 @@ Test the performance in Async Mode .. parsed-literal:: Source ended - average throuput in async mode: 104.64 fps + average throuput in async mode: 105.40 fps Compare the performance @@ -636,5 +637,5 @@ Test the performance with ``AsyncInferQueue`` .. parsed-literal:: - average throughput in async mode with async infer queue: 147.78 fps + average throughput in async mode with async infer queue: 140.92 fps diff --git a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png index 9a0d4ef3d21eea..66cfd441941135 100644 --- a/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png +++ b/docs/notebooks/async-api-with-output_files/async-api-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:940bdaaa5cb575d87dbcabd934cc9262c35556c1361832f4691acb6bd42a99ea -size 29427 +oid sha256:7ae89d325549471fb262f2c192fc3fc094fe0449df44306d0e4e5c8dfd349ccb +size 29439 diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index 5afc760c1c23e8..b19ab69ca4cd65 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -30,6 +30,7 @@ first inference. auto + **Table of contents:** @@ -79,10 +80,10 @@ Import modules and create Core .. code:: ipython3 import platform - + # Install required packages - %pip install -q "openvino>=2023.1.0" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu - + %pip install -q "openvino>=2023.1.0" "numpy<2" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" else: @@ -99,13 +100,13 @@ Import modules and create Core import time import sys - + import openvino as ov - + from IPython.display import Markdown, display - + core = ov.Core() - + if not any("GPU" in device for device in core.available_devices): display( Markdown( @@ -149,11 +150,11 @@ For more information about model conversion API, see this import torchvision from pathlib import Path - + base_model_dir = Path("./model") base_model_dir.mkdir(exist_ok=True) model_path = base_model_dir / "resnet50.xml" - + if not model_path.exists(): pt_model = torchvision.models.resnet50(weights="DEFAULT") ov_model = ov.convert_model(pt_model, input=[[1, 3, 224, 224]]) @@ -184,28 +185,31 @@ By default, ``compile_model`` API will select **AUTO** as .. code:: ipython3 + import openvino.properties.log as log + + # Set LOG_LEVEL to LOG_INFO. - core.set_property("AUTO", {"LOG_LEVEL": "LOG_INFO"}) - + core.set_property("AUTO", {log.level(): log.Level.INFO}) + # Load the model onto the target device. 
compiled_model = core.compile_model(ov_model) - + if isinstance(compiled_model, ov.CompiledModel): print("Successfully compiled model without a device_name.") .. parsed-literal:: - [23:28:52.0942]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [23:28:52.0942]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [23:28:52.0942]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [23:28:52.0942]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [23:28:52.0943]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [23:28:52.0943]I[schedule.cpp:17][AUTO] scheduler starting - [23:28:52.0943]I[auto_schedule.cpp:181][AUTO] select device:CPU - [23:28:52.2507]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 156.383395 ms - [23:28:52.2510]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [23:28:52.2512]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [23:33:07.3079]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [23:33:07.3079]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [23:33:07.3079]I[schedule.cpp:17][AUTO] scheduler starting + [23:33:07.3080]I[auto_schedule.cpp:181][AUTO] select device:CPU + [23:33:07.4176]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 109.630635 ms + [23:33:07.4178]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [23:33:07.4178]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -218,8 +222,8 @@ By default, ``compile_model`` API will select **AUTO** as .. parsed-literal:: + [23:33:07.4229]I[schedule.cpp:308][AUTO] scheduler ending Deleted compiled_model - [23:28:52.2625]I[schedule.cpp:308][AUTO] scheduler ending Explicitly pass AUTO as device_name to Core::compile_model API @@ -233,10 +237,10 @@ improve readability of your code. .. code:: ipython3 # Set LOG_LEVEL to LOG_NONE. - core.set_property("AUTO", {"LOG_LEVEL": "LOG_NONE"}) - + core.set_property("AUTO", {log.level(): log.Level.NO}) + compiled_model = core.compile_model(model=ov_model, device_name="AUTO") - + if isinstance(compiled_model, ov.CompiledModel): print("Successfully compiled model using AUTO.") @@ -286,25 +290,25 @@ function, we will reuse it for preparing input data. # Fetch `notebook_utils` module import requests - + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import download_file .. code:: ipython3 from PIL import Image - + # Download the image from the openvino_notebooks storage image_filename = download_file( "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco.jpg", directory="data", ) - + image = Image.open(str(image_filename)) input_transform = torchvision.models.ResNet50_Weights.DEFAULT.transforms() - + input_tensor = input_transform(image) input_tensor = input_tensor.unsqueeze(0).numpy() image @@ -335,10 +339,10 @@ Load the model to GPU device and perform inference # Start time. 
gpu_load_start_time = time.perf_counter() compiled_model = core.compile_model(model=ov_model, device_name="GPU") # load to GPU - + # Execute the first inference. results = compiled_model(input_tensor)[0] - + # Measure time to the first inference. gpu_fil_end_time = time.perf_counter() gpu_fil_span = gpu_fil_end_time - gpu_load_start_time @@ -364,11 +368,11 @@ executed on CPU until GPU is ready. # Start time. auto_load_start_time = time.perf_counter() compiled_model = core.compile_model(model=ov_model) # The device_name is AUTO by default. - + # Execute the first inference. results = compiled_model(input_tensor)[0] - - + + # Measure time to the first inference. auto_fil_end_time = time.perf_counter() auto_fil_span = auto_fil_end_time - auto_load_start_time @@ -377,7 +381,7 @@ executed on CPU until GPU is ready. .. parsed-literal:: - Time to load model using AUTO device and get first inference: 0.16 seconds. + Time to load model using AUTO device and get first inference: 0.13 seconds. .. code:: ipython3 @@ -421,7 +425,7 @@ Class and callback definition :member: latency_list: Record the latency of each inference execution over @interval seconds duration. :member: interval: The metrics will be updated every @interval seconds """ - + def __init__(self, interval): """ Create and initilize one instance of class PerformanceMetrics. @@ -431,11 +435,11 @@ Class and callback definition """ self.fps = 0 self.latency = 0 - + self.start_time = time.perf_counter() self.latency_list = [] self.interval = interval - + def update(self, infer_request: ov.InferRequest) -> bool: """ Update the metrics if current ongoing @interval seconds duration is expired. Record the latency only if it is not expired. @@ -457,8 +461,8 @@ Class and callback definition return True else: return False - - + + class InferContext: """ Inference context. Record and update peforamnce metrics via @metrics, set @feed_inference to False once @remaining_update_num <=0 @@ -466,7 +470,7 @@ Class and callback definition :member: remaining_update_num: the remaining times for peforamnce metrics updating. :member: feed_inference: if feed inference request is required or not. """ - + def __init__(self, update_interval, num): """ Create and initilize one instance of class InferContext. @@ -478,7 +482,7 @@ Class and callback definition self.metrics = PerformanceMetrics(update_interval) self.remaining_update_num = num self.feed_inference = True - + def update(self, infer_request: ov.InferRequest): """ Update the context. Set @feed_inference to False if the number of remaining performance metric updates (@remaining_update_num) reaches 0 @@ -487,13 +491,13 @@ Class and callback definition """ if self.remaining_update_num <= 0: self.feed_inference = False - + if self.metrics.update(infer_request): self.remaining_update_num = self.remaining_update_num - 1 if self.remaining_update_num <= 0: self.feed_inference = False - - + + def completion_callback(infer_request: ov.InferRequest, context) -> None: """ callback for the inference request, pass the @infer_request to @context for updating @@ -502,8 +506,8 @@ Class and callback definition :returns: None """ context.update(infer_request) - - + + # Performance metrics update interval (seconds) and number of times. metrics_update_interval = 10 metrics_update_num = 6 @@ -518,30 +522,33 @@ Loop for inference and update the FPS/Latency every .. 
code:: ipython3 + import openvino.properties.hint as hints + + THROUGHPUT_hint_context = InferContext(metrics_update_interval, metrics_update_num) - + print("Compiling Model for AUTO device with THROUGHPUT hint") sys.stdout.flush() - - compiled_model = core.compile_model(model=ov_model, config={"PERFORMANCE_HINT": "THROUGHPUT"}) - + + compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}) + infer_queue = ov.AsyncInferQueue(compiled_model, 0) # Setting to 0 will query optimal number by default. infer_queue.set_callback(completion_callback) - + print(f"Start inference, {metrics_update_num: .0f} groups of FPS/latency will be measured over {metrics_update_interval: .0f}s intervals") sys.stdout.flush() - + while THROUGHPUT_hint_context.feed_inference: infer_queue.start_async(input_tensor, THROUGHPUT_hint_context) - + infer_queue.wait_all() - + # Take the FPS and latency of the latest period. THROUGHPUT_hint_fps = THROUGHPUT_hint_context.metrics.fps THROUGHPUT_hint_latency = THROUGHPUT_hint_context.metrics.latency - + print("Done") - + del compiled_model @@ -549,12 +556,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 183.93fps, latency: 30.99ms, time interval: 10.02s - throughput: 183.54fps, latency: 31.90ms, time interval: 10.01s - throughput: 182.94fps, latency: 32.06ms, time interval: 10.00s - throughput: 183.07fps, latency: 31.98ms, time interval: 10.01s - throughput: 183.57fps, latency: 31.91ms, time interval: 10.01s - throughput: 184.26fps, latency: 31.79ms, time interval: 10.00s + throughput: 184.55fps, latency: 31.14ms, time interval: 10.00s + throughput: 183.88fps, latency: 31.86ms, time interval: 10.01s + throughput: 182.58fps, latency: 32.11ms, time interval: 10.00s + throughput: 183.39fps, latency: 31.91ms, time interval: 10.00s + throughput: 183.80fps, latency: 31.85ms, time interval: 10.01s + throughput: 183.74fps, latency: 31.86ms, time interval: 10.00s Done @@ -569,30 +576,30 @@ Loop for inference and update the FPS/Latency for each .. code:: ipython3 LATENCY_hint_context = InferContext(metrics_update_interval, metrics_update_num) - + print("Compiling Model for AUTO Device with LATENCY hint") sys.stdout.flush() - - compiled_model = core.compile_model(model=ov_model, config={"PERFORMANCE_HINT": "LATENCY"}) - + + compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode(): hints.PerformanceMode.LATENCY}) + # Setting to 0 will query optimal number by default. infer_queue = ov.AsyncInferQueue(compiled_model, 0) infer_queue.set_callback(completion_callback) - + print(f"Start inference, {metrics_update_num: .0f} groups fps/latency will be out with {metrics_update_interval: .0f}s interval") sys.stdout.flush() - + while LATENCY_hint_context.feed_inference: infer_queue.start_async(input_tensor, LATENCY_hint_context) - + infer_queue.wait_all() - + # Take the FPS and latency of the latest period. 
LATENCY_hint_fps = LATENCY_hint_context.metrics.fps LATENCY_hint_latency = LATENCY_hint_context.metrics.latency - + print("Done") - + del compiled_model @@ -600,12 +607,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 136.75fps, latency: 6.79ms, time interval: 10.00s - throughput: 140.82fps, latency: 6.69ms, time interval: 10.01s - throughput: 141.15fps, latency: 6.71ms, time interval: 10.00s - throughput: 141.14fps, latency: 6.71ms, time interval: 10.00s - throughput: 140.99fps, latency: 6.70ms, time interval: 10.00s - throughput: 141.25fps, latency: 6.71ms, time interval: 10.00s + throughput: 139.69fps, latency: 6.62ms, time interval: 10.00s + throughput: 141.89fps, latency: 6.61ms, time interval: 10.00s + throughput: 142.44fps, latency: 6.64ms, time interval: 10.00s + throughput: 142.12fps, latency: 6.61ms, time interval: 10.01s + throughput: 142.13fps, latency: 6.60ms, time interval: 10.00s + throughput: 141.86fps, latency: 6.66ms, time interval: 10.00s Done @@ -617,16 +624,16 @@ Difference in FPS and latency .. code:: ipython3 import matplotlib.pyplot as plt - + TPUT = 0 LAT = 1 labels = ["THROUGHPUT hint", "LATENCY hint"] - + fig1, ax1 = plt.subplots(1, 1) fig1.patch.set_visible(False) ax1.axis("tight") ax1.axis("off") - + cell_text = [] cell_text.append( [ @@ -635,7 +642,7 @@ Difference in FPS and latency ] ) cell_text.append(["%.2f%s" % (LATENCY_hint_fps, " FPS"), "%.2f%s" % (LATENCY_hint_latency, " ms")]) - + table = ax1.table( cellText=cell_text, colLabels=["FPS (Higher is better)", "Latency (Lower is better)"], @@ -650,7 +657,7 @@ Difference in FPS and latency table.auto_set_column_width(0) table.auto_set_column_width(1) table.scale(1, 3) - + fig1.tight_layout() plt.show() @@ -664,28 +671,28 @@ Difference in FPS and latency # Output the difference. 
width = 0.4 fontsize = 14 - + plt.rc("font", size=fontsize) fig, ax = plt.subplots(1, 2, figsize=(10, 8)) - + rects1 = ax[0].bar([0], THROUGHPUT_hint_fps, width, label=labels[TPUT], color="#557f2d") rects2 = ax[0].bar([width], LATENCY_hint_fps, width, label=labels[LAT]) ax[0].set_ylabel("frames per second") ax[0].set_xticks([width / 2]) ax[0].set_xticklabels(["FPS"]) ax[0].set_xlabel("Higher is better") - + rects1 = ax[1].bar([0], THROUGHPUT_hint_latency, width, label=labels[TPUT], color="#557f2d") rects2 = ax[1].bar([width], LATENCY_hint_latency, width, label=labels[LAT]) ax[1].set_ylabel("milliseconds") ax[1].set_xticks([width / 2]) ax[1].set_xticklabels(["Latency (ms)"]) ax[1].set_xlabel("Lower is better") - + fig.suptitle("Performance Hints") fig.legend(labels, fontsize=fontsize) fig.tight_layout() - + plt.show() diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index 62878b27cd2444..aca6ede1f773a1 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5697a7529322266f60af2b150f9e18856f2ca15581e99a6dee40b29f411a8c56 -size 26372 +oid sha256:8441bdf4d1afa3e31512c0f101778671ce6ad70445bebca05b730dd10219208f +size 26638 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index 57133e10154a84..f9d1b83be7e6de 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b700323973dd0e05b0c932768d3ed6677d05cc0206fb5043a0ee797ce728b1c5 -size 40079 +oid sha256:e7d5449cf917e635df8929f98f63f423e264eb608bd858c58097793690227522 +size 40041 diff --git a/docs/notebooks/bark-text-to-audio-with-output.rst b/docs/notebooks/bark-text-to-audio-with-output.rst index 5a29ff0e8c3ef8..631e1e652b363c 100644 --- a/docs/notebooks/bark-text-to-audio-with-output.rst +++ b/docs/notebooks/bark-text-to-audio-with-output.rst @@ -54,6 +54,7 @@ tokens into audio codec tokens to generate the full waveform. To enable the community to use Bark via public code, EnCodec codec from Facebook is used to act as an audio representation. + **Table of contents:** @@ -92,6 +93,15 @@ Prerequisites %pip install -q "openvino>=2023.1.0" "gradio>=4.19" %pip install -q "git+https://github.com/suno-ai/bark.git" --extra-index-url https://download.pytorch.org/whl/cpu +.. code:: ipython3 + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + Download and Convert models --------------------------- @@ -906,17 +916,9 @@ select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 - import ipywidgets as widgets - import openvino as ov + from notebook_utils import device_widget - core = ov.Core() - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) + device = device_widget() device diff --git a/docs/notebooks/blip-visual-language-processing-with-output.rst b/docs/notebooks/blip-visual-language-processing-with-output.rst index 32045e7fc15313..09d58ec75b4fd0 100644 --- a/docs/notebooks/blip-visual-language-processing-with-output.rst +++ b/docs/notebooks/blip-visual-language-processing-with-output.rst @@ -30,6 +30,7 @@ The tutorial consists of the following parts: 5. Compare original and optimized models 6. Launch interactive demo + **Table of contents:** @@ -70,6 +71,8 @@ The tutorial consists of the following parts: - `Interactive demo <#interactive-demo>`__ + + This is a self-contained example that relies solely on its own code. We recommend running the notebook in a virtual environment. You only diff --git a/docs/notebooks/clip-language-saliency-map-with-output.rst b/docs/notebooks/clip-language-saliency-map-with-output.rst index 066b31623d5a79..3c19a581410863 100644 --- a/docs/notebooks/clip-language-saliency-map-with-output.rst +++ b/docs/notebooks/clip-language-saliency-map-with-output.rst @@ -79,6 +79,7 @@ used to build the saliency map. Here is how it can be done: 5. Update the corresponding region on the ``saliency map``. 6. Repeat steps 2-5 multiple times (``n_iters``). + **Table of contents:** @@ -121,14 +122,14 @@ Initial Implementation with Transformers and Pytorch # Install requirements %pip install -q "openvino>=2023.1.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers "torch>=2.1" "gradio>=4.19" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers "numpy<2" "torch>=2.1" "gradio>=4.19" .. code:: ipython3 from pathlib import Path from typing import Tuple, Union, Optional import requests - + from matplotlib import colors import matplotlib.pyplot as plt import numpy as np @@ -136,6 +137,11 @@ Initial Implementation with Transformers and Pytorch import tqdm from PIL import Image from transformers import CLIPModel, CLIPProcessor + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) To get the CLIP model, you will use the ``transformers`` library and the official ``openai/clip-vit-base-patch16`` from OpenAI. You can use any @@ -151,7 +157,7 @@ steps. .. code:: ipython3 model_checkpoint = "openai/clip-vit-base-patch16" - + model = CLIPModel.from_pretrained(model_checkpoint).eval() processor = CLIPProcessor.from_pretrained(model_checkpoint) @@ -169,19 +175,19 @@ formula above. x = np.random.randint(image_width - crop_size + 1) y = np.random.randint(image_height - crop_size + 1) return x, y, crop_size - - + + def get_cropped_image(im_tensor: np.array, x: int, y: int, crop_size: int) -> np.array: return im_tensor[y : y + crop_size, x : x + crop_size, ...] 
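    # Illustrative sanity check (a sketch, not part of the original helper cell): the two helpers above
    # can be exercised on a dummy (H, W, C) array; only `numpy`, already imported as `np`, is assumed.
    _dummy_image = np.zeros((100, 120, 3))
    _x, _y, _crop = get_random_crop_params(100, 120, 50)  # (image_height, image_width, min_crop_size)
    assert get_cropped_image(_dummy_image, _x, _y, _crop).shape == (_crop, _crop, 3)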
- - + + def update_saliency_map(saliency_map: np.array, similarity: float, x: int, y: int, crop_size: int) -> None: saliency_map[ y : y + crop_size, x : x + crop_size, ] += similarity - - + + def cosine_similarity(one: Union[np.ndarray, torch.Tensor], other: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: return one @ other.T / (np.linalg.norm(one) * np.linalg.norm(other)) @@ -205,17 +211,17 @@ parameters at the end, when you get an optimized model. n_iters = 300 min_crop_size = 50 - + query = "Who developed the Theory of General Relativity?" image_path = Path("example.jpg") - + r = requests.get("https://github.com/user-attachments/assets/a5bedef2-e915-4286-bcc9-d599083a99a6") - + with image_path.open("wb") as f: f.write(r.content) image = Image.open(image_path) im_tensor = np.array(image) - + x_dim, y_dim = image.size Given the ``model`` and ``processor``, the actual inference is simple: @@ -249,15 +255,15 @@ items in the “How To Build a Saliency Map With CLIP?” list above. initial_similarity = cosine_similarity(results.text_embeds, results.image_embeds).item() # 1. Computing query and image similarity saliency_map = np.zeros((y_dim, x_dim)) - + for _ in tqdm.notebook.tqdm(range(n_iters)): # 6. Setting number of the procedure iterations x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) im_crop = get_cropped_image(im_tensor, x, y, crop_size) # 2. Getting a random crop of the image - + inputs = processor(text=[query], images=[im_crop], return_tensors="pt") with torch.no_grad(): results = model(**inputs) # 3. Computing crop and query similarity - + similarity = ( cosine_similarity(results.text_embeds, results.image_embeds).item() - initial_similarity ) # 4. Subtracting query and image similarity from crop and query similarity @@ -307,8 +313,8 @@ Let us overlay the saliency map on the image: plt.title(f'Query: "{query}"') plt.axis("off") return fig - - + + plot_saliency_map(im_tensor, saliency_map, query); @@ -335,21 +341,21 @@ obtain embeddings for the cropped images. with torch.no_grad(): results = model(**inputs) text_embeds = results.text_embeds # save text embeddings to use them later - + initial_similarity = cosine_similarity(text_embeds, results.image_embeds).item() saliency_map = np.zeros((y_dim, x_dim)) - + for _ in tqdm.notebook.tqdm(range(n_iters)): x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) im_crop = get_cropped_image(im_tensor, x, y, crop_size) - + image_inputs = processor(images=[im_crop], return_tensors="pt") # crop preprocessing with torch.no_grad(): image_embeds = model.get_image_features(**image_inputs) # calculate image embeddings only - + similarity = cosine_similarity(text_embeds, image_embeds).item() - initial_similarity update_saliency_map(saliency_map, similarity, x, y, crop_size) - + plot_saliency_map(im_tensor, saliency_map, query); @@ -393,9 +399,9 @@ details about that can be found in HuggingFace Transformers .. 
code:: ipython3 import openvino as ov - + model_name = model_checkpoint.split("/")[-1] - + model.config.torchscript = True model.forward = model.get_text_features text_ov_model = ov.convert_model( @@ -405,7 +411,7 @@ details about that can be found in HuggingFace Transformers "attention_mask": inputs.attention_mask, }, ) - + # get image size after preprocessing from the processor crops_info = processor.image_processor.crop_size.values() if hasattr(processor, "image_processor") else processor.feature_extractor.crop_size.values() model.forward = model.get_image_features @@ -414,12 +420,12 @@ details about that can be found in HuggingFace Transformers example_input={"pixel_values": inputs.pixel_values}, input=[1, 3, *crops_info], ) - + ov_dir = Path("ir") ov_dir.mkdir(exist_ok=True) text_model_path = ov_dir / f"{model_name}_text.xml" image_model_path = ov_dir / f"{model_name}_image.xml" - + # write resulting models on disk ov.save_model(text_ov_model, text_model_path) ov.save_model(image_ov_model, image_model_path) @@ -483,7 +489,7 @@ Inference with OpenVINO™ .. code:: ipython3 core = ov.Core() - + text_model = core.read_model(text_model_path) image_model = core.read_model(image_model_path) @@ -496,15 +502,10 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import ipywidgets as widgets - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) - + from notebook_utils import device_widget + + device = device_widget() + device @@ -535,23 +536,23 @@ the inference process is mostly similar. text_inputs = dict(processor(text=[query], images=[im_tensor], return_tensors="np")) image_inputs = text_inputs.pop("pixel_values") - + text_embeds = text_model(text_inputs)[0] image_embeds = image_model(image_inputs)[0] - + initial_similarity = cosine_similarity(text_embeds, image_embeds) saliency_map = np.zeros((y_dim, x_dim)) - + for _ in tqdm.notebook.tqdm(range(n_iters)): x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) im_crop = get_cropped_image(im_tensor, x, y, crop_size) - + image_inputs = processor(images=[im_crop], return_tensors="np").pixel_values image_embeds = image_model(image_inputs)[image_model.output()] - + similarity = cosine_similarity(text_embeds, image_embeds) - initial_similarity update_saliency_map(saliency_map, similarity, x, y, crop_size) - + plot_saliency_map(im_tensor, saliency_map, query); @@ -599,24 +600,25 @@ performance hint. .. code:: ipython3 from typing import Dict, Any - - + import openvino.properties.hint as hints + + image_model = core.read_model(image_model_path) - + image_model = core.compile_model( model=image_model, device_name=device.value, - config={"PERFORMANCE_HINT": "THROUGHPUT"}, + config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}, ) .. 
code:: ipython3 text_inputs = dict(processor(text=[query], images=[im_tensor], return_tensors="np")) image_inputs = text_inputs.pop("pixel_values") - + text_embeds = text_model(text_inputs)[text_model.output()] image_embeds = image_model(image_inputs)[image_model.output()] - + initial_similarity = cosine_similarity(text_embeds, image_embeds) saliency_map = np.zeros((y_dim, x_dim)) @@ -639,14 +641,14 @@ should pass a progress bar object and call ``update`` method after user_data: Dict[str, Any], # data that you passed along with input pixel values ) -> None: pbar = user_data.pop("pbar") - + image_embeds = infer_request.get_output_tensor().data similarity = cosine_similarity(user_data.pop("text_embeds"), image_embeds) - user_data.pop("initial_similarity") update_saliency_map(**user_data, similarity=similarity) - + pbar.update(1) # update the progress bar - - + + infer_queue = ov.AsyncInferQueue(image_model) infer_queue.set_callback(completion_callback) @@ -670,9 +672,9 @@ should pass a progress bar object and call ``update`` method after for _ in range(n_iters): x, y, crop_size = get_random_crop_params(y_dim, x_dim, min_crop_size) im_crop = get_cropped_image(im_tensor, x, y, crop_size) - + image_inputs = processor(images=[im_crop], return_tensors="np") - + # push data to the queue infer_queue.start_async( # pass inference data as usual @@ -688,13 +690,13 @@ should pass a progress bar object and call ``update`` method after "pbar": pbar, }, ) - + # after you pushed all data to the queue you wait until all callbacks finished infer_queue.wait_all() - + return plot_saliency_map(im_tensor, saliency_map, query if include_query else None) - - + + infer( im_tensor, x_dim, @@ -731,8 +733,8 @@ Let us wrap all code in the function and add a user interface to it. .. code:: ipython3 import ipywidgets as widgets - - + + def build_saliency_map( image: Image, query: str, @@ -743,16 +745,16 @@ Let us wrap all code in the function and add a user interface to it. ): x_dim, y_dim = image.size im_tensor = np.array(image) - + text_inputs = dict(processor(text=[query], images=[im_tensor], return_tensors="np")) image_inputs = text_inputs.pop("pixel_values") - + text_embeds = text_model(text_inputs)[text_model.output()] image_embeds = image_model(image_inputs)[image_model.output()] - + initial_similarity = cosine_similarity(text_embeds, image_embeds) saliency_map = np.zeros((y_dim, x_dim)) - + return infer( im_tensor, x_dim, @@ -785,8 +787,8 @@ done so far in the notebook. max=200, description="min_crop_size", ) - - + + @widgets.interact_manual(image_link="", query="", n_iters=n_iters_widget, min_crop_size=min_crop_size_widget) def build_saliency_map_from_image_link( image_link: str, @@ -799,10 +801,10 @@ done so far in the notebook. except requests.RequestException as e: print(f"Cannot load image from link: {image_link}\nException: {e}") return - + image = Image.open(image_bytes) image = image.convert("RGB") # remove transparency channel or convert grayscale 1 channel to 3 channels - + build_saliency_map(image, query, n_iters, min_crop_size) @@ -817,15 +819,15 @@ The second version will enable loading the image from your computer. .. code:: ipython3 import io - - + + load_file_widget = widgets.FileUpload( accept="image/*", multiple=False, description="Image file", ) - - + + @widgets.interact_manual( file=load_file_widget, query="", @@ -844,9 +846,9 @@ The second version will enable loading the image from your computer. 
except Exception as e: print(f"Cannot load the image: {e}") return - + image = image.convert("RGB") - + build_saliency_map(image, query, n_iters, min_crop_size) @@ -866,11 +868,11 @@ Interactive demo with Gradio if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/clip-language-saliency-map/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(build_saliency_map) - + try: demo.queue().launch(debug=False) except Exception: @@ -883,7 +885,7 @@ Interactive demo with Gradio .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index 119682c9550680..5a2e54c69394a1 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -36,6 +36,7 @@ The notebook contains the following steps: 8. Compare performance of converted and quantized models. 9. Launch interactive demo + **Table of contents:** @@ -120,6 +121,13 @@ tokenizer and preparing the images. else: %pip install -q "matplotlib>=3.4,<3.7" + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + .. code:: ipython3 from transformers import CLIPProcessor, CLIPModel @@ -270,14 +278,9 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import device_widget - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) + device = device_widget() device @@ -337,11 +340,9 @@ inference faster. The optimization process contains the following steps: .. code:: ipython3 - to_quantize = widgets.Checkbox( - value=True, - description="Quantization", - disabled=False, - ) + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() to_quantize diff --git a/docs/notebooks/controlnet-stable-diffusion-with-output.rst b/docs/notebooks/controlnet-stable-diffusion-with-output.rst index 6b5850ee5e34ac..a922fe445a7897 100644 --- a/docs/notebooks/controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/controlnet-stable-diffusion-with-output.rst @@ -141,6 +141,7 @@ of the target in the image: This tutorial focuses mainly on conditioning by pose. However, the discussed steps are also applicable to other annotation modes. 
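For example, the same building blocks can be driven by Canny edges instead of a pose skeleton. The snippet below is an illustrative sketch only: the ``CannyDetector`` helper from ``controlnet-aux``, the ``lllyasviel/control_v11p_sd15_canny`` checkpoint, and the stand-in input image are assumptions based on the packages installed in the Prerequisites step, not steps performed later in this tutorial.

.. code:: ipython3

    # Illustrative sketch: Canny-edge conditioning with the same diffusers building blocks.
    import numpy as np
    import torch
    from PIL import Image
    from controlnet_aux import CannyDetector
    from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

    # Stand-in input image (replace with a real photo to get meaningful edges).
    your_image = Image.fromarray(np.random.randint(0, 255, (512, 512, 3), dtype=np.uint8))

    canny_detector = CannyDetector()
    control_image = canny_detector(your_image)  # edge map used as the ControlNet condition

    canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float32)
    canny_pipe = StableDiffusionControlNetPipeline.from_pretrained("botp/stable-diffusion-v1-5", controlnet=canny_controlnet)
    result = canny_pipe("a dancer on a beach, best quality", image=control_image, num_inference_steps=20).images[0]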
+ **Table of contents:** @@ -199,6 +200,13 @@ Prerequisites %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" %pip install -q "diffusers>=0.14.0" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" "datasets>=2.14.6" "nncf>=2.7.0" + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) Instantiating Generation Pipeline --------------------------------- @@ -230,7 +238,7 @@ controlnet model and ``stable-diffusion-v1-5``: from diffusers import StableDiffusionControlNetPipeline, ControlNetModel controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_openpose", torch_dtype=torch.float32) - pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet) + pipe = StableDiffusionControlNetPipeline.from_pretrained("botp/stable-diffusion-v1-5", controlnet=controlnet) OpenPose ~~~~~~~~ @@ -412,6 +420,7 @@ model with the OpenVINO model, using the following code: def __init__(self, core, model_path, device="AUTO"): self.core = core self.model = core.read_model(model_path) + self._device = device self.compiled_model = core.compile_model(self.model, device) def __call__(self, input_tensor: torch.Tensor): @@ -441,7 +450,7 @@ model with the OpenVINO model, using the following code: None """ self.model.reshape({0: [1, 3, height, width]}) - self.compiled_model = self.core.compile_model(self.model) + self.compiled_model = self.core.compile_model(self.model, self._device) def parameters(self): Device = namedtuple("Device", ["device"]) @@ -459,14 +468,9 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import device_widget - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) + device = device_widget() device @@ -1006,7 +1010,7 @@ on OpenVINO. self.register_to_config(controlnet=core.compile_model(controlnet, device)) self.register_to_config(unet=core.compile_model(unet, device)) self.unet_out = self.unet.output(0) - self.vae_decoder = core.compile_model(vae_decoder) + self.vae_decoder = core.compile_model(vae_decoder, device) self.vae_decoder_out = self.vae_decoder.output(0) def __call__( @@ -1328,16 +1332,9 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import ipywidgets as widgets - core = ov.Core() - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="CPU", - description="Device:", - disabled=False, - ) + device = device_widget("CPU") device @@ -1419,7 +1416,9 @@ improve model inference speed. .. code:: ipython3 - to_quantize = widgets.Checkbox(value=True, description="Quantization") + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() to_quantize @@ -1760,6 +1759,8 @@ launch the interactive demo. .. 
code:: ipython3 + import ipywidgets as widgets + quantized_model_present = int8_pipe is not None use_quantized_model = widgets.Checkbox( diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index 73dbeaebdcc3c4..fd5f10a68ead6c 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -4,6 +4,7 @@ OpenVINO™ Model conversion This notebook shows how to convert a model from original framework format to OpenVINO Intermediate Representation (IR). + **Table of contents:** @@ -40,12 +41,12 @@ Guide =2024.0.0" "requests" "tqdm" "transformers[onnx]>=4.31" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" + "openvino-dev>=2024.0.0" "requests" "tqdm" "transformers>=4.31" "onnx<1.16.2" "torch>=2.1" "torchvision" "tensorflow_hub" "tensorflow" .. parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) + Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.2) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -191,13 +192,13 @@ NLP model from Hugging Face and export it in ONNX format: .. parsed-literal:: - 2024-08-27 23:49:38.725485: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-08-27 23:49:38.759500: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-09-23 23:53:52.285454: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-09-23 23:53:52.319037: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-08-27 23:49:39.287418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + 2024-09-23 23:53:52.844892: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. 
It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -674,7 +675,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-08-27 23:49:56.464642: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-09-23 23:54:09.260359: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 5ff2a9d3477d0a..1204ea2c17f106 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -22,6 +22,7 @@ maintaining the simplicity and efficiency of standard ConvNets. The several pretrained ConvNeXt model. In this tutorial we will use ConvNeXt Tiny model. + **Table of contents:** @@ -73,14 +74,14 @@ image from an open dataset. .. code:: ipython3 import requests - + from torchvision.io import read_image import torchvision.transforms as transforms - - + + img_path = "cats_image.jpeg" r = requests.get("https://huggingface.co/datasets/huggingface/cats-image/resolve/main/cats_image.jpeg") - + with open(img_path, "wb") as f: f.write(r.content) image = read_image(img_path) @@ -101,12 +102,12 @@ models str: book_metadata_url = gutendex_url + "/books/" + str(book_id) @@ -494,15 +500,10 @@ For starting work, we should select device for inference first: .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import device_widget core = ov.Core() - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) + device = device_widget() device @@ -846,11 +847,10 @@ Save the OpenVINO model to disk for future use: .. 
code:: ipython3 - from openvino.runtime import serialize + from pathlib import Path - - ov_model_path = "ov_model/model.xml" - serialize(ov_model, ov_model_path) + ov_model_path = Path("ov_model/model.xml") + ov.save_model(ov_model, ov_model_path) To read the model from disk, use the ``read_model`` method of the ``Core`` object: @@ -892,11 +892,13 @@ parameters for execution on the available hardware. from typing import Any + import openvino.properties.hint as hints + compiled_throughput_hint = core.compile_model( ov_model, device_name=device.value, - config={"PERFORMANCE_HINT": "THROUGHPUT"}, + config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}, ) To further optimize hardware utilization, let’s change the inference @@ -1030,7 +1032,10 @@ Let’s compare the models and plot the results. .. code:: ipython3 - cpu_name = core.get_property("CPU", "FULL_DEVICE_NAME") + import openvino.properties as props + + + cpu_name = core.get_property("CPU", props.device.full_name) plot = sns.barplot(benchmark_dataframe, errorbar="sd") plot.set(ylabel="Sentences Per Second", title=f"Sentence Embeddings Benchmark\n{cpu_name}") diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index c340d54ba03d0e..90e4dd19d66f30 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -53,6 +53,7 @@ demonstration purposes, this tutorial will download one converted CT scan and use that scan for quantization and inference. For production purposes, use a representative dataset for quantizing the model. + **Table of contents:** @@ -98,9 +99,9 @@ Guide =2023.3.0" "monai>=0.9.1" "torchmetrics>=0.11.0" "nncf>=2.8.0" "opencv-python" torch tqdm --extra-index-url https://download.pytorch.org/whl/cpu - + if platform.system() != "Windows": %pip install -q "matplotlib>=3.4" else: @@ -122,15 +123,14 @@ Imports import logging import os - import random import time import warnings import zipfile from pathlib import Path from typing import Union - + warnings.filterwarnings("ignore", category=UserWarning) - + import cv2 import matplotlib.pyplot as plt import monai @@ -142,19 +142,19 @@ Imports from nncf.common.logging.logger import set_log_level from torchmetrics import F1Score as F1 import requests - - + + set_log_level(logging.ERROR) # Disables all NNCF info and warning messages - + # Fetch `notebook_utils` module r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget - + if not Path("./custom_segmentation.py").exists(): download_file(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/ct-segmentation-quantize/custom_segmentation.py") from custom_segmentation import SegmentationModel - + if not Path("./async_pipeline.py").exists(): download_file(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/ct-segmentation-quantize/async_pipeline.py") from async_pipeline import show_live_inference @@ -162,10 +162,10 @@ Imports .. parsed-literal:: - 2024-08-27 23:50:21.702393: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-08-27 23:50:21.736777: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-09-23 23:54:34.184267: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-09-23 23:54:34.218186: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-08-27 23:50:22.325338: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-09-23 23:54:34.809758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -207,13 +207,13 @@ notebook `__. state_dict_url = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/models/kidney-segmentation-kits19/unet_kits19_state_dict.pth" state_dict_file = download_file(state_dict_url, directory="pretrained_model") state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) - + new_state_dict = {} for k, v in state_dict.items(): new_key = k.replace("_model.", "") new_state_dict[new_key] = v new_state_dict.pop("loss_function.pos_weight") - + model = monai.networks.nets.BasicUNet(spatial_dims=2, in_channels=1, out_channels=1).eval() model.load_state_dict(new_state_dict) @@ -231,7 +231,7 @@ notebook `__. .. parsed-literal:: - /tmp/ipykernel_71353/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_75492/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -301,8 +301,8 @@ method to display the images in the expected orientation: def rotate_and_flip(image): """Rotate `image` by 90 degrees and flip horizontally""" return cv2.flip(cv2.rotate(image, rotateCode=cv2.ROTATE_90_CLOCKWISE), flipCode=1) - - + + class KitsDataset: def __init__(self, basedir: str): """ @@ -311,35 +311,35 @@ method to display the images in the expected orientation: with each subdirectory containing directories imaging_frames, with jpg images, and segmentation_frames with segmentation masks as png files. See [data-preparation-ct-scan](./data-preparation-ct-scan.ipynb) - + :param basedir: Directory that contains the prepared CT scans """ masks = sorted(BASEDIR.glob("case_*/segmentation_frames/*png")) - + self.basedir = basedir self.dataset = masks print(f"Created dataset with {len(self.dataset)} items. " f"Base directory for data: {basedir}") - + def __getitem__(self, index): """ Get an item from the dataset at the specified index. - + :return: (image, segmentation_mask) """ mask_path = self.dataset[index] image_path = str(mask_path.with_suffix(".jpg")).replace("segmentation_frames", "imaging_frames") - + # Load images with MONAI's LoadImage to match data loading in training notebook mask = LoadImage(image_only=True, dtype=np.uint8)(str(mask_path)).numpy() img = LoadImage(image_only=True, dtype=np.float32)(str(image_path)).numpy() - + if img.shape[:2] != (512, 512): img = cv2.resize(img.astype(np.uint8), (512, 512)).astype(np.float32) mask = cv2.resize(mask, (512, 512)) - + input_image = np.expand_dims(img, axis=0) return input_image, mask - + def __len__(self): return len(self.dataset) @@ -357,10 +357,10 @@ kidney pixels to verify that the annotations look correct: image_data, mask = next(item for item in dataset if np.count_nonzero(item[1]) > 5000) # Remove extra image dimension and rotate and flip the image for visualization image = rotate_and_flip(image_data.squeeze()) - + # The data loader returns annotations as (index, mask) and mask in shape (H,W) mask = rotate_and_flip(mask) - + fig, ax = plt.subplots(1, 2, figsize=(12, 6)) ax[0].imshow(image, cmap="gray") ax[1].imshow(mask, cmap="gray"); @@ -438,8 +438,9 @@ this notebook. .. code:: ipython3 fp32_ir_path = MODEL_DIR / Path("unet_kits19_fp32.xml") - - fp32_ir_model = ov.convert_model(model, example_input=torch.ones(1, 1, 512, 512, dtype=torch.float32)) + dummy_input = torch.randn(1, 1, 512, 512) + + fp32_ir_model = ov.convert_model(model, example_input=dummy_input, input=dummy_input.shape) ov.save_model(fp32_ir_model, str(fp32_ir_path)) @@ -451,7 +452,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -470,7 +471,7 @@ steps: 1. Create a Dataset for quantization. 2. Run `nncf.quantize` for getting an optimized model. - 3. Export the quantized model to ONNX and then convert to OpenVINO IR model. + 3. Export the quantized model to OpenVINO IR model. 4. Serialize the INT8 model using `ov.save_model` function for benchmarking. .. code:: ipython3 @@ -483,8 +484,8 @@ steps: """ images, _ = data_item return images - - + + data_loader = torch.utils.data.DataLoader(dataset) calibration_dataset = nncf.Dataset(data_loader, transform_fn) quantized_model = nncf.quantize( @@ -526,26 +527,25 @@ Convert quantized model to OpenVINO IR model and save it. .. code:: ipython3 dummy_input = torch.randn(1, 1, 512, 512) - int8_onnx_path = MODEL_DIR / "unet_kits19_int8.onnx" - int8_ir_path = Path(int8_onnx_path).with_suffix(".xml") + int8_ir_path = MODEL_DIR / "unet_kits19_int8.xml" int8_ir_model = ov.convert_model(quantized_model, example_input=dummy_input, input=dummy_input.shape) ov.save_model(int8_ir_model, str(int8_ir_path)) .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - - Mismatched elements: 251114 / 262144 (95.8%) - Greatest absolute difference: 3.779088854789734 at index (0, 0, 110, 259) (up to 1e-05 allowed) - Greatest relative difference: 18063.837693864687 at index (0, 0, 89, 42) (up to 1e-05 allowed) + + Mismatched elements: 246600 / 262144 (94.1%) + Greatest absolute difference: 3.6983602046966553 at index (0, 0, 464, 188) (up to 1e-05 allowed) + Greatest relative difference: 29992.842219662372 at index (0, 0, 248, 255) (up to 1e-05 allowed) _check_trace( @@ -570,7 +570,7 @@ Compare File Size fp32_ir_model_size = fp32_ir_path.with_suffix(".bin").stat().st_size / 1024 quantized_model_size = int8_ir_path.with_suffix(".bin").stat().st_size / 1024 - + print(f"FP32 IR model size: {fp32_ir_model_size:.2f} KB") print(f"INT8 model size: {quantized_model_size:.2f} KB") @@ -591,7 +591,7 @@ Select Inference Device core = ov.Core() # By default, benchmark on MULTI:CPU,GPU if a GPU is available, otherwise on CPU. device_list = ["MULTI:CPU,GPU" if "GPU" in core.available_devices else "AUTO"] - + device = device_widget(device_list[0], added=device_list) device @@ -613,7 +613,7 @@ Compare Metrics for the original model and the quantized model to be sure that t int8_compiled_model = core.compile_model(int8_ir_model, device.value) int8_f1 = compute_f1(int8_compiled_model, dataset) - + print(f"FP32 F1: {fp32_f1:.3f}") print(f"INT8 F1: {int8_f1:.3f}") @@ -660,32 +660,32 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] + [ INFO ] Build ................................. 2024.4.0-16579-c3152d32c9c-releases/2024/4 + [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] - [ INFO ] + [ INFO ] Build ................................. 
2024.4.0-16579-c3152d32c9c-releases/2024/4 + [ INFO ] + [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.76 ms + [ INFO ] Read model took 8.94 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: - [ INFO ] x (node: x) : f32 / [...] / [?,?,?,?] + [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] [ INFO ] Model outputs: - [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [?,1,16..,16..] + [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [Step 6/11] Configuring input of the model [ INFO ] Model inputs: - [ INFO ] x (node: x) : f32 / [...] / [?,?,?,?] + [ INFO ] x (node: x) : f32 / [N,C,H,W] / [1,1,512,512] [ INFO ] Model outputs: - [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [?,1,16..,16..] + [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 161.67 ms + [ INFO ] Compile model took 250.27 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -718,13 +718,21 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] LOADED_FROM_CACHE: False [ INFO ] PERF_COUNT: False [Step 9/11] Creating infer requests and preparing input tensors - [ ERROR ] Input x is dynamic. Provide data shapes! - Traceback (most recent call last): - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/main.py", line 486, in main - data_queue = get_input_data(paths_to_input, app_inputs_info) - File "/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino/tools/benchmark/utils/inputs_filling.py", line 123, in get_input_data - raise Exception(f"Input {info.name} is dynamic. Provide data shapes!") - Exception: Input x is dynamic. Provide data shapes! + [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! + [ INFO ] Fill input 'x' with random values + [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). + [ INFO ] First inference took 50.04 ms + [Step 11/11] Dumping statistics report + [ INFO ] Execution Devices:['CPU'] + [ INFO ] Count: 429 iterations + [ INFO ] Duration: 15001.16 ms + [ INFO ] Latency: + [ INFO ] Median: 34.68 ms + [ INFO ] Average: 34.74 ms + [ INFO ] Min: 34.33 ms + [ INFO ] Max: 37.05 ms + [ INFO ] Throughput: 28.60 FPS .. code:: ipython3 @@ -739,18 +747,18 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Parsing input parameters [Step 2/11] Loading OpenVINO Runtime [ INFO ] OpenVINO: - [ INFO ] Build ................................. 2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] + [ INFO ] Build ................................. 2024.4.0-16579-c3152d32c9c-releases/2024/4 + [ INFO ] [ INFO ] Device info: [ INFO ] AUTO - [ INFO ] Build ................................. 
2024.3.0-16041-1e3b88e4e3f-releases/2024/3 - [ INFO ] - [ INFO ] + [ INFO ] Build ................................. 2024.4.0-16579-c3152d32c9c-releases/2024/4 + [ INFO ] + [ INFO ] [Step 3/11] Setting device configuration [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.80 ms + [ INFO ] Read model took 10.95 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -764,7 +772,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 228.40 ms + [ INFO ] Compile model took 246.69 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -798,20 +806,20 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] PERF_COUNT: False [Step 9/11] Creating infer requests and preparing input tensors [ WARNING ] No input files were given for input 'x'!. This input will be filled with random values! - [ INFO ] Fill input 'x' with random values + [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 30.97 ms + [ INFO ] First inference took 27.16 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 960 iterations - [ INFO ] Duration: 15012.20 ms + [ INFO ] Count: 950 iterations + [ INFO ] Duration: 15005.10 ms [ INFO ] Latency: - [ INFO ] Median: 15.37 ms - [ INFO ] Average: 15.43 ms - [ INFO ] Min: 15.13 ms - [ INFO ] Max: 17.61 ms - [ INFO ] Throughput: 63.95 FPS + [ INFO ] Median: 15.55 ms + [ INFO ] Average: 15.59 ms + [ INFO ] Min: 15.30 ms + [ INFO ] Max: 17.50 ms + [ INFO ] Throughput: 63.31 FPS Visually Compare Inference Results @@ -845,11 +853,11 @@ seed is displayed to enable reproducing specific runs of this cell. # to binary segmentation masks def sigmoid(x): return np.exp(-np.logaddexp(0, -x)) - - + + num_images = 4 colormap = "gray" - + # Load FP32 and INT8 models core = ov.Core() fp_model = core.read_model(fp32_ir_path) @@ -858,18 +866,27 @@ seed is displayed to enable reproducing specific runs of this cell. 
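    # Worked illustration (not part of the original cell): the `sigmoid` helper defined above maps raw
    # network outputs to probabilities before rounding to a binary mask, e.g.
    # sigmoid(np.array([-2.0, 0.0, 2.0])) ~= [0.12, 0.50, 0.88], which rounds to [0., 0., 1.].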
compiled_model_int8 = core.compile_model(int8_model, device_name=device.value) output_layer_fp = compiled_model_fp.output(0) output_layer_int8 = compiled_model_int8.output(0) - + # Create subset of dataset background_slices = (item for item in dataset if np.count_nonzero(item[1]) == 0) kidney_slices = (item for item in dataset if np.count_nonzero(item[1]) > 50) - data_subset = random.sample(list(background_slices), 2) + random.sample(list(kidney_slices), 2) - + + background_slices_l = list(background_slices) + kidney_slices_l = list(kidney_slices) + if len(background_slices_l) != 0: + background_id = np.random.choice(len(background_slices_l), 2) + kidney_id = np.random.choice(len(kidney_slices_l), 2) + data_subset = [background_slices_l[idx] for idx in background_id] + [kidney_slices_l[idx] for idx in kidney_id] + else: + kidney_id = np.random.choice(len(kidney_slices_l), 2) + data_subset = [kidney_slices_l[idx] for idx in kidney_id] + # Set seed to current time. To reproduce specific results, copy the printed seed # and manually set `seed` to that value. seed = int(time.time()) - random.seed(seed) + np.random.seed(seed) print(f"Visualizing results with seed {seed}") - + fig, ax = plt.subplots(nrows=num_images, ncols=4, figsize=(24, num_images * 4)) for i, (image, mask) in enumerate(data_subset): display_image = rotate_and_flip(image.squeeze()) @@ -878,13 +895,13 @@ seed is displayed to enable reproducing specific runs of this cell. input_image = np.expand_dims(image, 0) res_fp = compiled_model_fp([input_image]) res_int8 = compiled_model_int8([input_image]) - + # Process inference outputs and convert to binary segementation masks result_mask_fp = sigmoid(res_fp[output_layer_fp]).squeeze().round().astype(np.uint8) result_mask_int8 = sigmoid(res_int8[output_layer_int8]).squeeze().round().astype(np.uint8) result_mask_fp = rotate_and_flip(result_mask_fp) result_mask_int8 = rotate_and_flip(result_mask_int8) - + # Display images, annotations, FP32 result and INT8 result ax[i, 0].imshow(display_image, cmap=colormap) ax[i, 1].imshow(target_mask, cmap=colormap) @@ -896,7 +913,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1724795489 + Visualizing results with seed 1727128557 @@ -912,7 +929,7 @@ To show live inference on the model in the notebook, we will use the asynchronous processing feature of OpenVINO. We use the ``show_live_inference`` function from `Notebook -Utils `__ to show live inference. This +Utils `__ to show live inference. This function uses `Open Model Zoo `__\ ’s Async Pipeline and Model API to perform asynchronous inference. After @@ -938,7 +955,7 @@ overlay of the segmentation mask on the original image/frame. .. code:: ipython3 CASE = 117 - + segmentation_model = SegmentationModel(ie=core, model_path=int8_ir_path, sigmoid=True, rotate_and_flip=True) case_path = BASEDIR / f"case_{CASE:05d}" image_paths = sorted(case_path.glob("imaging_frames/*jpg")) @@ -980,7 +997,7 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: Loaded model to AUTO in 0.15 seconds.
- Total time for 68 frames: 2.37 seconds, fps:29.11 + Total time for 68 frames: 2.31 seconds, fps:29.93 References diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index c8f9f978888f3e..479fbbc13510c0 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6827439250e99556dfa27c49018bd8ab66da4aa4ba7beb66a083b26e4e42386 -size 384271 +oid sha256:de1590c308306d9f19115503c44cbed2ae2b9b10e2ca29e02620b00f2d16fec3 +size 386773 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index 57ce98bce45412..e556a6c2a3ef4b 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -25,12 +25,10 @@ In this tutorial we consider how to convert and run DDColor using OpenVINO. Additionally, we will demonstrate how to optimize this model using `NNCF `__. -🪄 Let’s start to explore magic of image colorization! +🪄 Let’s start to explore magic of image colorization! **Table of contents:** - - - `Prerequisites <#prerequisites>`__ - `Load PyTorch model <#load-pytorch-model>`__ - `Run PyTorch model inference <#run-pytorch-model-inference>`__ @@ -70,7 +68,7 @@ Prerequisites .. code:: ipython3 import platform - + %pip install -q "nncf>=2.11.0" "torch>=2.1" "torchvision" "timm" "opencv_python" "pillow" "PyYAML" "scipy" "scikit-image" "datasets" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -Uq "openvino>=2024.3.0" if platform.python_version_tuple()[1] in ["8", "9"]: @@ -90,14 +88,20 @@ Prerequisites import sys from pathlib import Path - + import requests + repo_dir = Path("DDColor") - + if not repo_dir.exists(): !git clone https://github.com/piddnad/DDColor.git - + sys.path.append(str(repo_dir)) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + .. parsed-literal:: @@ -106,10 +110,18 @@ Prerequisites remote: Counting objects: 100% (76/76), done. remote: Compressing objects: 100% (42/42), done. remote: Total 233 (delta 54), reused 34 (delta 34), pack-reused 157 (from 1) - Receiving objects: 100% (233/233), 13.34 MiB | 21.08 MiB/s, done. + Receiving objects: 100% (233/233), 13.34 MiB | 21.85 MiB/s, done. Resolving deltas: 100% (80/80), done. + + +.. parsed-literal:: + + 24692 + + + .. code:: ipython3 try: @@ -131,14 +143,14 @@ models from DDColor family. .. code:: ipython3 import torch - + model_name = "ddcolor_paper_tiny" - + ddcolor_model = DDColorHF.from_pretrained(f"piddnad/{model_name}") - - + + colorizer = ImageColorizationPipelineHF(model=ddcolor_model, input_size=512) - + ddcolor_model.to("cpu") colorizer.device = torch.device("cpu") @@ -151,12 +163,12 @@ Run PyTorch model inference import cv2 import PIL - + IMG_PATH = "DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg" - - + + img = cv2.imread(IMG_PATH) - + PIL.Image.fromarray(img[:, :, ::-1]) @@ -195,9 +207,9 @@ loading on device using ``core.complie_model``. 
import openvino as ov import torch - + OV_COLORIZER_PATH = Path("ddcolor.xml") - + if not OV_COLORIZER_PATH.exists(): ov_model = ov.convert_model(ddcolor_model, example_input=torch.ones((1, 3, 512, 512)), input=[1, 3, 512, 512]) ov.save_model(ov_model, OV_COLORIZER_PATH) @@ -211,17 +223,12 @@ Select one of supported devices for inference using dropdown list. .. code:: ipython3 - import ipywidgets as widgets - + from notebook_utils import device_widget + core = ov.Core() - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) - + + device = device_widget() + device @@ -243,36 +250,36 @@ Select one of supported devices for inference using dropdown list. import numpy as np import torch import torch.nn.functional as F - - + + def process(img, compiled_model): # Preprocess input image height, width = img.shape[:2] - + # Normalize to [0, 1] range img = (img / 255.0).astype(np.float32) orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] # (h, w, 1) - + # Resize rgb image -> lab -> get grey -> rgb img = cv2.resize(img, (512, 512)) img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + # Transpose HWC -> CHW and add batch dimension tensor_gray_rgb = torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0) - + # Run model inference output_ab = compiled_model(tensor_gray_rgb)[0] - + # Postprocess result # resize ab -> concat original l -> rgb output_ab_resize = F.interpolate(torch.from_numpy(output_ab), size=(height, width))[0].float().numpy().transpose(1, 2, 0) output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1) output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR) - + output_img = (output_bgr * 255.0).round().astype(np.uint8) - + return output_img .. code:: ipython3 @@ -310,12 +317,9 @@ improve model inference speed. .. code:: ipython3 - to_quantize = widgets.Checkbox( - value=True, - description="Quantization", - disabled=False, - ) - + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() to_quantize @@ -330,15 +334,15 @@ improve model inference speed. .. code:: ipython3 import requests - + OV_INT8_COLORIZER_PATH = Path("ddcolor_int8.xml") compiled_int8_model = None - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Collect quantization dataset @@ -353,12 +357,12 @@ dataset from Hugging Face as calibration data. .. code:: ipython3 %%skip not $to_quantize.value - + from datasets import load_dataset - + subset_size = 300 calibration_data = [] - + if not OV_INT8_COLORIZER_PATH.exists(): dataset = load_dataset("ummagumm-a/colorization_dataset", split="train", streaming=True).shuffle(seed=42).take(subset_size) for idx, batch in enumerate(dataset): @@ -370,7 +374,7 @@ dataset from Hugging Face as calibration data. img_l = cv2.cvtColor(np.stack([img, img, img], axis=2), cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + image = np.expand_dims(img_gray_rgb.transpose((2, 0, 1)).astype(np.float32), axis=0) calibration_data.append(image) @@ -382,9 +386,9 @@ Perform model quantization .. 
code:: ipython3 %%skip not $to_quantize.value - + import nncf - + if not OV_INT8_COLORIZER_PATH.exists(): ov_model = core.read_model(OV_COLORIZER_PATH) quantized_model = nncf.quantize( @@ -402,10 +406,10 @@ Perform model quantization .. parsed-literal:: - 2024-08-27 23:53:59.503040: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-08-27 23:53:59.542136: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-09-23 23:58:13.958747: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-09-23 23:58:13.997019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-08-27 23:53:59.945639: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-09-23 23:58:14.401686: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -442,7 +446,7 @@ Run INT8 model inference .. code:: ipython3 from IPython.display import display - + if OV_INT8_COLORIZER_PATH.exists(): compiled_int8_model = core.compile_model(OV_INT8_COLORIZER_PATH, device.value) img = cv2.imread("DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg") @@ -462,9 +466,9 @@ Compare FP16 and INT8 model size .. code:: ipython3 fp16_ir_model_size = OV_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} MB") - + if OV_INT8_COLORIZER_PATH.exists(): quantized_model_size = OV_INT8_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 print(f"INT8 model size: {quantized_model_size:.2f} MB") @@ -502,18 +506,18 @@ Tool `__ to speed up the model. + **Table of contents:** @@ -96,10 +97,10 @@ Prerequisites remote: Enumerating objects: 154, done. remote: Counting objects: 100% (150/150), done. remote: Compressing objects: 100% (147/147), done. - remote: Total 154 (delta 43), reused 0 (delta 0), pack-reused 4 (from 1) - Receiving objects: 100% (154/154), 7.76 MiB | 13.08 MiB/s, done. - Resolving deltas: 100% (43/43), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2 + remote: Total 154 (delta 46), reused 0 (delta 0), pack-reused 4 (from 1) + Receiving objects: 100% (154/154), 7.75 MiB | 11.12 MiB/s, done. + Resolving deltas: 100% (46/46), done. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2 .. code:: ipython3 @@ -239,7 +240,7 @@ is preprocessed image height, ``W`` is preprocessed image width. xFormers not available xFormers not available - /tmp/ipykernel_74177/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_78323/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(model_path, map_location="cpu")) @@ -271,7 +272,7 @@ is preprocessed image height, ``W`` is preprocessed image width. .. parsed-literal:: - + @@ -305,13 +306,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -403,7 +404,7 @@ range. .. parsed-literal:: - + @@ -625,7 +626,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.26 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.51 + Processed 60 frames in 13.27 seconds. Total FPS (including video processing): 4.52.Inference FPS: 10.67 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -652,7 +653,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -785,10 +786,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-08-28 00:01:55.085324: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-08-28 00:01:55.118894: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-09-24 00:08:42.576231: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-09-24 00:08:42.609829: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-08-28 00:01:55.711661: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-09-24 00:08:43.185945: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -817,12 +818,6 @@ quantization code below may take some time. -.. parsed-literal:: - - INFO:nncf:36 ignored nodes were found by names in the NNCFGraph - INFO:nncf:48 ignored nodes were found by names in the NNCFGraph - - .. parsed-literal:: @@ -920,10 +915,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.95 seconds. Total FPS (including video processing): 4.63.Inference FPS: 12.66 + Processed 60 frames in 12.69 seconds. Total FPS (including video processing): 4.73.Inference FPS: 13.15 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -1003,9 +998,9 @@ Tool `__ to speed up the model. + **Table of contents:** @@ -89,9 +90,9 @@ Prerequisites remote: Counting objects: 100% (161/161), done. remote: Compressing objects: 100% (120/120), done. remote: Total 441 (delta 115), reused 44 (delta 41), pack-reused 280 (from 1) - Receiving objects: 100% (441/441), 237.90 MiB | 29.17 MiB/s, done. + Receiving objects: 100% (441/441), 237.90 MiB | 23.87 MiB/s, done. Resolving deltas: 100% (158/158), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. WARNING: typer 0.12.5 does not provide the extra 'all' @@ -144,8 +145,6 @@ DepthAnything family. 
xFormers not available xFormers not available - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/hub_mixin.py:824: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - state_dict = torch.load(model_file, map_location=torch.device(map_location)) Prepare input data @@ -285,13 +284,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -574,7 +573,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.36 seconds. Total FPS (including video processing): 4.49.Inference FPS: 10.46 + Processed 60 frames in 13.25 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.66 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -601,7 +600,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -734,10 +733,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-08-28 00:10:42.268363: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-08-28 00:10:42.301084: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-09-24 00:17:42.805749: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-09-24 00:17:42.838788: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-08-28 00:10:42.870851: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-09-24 00:17:43.434479: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -766,12 +765,6 @@ quantization code below may take some time. -.. parsed-literal:: - - INFO:nncf:36 ignored nodes were found by names in the NNCFGraph - INFO:nncf:48 ignored nodes were found by names in the NNCFGraph - - .. parsed-literal:: @@ -869,10 +862,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.77 seconds. Total FPS (including video processing): 4.70.Inference FPS: 12.70 + Processed 60 frames in 12.67 seconds. Total FPS (including video processing): 4.74.Inference FPS: 13.05 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -952,9 +945,9 @@ Tool `__ dataset as examples for object detection and instance segmentation respectively. + **Table of contents:** @@ -65,6 +66,13 @@ Install required packages for running model %pip install -q "torch" "torchvision" "opencv-python" "wheel" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/facebookresearch/detectron2.git" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2023.1.0" + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) .. parsed-literal:: @@ -74,6 +82,14 @@ Install required packages for running model Note: you may need to restart the kernel to use updated packages. + + +.. parsed-literal:: + + 24692 + + + Define helpers for PyTorch model initialization and conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -278,16 +294,11 @@ select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 - import ipywidgets as widgets + from notebook_utils import device_widget core = ov.Core() - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) + device = device_widget() device diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index 57c00f491714f3..0806b69e31fd9c 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a4a467ab42400c476fcd9e154946dd0cf3da1de4d63e1bc265960e3cde7faf5 -size 57915 +oid sha256:ac760043a7d6b079844463b15892d73a9f48d534fba9bc42c03ecf9c5947f905 +size 58540 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 3e22de58350db5..1f54f06e4d2edb 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10ac81e5d0598995582dc22a386818963beb7eb5ef3c28a791c895abba98cd3e -size 509581 +oid sha256:797cb2f14de39aba4f4101c661d55537cfe69c61ddc0a4c9dc7c347532486590 +size 509037 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index 7f34d4159b6f8f..a2aeb0d422ba7c 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cd70cfc31f282e172156dafc6a76543364ea1fd6ece3945bcc4157564ef47cb -size 56946 +oid sha256:d5381214d22be861b0bb5a406157cd84fd9924aee68003de9e411ecee78bf372 +size 56822 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index 49c647262aed65..a8df1c182c8057 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a830f5102a151d058797e0c8c8c5b5726380fd7f1e4ed08a2f88ff46bb4b33a2 -size 458206 +oid sha256:6c29ed4785c3fcb93ac4b555bde92e0359ba315f37d96d4ab23bda25e838228f +size 459672 diff --git a/docs/notebooks/distil-whisper-asr-with-output.rst b/docs/notebooks/distil-whisper-asr-with-output.rst index ef976abf327795..ef263d023236d4 100644 --- a/docs/notebooks/distil-whisper-asr-with-output.rst +++ b/docs/notebooks/distil-whisper-asr-with-output.rst @@ -36,6 +36,7 @@ convert the model to OpenVINO™ IR format. 
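As a brief illustration of that conversion step, the sketch below shows how a Distil-Whisper checkpoint can be exported to OpenVINO IR through Optimum Intel. It is a minimal reference example: the checkpoint name ``distil-whisper/distil-large-v2`` and the output folder are assumptions, and the notebook itself selects the model interactively and performs the export with its own helper code.

.. code:: ipython3

    # Minimal sketch (not the notebook's own cell): export a Distil-Whisper
    # checkpoint to OpenVINO IR with Optimum Intel. The model id and output
    # directory below are assumptions used only for illustration.
    from optimum.intel.openvino import OVModelForSpeechSeq2Seq
    from transformers import AutoProcessor

    model_id = "distil-whisper/distil-large-v2"  # assumed checkpoint
    processor = AutoProcessor.from_pretrained(model_id)

    # export=True converts the PyTorch weights to OpenVINO IR on the fly;
    # compile=False postpones compilation until a target device is chosen.
    ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True, compile=False)
    ov_model.save_pretrained("distil-whisper-ov")
    processor.save_pretrained("distil-whisper-ov")

After the export, the saved IR can be reloaded with the same class and compiled for the device selected later in the notebook.
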
To further improve OpenVINO Distil-Whisper model performance ``INT8`` post-training quantization from `NNCF `__ is applied. + **Table of contents:** @@ -84,10 +85,17 @@ Prerequisites .. code:: ipython3 - %pip install -q "transformers>=4.35" "torch>=2.1,<2.4.0" "torchvision<0.19.0" onnx "peft==0.6.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.35" "torch>=2.1,<2.4.0" "torchvision<0.19.0" "onnx<1.16.2" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" %pip install -q "openvino>=2023.2.0" datasets "gradio>=4.0" "librosa" "soundfile" %pip install -q "nncf>=2.6.0" "jiwer" + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) Load PyTorch model ------------------ @@ -311,17 +319,9 @@ Select Inference device .. code:: ipython3 - import openvino as ov - import ipywidgets as widgets + from notebook_utils import device_widget - core = ov.Core() - - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) + device = device_widget() device @@ -638,11 +638,9 @@ quantization. .. code:: ipython3 - to_quantize = widgets.Checkbox( - value=True, - description="Quantization", - disabled=False, - ) + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() to_quantize @@ -726,6 +724,7 @@ negligible. import gc import shutil import nncf + import openvino as ov CALIBRATION_DATASET_SIZE = 50 quantized_model_path = Path(f"{model_path}_quantized") diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst index 6037511ebfd44f..d987e0c9eae7be 100644 --- a/docs/notebooks/distilbert-sequence-classification-with-output.rst +++ b/docs/notebooks/distilbert-sequence-classification-with-output.rst @@ -7,6 +7,7 @@ identify, extract, quantify, and study affective states and subjective information. This notebook demonstrates how to convert and run a sequence classification model using OpenVINO. + **Table of contents:** @@ -46,31 +47,31 @@ Imports .. 
parsed-literal:: Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.3.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.44.2) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.0+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) - Requirement already satisfied: numpy<2.0.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.15.4) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.24.6) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.7.24) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.4) - Requirement already satisfied: tokenizers<0.20,>=0.19 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.19.1) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.2) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - 
Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.8) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.2) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.7.4) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.44.2) + Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.5) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) + Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.25.1) + Requirement already satisfied: pyyaml>=5.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) + Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) + Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) + Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) + Requirement already satisfied: tokenizers<0.20,>=0.19 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.19.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.6.1) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.3.2) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) Note: you may need to restart the kernel to use updated packages. @@ -132,7 +133,7 @@ understand the context of a sentence. Here, we will use .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( @@ -172,9 +173,9 @@ optimal execution on end-point target devices. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-780/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py:215: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. mask, torch.tensor(torch.finfo(scores.dtype).min) @@ -280,7 +281,7 @@ For a single input sentence .. parsed-literal:: Label: POSITIVE - Total Time: 0.02 seconds + Total Time: 0.03 seconds Read from a text file diff --git a/docs/notebooks/dolly-2-instruction-following-with-output.rst b/docs/notebooks/dolly-2-instruction-following-with-output.rst index 6125e5db6fd20e..9f6857b608d962 100644 --- a/docs/notebooks/dolly-2-instruction-following-with-output.rst +++ b/docs/notebooks/dolly-2-instruction-following-with-output.rst @@ -81,6 +81,7 @@ dataset can be found in `Databricks blog post `__ and `repo `__ + **Table of contents:** @@ -140,9 +141,16 @@ documentation `__. 
%pip uninstall -q -y optimum optimum-intel %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - %pip install -q "diffusers>=0.16.1" "transformers>=4.33.0" "torch>=2.1" "nncf>=2.10.0" onnx "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "diffusers>=0.16.1" "transformers>=4.33.0" "torch>=2.1" "nncf>=2.10.0" "onnx<1.16.2" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + Convert model using Optimum-CLI tool ------------------------------------ @@ -203,7 +211,7 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments: +For INT4 quantization you can also specify the following arguments : - The ``--group-size`` parameter will define the group size to use for quantization, -1 it will results in per-column quantization. @@ -433,18 +441,12 @@ select device from dropdown list for running inference using OpenVINO .. code:: ipython3 - import ipywidgets as widgets + from notebook_utils import device_widget import openvino as ov core = ov.Core() - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="CPU", - description="Device:", - disabled=False, - ) - + device = device_widget("CPU", exclude=["NPU"]) device @@ -495,9 +497,15 @@ guide `__ .. code:: ipython3 from pathlib import Path + from transformers import AutoTokenizer from optimum.intel.openvino import OVModelForCausalLM + import openvino.properties as props + import openvino.properties.hint as hints + import openvino.properties.streams as streams + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -510,7 +518,7 @@ guide `__ current_device = device.value - ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""} + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config) diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 6bd52c6f59b5e6..08850d5989f830 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -102,6 +102,7 @@ additional part demonstrates how to run optimization with + **Table of contents:** @@ -129,7 +130,9 @@ additional part demonstrates how to run optimization with - `Compare inference time of the FP32 and INT8 pipelines <#compare-inference-time-of-the-fp32-and-int8-pipelines>`__ -- `Interactive inference <#interactive-inference>`__ +- `Interactive inference <#interactive-inference>`__ + + This is a self-contained example that relies solely on its own code. @@ -148,43 +151,28 @@ Prerequisites %pip install -q "openvino>=2024.2.0" "nncf>=2.11.0" "datasets>=2.20.0" %pip install -q "gradio>=4.19" omegaconf einops pytorch_lightning kornia "open_clip_torch==2.22.0" transformers av opencv-python "torch==2.2.2" --extra-index-url https://download.pytorch.org/whl/cpu - -.. 
parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import sys from pathlib import Path - - + import requests + + dynamicrafter_path = Path("dynamicrafter") - + if not dynamicrafter_path.exists(): dynamicrafter_path.mkdir(parents=True, exist_ok=True) !git clone https://github.com/Doubiiu/DynamiCrafter.git dynamicrafter %cd dynamicrafter !git checkout 26e665cd6c174234238d2ded661e2e56f875d360 -q # to avoid breaking changes %cd .. - + sys.path.append(str(dynamicrafter_path)) - - -.. parsed-literal:: - - Cloning into 'dynamicrafter'... - remote: Enumerating objects: 329, done. - remote: Counting objects: 100% (166/166), done. - remote: Compressing objects: 100% (100/100), done. - remote: Total 329 (delta 103), reused 87 (delta 66), pack-reused 163 (from 1) - Receiving objects: 100% (329/329), 72.40 MiB | 21.70 MiB/s, done. - Resolving deltas: 100% (118/118), done. - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images - + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) Load and run the original pipeline ---------------------------------- @@ -199,14 +187,14 @@ We will use model for 256x256 resolution as example. Also, models for import os from collections import OrderedDict - + import torch from huggingface_hub import hf_hub_download from omegaconf import OmegaConf - + from dynamicrafter.utils.utils import instantiate_from_config - - + + def load_model_checkpoint(model, ckpt): def load_checkpoint(model, ckpt, full_strict): state_dict = torch.load(ckpt, map_location="cpu") @@ -219,7 +207,7 @@ We will use model for 256x256 resolution as example. Also, models for new_pl_sd = OrderedDict() for k, v in state_dict.items(): new_pl_sd[k] = v - + for k in list(new_pl_sd.keys()): if "framestride_embed" in k: new_key = k.replace("framestride_embed", "fps_embedding") @@ -232,14 +220,14 @@ We will use model for 256x256 resolution as example. Also, models for for key in state_dict["module"].keys(): new_pl_sd[key[16:]] = state_dict["module"][key] model.load_state_dict(new_pl_sd, strict=full_strict) - + return model - + load_checkpoint(model, ckpt, full_strict=True) print(">>> model checkpoint loaded.") return model - - + + def download_model(): REPO_ID = "Doubiiu/DynamiCrafter" if not os.path.exists("./checkpoints/dynamicrafter_256_v1/"): @@ -247,7 +235,7 @@ We will use model for 256x256 resolution as example. Also, models for local_file = os.path.join("./checkpoints/dynamicrafter_256_v1/model.ckpt") if not os.path.exists(local_file): hf_hub_download(repo_id=REPO_ID, filename="model.ckpt", local_dir="./checkpoints/dynamicrafter_256_v1/", local_dir_use_symlinks=False) - + ckpt_path = "checkpoints/dynamicrafter_256_v1/model.ckpt" config_file = "dynamicrafter/configs/inference_256_v1.0.yaml" config = OmegaConf.load(config_file) @@ -256,31 +244,18 @@ We will use model for 256x256 resolution as example. Also, models for model = instantiate_from_config(model_config) model = load_model_checkpoint(model, ckpt_path) model.eval() - + return model - - + + model = download_model() -.. 
parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1212: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. - For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. - warnings.warn( - - - -.. parsed-literal:: - - model.ckpt: 0%| | 0.00/10.4G [00:00>> model checkpoint loaded. - + Convert the model to OpenVINO IR -------------------------------- @@ -295,10 +270,10 @@ file. .. code:: ipython3 import gc - + import openvino as ov - - + + def convert(model: torch.nn.Module, xml_path: str, example_input, input_shape=None): xml_path = Path(xml_path) if not xml_path.exists(): @@ -309,7 +284,7 @@ file. else: converted_model = ov.convert_model(model, example_input=example_input, input=input_shape) ov.save_model(converted_model, xml_path, compress_to_fp16=False) - + # cleanup memory torch._C._jit_clear_class_registry() torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() @@ -333,26 +308,27 @@ Convert CLIP text encoder .. code:: ipython3 from dynamicrafter.lvdm.modules.encoders.condition import FrozenOpenCLIPEmbedder - - - COND_STAGE_MODEL_OV_PATH = Path("models/cond_stage_model.xml") - - + + MODEL_DIR = Path("models") + + COND_STAGE_MODEL_OV_PATH = MODEL_DIR / "cond_stage_model.xml" + + class FrozenOpenCLIPEmbedderWrapper(FrozenOpenCLIPEmbedder): def forward(self, tokens): z = self.encode_with_transformer(tokens.to(self.device)) return z - - + + cond_stage_model = FrozenOpenCLIPEmbedderWrapper(device="cpu") - + if not COND_STAGE_MODEL_OV_PATH.exists(): convert( cond_stage_model, COND_STAGE_MODEL_OV_PATH, example_input=torch.ones([1, 77], dtype=torch.long), ) - + del cond_stage_model gc.collect(); @@ -365,62 +341,19 @@ resolutions. .. code:: ipython3 - EMBEDDER_OV_PATH = Path("models/embedder_ir.xml") - - + EMBEDDER_OV_PATH = MODEL_DIR / "embedder_ir.xml" + + dummy_input = torch.rand([1, 3, 767, 767], dtype=torch.float32) - + model.embedder.model.visual.input_patchnorm = None # fix error: visual model has not attribute 'input_patchnorm' if not EMBEDDER_OV_PATH.exists(): convert(model.embedder, EMBEDDER_OV_PATH, example_input=dummy_input, input_shape=[1, 3, -1, -1]) - - + + del model.embedder gc.collect(); - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - std = torch.as_tensor(std, device=data.device, dtype=data.dtype) - - Convert AE encoder ~~~~~~~~~~~~~~~~~~ @@ -428,28 +361,21 @@ Convert AE encoder .. code:: ipython3 - ENCODER_FIRST_STAGE_OV_PATH = Path("models/encoder_first_stage_ir.xml") - - + ENCODER_FIRST_STAGE_OV_PATH = MODEL_DIR / "encoder_first_stage_ir.xml" + + dummy_input = torch.rand([1, 3, 256, 256], dtype=torch.float32) - + if not ENCODER_FIRST_STAGE_OV_PATH.exists(): convert( model.first_stage_model.encoder, ENCODER_FIRST_STAGE_OV_PATH, example_input=dummy_input, ) - + del model.first_stage_model.encoder gc.collect(); - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - w_ = w_ * (int(c)**(-0.5)) - - Convert Diffusion U-Net model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -457,19 +383,19 @@ Convert Diffusion U-Net model .. code:: ipython3 - MODEL_OV_PATH = Path("models/model_ir.xml") - - + MODEL_OV_PATH = MODEL_DIR / "model_ir.xml" + + class ModelWrapper(torch.nn.Module): def __init__(self, diffusion_model): super().__init__() self.diffusion_model = diffusion_model - + def forward(self, xc, t, context=None, fs=None, temporal_length=None): outputs = self.diffusion_model(xc, t, context=context, fs=fs, temporal_length=temporal_length) return outputs - - + + if not MODEL_OV_PATH.exists(): convert( ModelWrapper(model.model.diffusion_model), @@ -482,26 +408,11 @@ Convert Diffusion U-Net model "temporal_length": torch.tensor([16]), }, ) - + out_channels = model.model.diffusion_model.out_channels del model.model.diffusion_model gc.collect(); - -.. 
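The shapes used throughout the conversion follow the 256x256 configuration:
each 256x256 RGB frame is encoded into a 4-channel latent of 256/8 = 32 pixels
per side, the model generates 16 frames, and the frame latents are flattened
into the batch dimension before decoding (see ``get_latent_z`` later in the
pipeline). A small, purely illustrative recap of that arithmetic:

.. code:: python

    # Illustrative only: latent geometry of the 256x256 DynamiCrafter model.
    frames = 16                 # temporal_length of the 256x256 checkpoint
    latent_channels = 4         # channels produced by the AE encoder (assumption)
    latent_hw = 256 // 8        # spatial downscale factor of 8 -> 32x32 latents
    decoder_batch = 1 * frames  # frame latents are packed along the batch axis
    print([decoder_batch, latent_channels, latent_hw, latent_hw])  # [16, 4, 32, 32]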
parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if l_context == 77 + t*16: ## !!! HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/ov-notebook/OVNotebookOps-761/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert x.shape[1] == self.channels - - Convert AE decoder ~~~~~~~~~~~~~~~~~~ @@ -513,31 +424,31 @@ to float32. .. code:: ipython3 import types - - + + def decode(self, z, **kwargs): z = self.post_quant_conv(z) z = z.float() dec = self.decoder(z) return dec - - + + model.first_stage_model.decode = types.MethodType(decode, model.first_stage_model) .. code:: ipython3 - DECODER_FIRST_STAGE_OV_PATH = Path("models/decoder_first_stage_ir.xml") - - + DECODER_FIRST_STAGE_OV_PATH = MODEL_DIR / "decoder_first_stage_ir.xml" + + dummy_input = torch.rand([16, 4, 32, 32], dtype=torch.float32) - + if not DECODER_FIRST_STAGE_OV_PATH.exists(): convert( model.first_stage_model.decoder, DECODER_FIRST_STAGE_OV_PATH, example_input=dummy_input, ) - + del model.first_stage_model.decoder gc.collect(); @@ -550,16 +461,11 @@ Select device from dropdown list for running inference using OpenVINO. .. 
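The cells below select an inference device and compile each converted IR with
``core.compile_model``. Note that ``compile_model`` can also load and compile
in one step when given the path to an ``.xml`` file; a minimal sketch, assuming
the IRs were saved under ``models/`` as above:

.. code:: python

    import openvino as ov

    core = ov.Core()
    # Read and compile in one call; "AUTO" lets OpenVINO choose the best device.
    compiled_embedder = core.compile_model("models/embedder_ir.xml", "AUTO")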
code:: ipython3 - import ipywidgets as widgets - + from notebook_utils import device_widget + core = ov.Core() - device = widgets.Dropdown( - options=core.available_devices + ["AUTO"], - value="AUTO", - description="Device:", - disabled=False, - ) - + device = device_widget() + device @@ -573,11 +479,17 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 - compiled_cond_stage_model = core.compile_model(core.read_model(COND_STAGE_MODEL_OV_PATH), device.value) - compiled_encode_first_stage = core.compile_model(core.read_model(ENCODER_FIRST_STAGE_OV_PATH), device.value) - compiled_embedder = core.compile_model(core.read_model(EMBEDDER_OV_PATH), device.value) - compiled_model = core.compile_model(core.read_model(MODEL_OV_PATH), device.value) - compiled_decoder_first_stage = core.compile_model(core.read_model(DECODER_FIRST_STAGE_OV_PATH), device.value) + cond_stage_model = core.read_model(COND_STAGE_MODEL_OV_PATH) + encoder_first_stage = core.read_model(ENCODER_FIRST_STAGE_OV_PATH) + embedder = core.read_model(EMBEDDER_OV_PATH) + model_ov = core.read_model(MODEL_OV_PATH) + decoder_first_stage = core.read_model(DECODER_FIRST_STAGE_OV_PATH) + + compiled_cond_stage_model = core.compile_model(cond_stage_model, device.value) + compiled_encode_first_stage = core.compile_model(encoder_first_stage, device.value) + compiled_embedder = core.compile_model(embedder, device.value) + compiled_model = core.compile_model(model_ov, device.value) + compiled_decoder_first_stage = core.compile_model(decoder_first_stage, device.value) Building the pipeline --------------------- @@ -590,50 +502,57 @@ return ``torch.Tensor``\ s instead of ``np.array``\ s. .. code:: ipython3 + from typing import Any import open_clip - - + + class CondStageModelWrapper(torch.nn.Module): def __init__(self, cond_stage_model): super().__init__() self.cond_stage_model = cond_stage_model - + def encode(self, tokens): if isinstance(tokens, list): tokens = open_clip.tokenize(tokens[0]) outs = self.cond_stage_model(tokens)[0] - + return torch.from_numpy(outs) - - + + class EncoderFirstStageModelWrapper(torch.nn.Module): def __init__(self, encode_first_stage): super().__init__() self.encode_first_stage = encode_first_stage - + def forward(self, x): outs = self.encode_first_stage(x)[0] - + return torch.from_numpy(outs) - - + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + return self.forward(*args, **kwargs) + + class EmbedderWrapper(torch.nn.Module): def __init__(self, embedder): super().__init__() self.embedder = embedder - + def forward(self, x): outs = self.embedder(x)[0] - + return torch.from_numpy(outs) - - + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + return self.forward(*args, **kwargs) + + class CModelWrapper(torch.nn.Module): def __init__(self, diffusion_model, out_channels): super().__init__() self.diffusion_model = diffusion_model self.out_channels = out_channels - + def forward(self, xc, t, context, fs, temporal_length): inputs = { "xc": xc, @@ -642,20 +561,26 @@ return ``torch.Tensor``\ s instead of ``np.array``\ s. 
"fs": fs, } outs = self.diffusion_model(inputs)[0] - + return torch.from_numpy(outs) - - + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + return self.forward(*args, **kwargs) + + class DecoderFirstStageModelWrapper(torch.nn.Module): def __init__(self, decoder_first_stage): super().__init__() self.decoder_first_stage = decoder_first_stage - + def forward(self, x): x.float() outs = self.decoder_first_stage(x)[0] - + return torch.from_numpy(outs) + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + return self.forward(*args, **kwargs) And insert wrappers instances in the pipeline: @@ -676,42 +601,42 @@ Run OpenVINO pipeline inference from einops import repeat, rearrange import torchvision.transforms as transforms - - + + transform = transforms.Compose( [ transforms.Resize(min((256, 256))), transforms.CenterCrop((256, 256)), ] ) - - + + def get_latent_z(model, videos): b, c, t, h, w = videos.shape x = rearrange(videos, "b c t h w -> (b t) c h w") z = model.encode_first_stage(x) z = rearrange(z, "(b t) c h w -> b c t h w", b=b, t=t) return z - - + + def process_input(model, prompt, image, transform=transform, fs=3): text_emb = model.get_learned_conditioning([prompt]) - + # img cond img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) img_tensor = (img_tensor / 255.0 - 0.5) * 2 - + image_tensor_resized = transform(img_tensor) # 3,h,w videos = image_tensor_resized.unsqueeze(0) # bchw - + z = get_latent_z(model, videos.unsqueeze(2)) # bc,1,hw frames = model.temporal_length img_tensor_repeat = repeat(z, "b c t h w -> b c (repeat t) h w", repeat=frames) - + cond_images = model.embedder(img_tensor.unsqueeze(0)) # blc img_emb = model.image_proj_model(cond_images) imtext_cond = torch.cat([text_emb, img_emb], dim=1) - + fs = torch.tensor([fs], dtype=torch.long, device=model.device) cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat]} return cond @@ -724,15 +649,15 @@ Run OpenVINO pipeline inference from lvdm.models.samplers.ddim import DDIMSampler from pytorch_lightning import seed_everything import torchvision - - + + def register_buffer(self, name, attr): if isinstance(attr, torch.Tensor): if attr.device != torch.device("cpu"): attr = attr.to(torch.device("cpu")) setattr(self, name, attr) - - + + def batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=50, ddim_eta=1.0, cfg_scale=1.0, temporal_cfg_scale=None, **kwargs): ddim_sampler = DDIMSampler(model) uncond_type = model.uncond_type @@ -754,7 +679,7 @@ Run OpenVINO pipeline inference elif uncond_type == "zero_embed": c_emb = cond["c_crossattn"][0] if isinstance(cond, dict) else cond uc_emb = torch.zeros_like(c_emb) - + # process image embedding token if hasattr(model, "embedder"): uc_img = torch.zeros(noise_shape[0], 3, 224, 224).to(model.device) @@ -762,7 +687,7 @@ Run OpenVINO pipeline inference uc_img = model.embedder(uc_img) uc_img = model.image_proj_model(uc_img) uc_emb = torch.cat([uc_emb, uc_img], dim=1) - + if isinstance(cond, dict): uc = {key: cond[key] for key in cond.keys()} uc.update({"c_crossattn": [uc_emb]}) @@ -770,10 +695,10 @@ Run OpenVINO pipeline inference uc = uc_emb else: uc = None - + x_T = None batch_variants = [] - + for _ in range(n_samples): if ddim_sampler is not None: kwargs.update({"clean_cond": True}) @@ -800,12 +725,12 @@ Run OpenVINO pipeline inference # batch, , c, t, h, w batch_variants = torch.stack(batch_variants, dim=1) return batch_variants - - + + # monkey patching to replace the original method 'register_buffer' that uses 
CUDA DDIMSampler.register_buffer = types.MethodType(register_buffer, DDIMSampler) - - + + def save_videos(batch_tensors, savedir, filenames, fps=10): # b,samples,c,t,h,w n_samples = batch_tensors.shape[1] @@ -819,14 +744,14 @@ Run OpenVINO pipeline inference grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1) savepath = os.path.join(savedir, f"{filenames[idx]}.mp4") torchvision.io.write_video(savepath, grid, fps=fps, video_codec="h264", options={"crf": "10"}) - - + + def get_image(image, prompt, steps=5, cfg_scale=7.5, eta=1.0, fs=3, seed=123, model=model, result_dir="results"): if not os.path.exists(result_dir): os.mkdir(result_dir) - + seed_everything(seed) - + # torch.cuda.empty_cache() print("start:", prompt, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))) start = time.time() @@ -838,11 +763,11 @@ Run OpenVINO pipeline inference frames = model.temporal_length h, w = 256 // 8, 256 // 8 noise_shape = [batch_size, channels, frames, h, w] - + # text cond with torch.no_grad(), torch.cpu.amp.autocast(): cond = process_input(model, prompt, image, transform, fs=3) - + ## inference batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale) ## b,samples,c,t,h,w @@ -851,10 +776,10 @@ Run OpenVINO pipeline inference prompt_str = prompt_str[:40] if len(prompt_str) == 0: prompt_str = "empty_prompt" - + save_videos(batch_samples, result_dir, filenames=[prompt_str], fps=8) print(f"Saved in {prompt_str}.mp4. Time used: {(time.time() - start):.2f} seconds") - + return os.path.join(result_dir, f"{prompt_str}.mp4") .. code:: ipython3 @@ -871,20 +796,20 @@ Run OpenVINO pipeline inference .. parsed-literal:: Seed set to 234 - /tmp/ipykernel_78601/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_971108/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) - + .. parsed-literal:: - start: man fishing in a boat at sunset 2024-08-28 00:24:49 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 190.25 seconds - + start: man fishing in a boat at sunset 2024-08-06 13:54:24 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 164.28 seconds + .. code:: ipython3 from IPython.display import HTML - + HTML( f"""