From f55fb996f63e95361dcdbf96a889beee2ab78357 Mon Sep 17 00:00:00 2001
From: Jeremy Dyer
Date: Tue, 22 Oct 2024 17:19:21 -0400
Subject: [PATCH 1/2] Adjust NIM inference endpoints and add HTTP endpoints

---
 helm/values.yaml          | 20 +++++++++++++++-----
 src/nv_ingest/api/main.py |  4 +++-
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/helm/values.yaml b/helm/values.yaml
index 9cc63cfb..32aef7d0 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -75,8 +75,10 @@ resources:
   limits:
     memory: 32Gi
     nvidia.com/gpu: 1
+    cpu: "36000m"
   requests:
     memory: 16Gi
+    cpu: "12000m"

 ## @param tmpDirSize [default: 8Gi] Specify the amount of space to reserve for temporary storage
@@ -253,15 +255,19 @@ redis:
 ## @skip envVars.REDIS_MORPHEUS_TASK_QUEUE
 ## @skip envVars.CACHED_GRPC_ENDPOINT
 ## @skip envVars.CACHED_HTTP_ENDPOINT
+## @skip envVars.CACHED_INFER_PROTOCOL
 ## @skip envVars.PADDLE_GRPC_ENDPOINT
 ## @skip envVars.PADDLE_HTTP_ENDPOINT
+## @skip envVars.PADDLE_INFER_PROTOCOL
 ## @skip envVars.YOLOX_GRPC_ENDPOINT
 ## @skip envVars.YOLOX_HTTP_ENDPOINT
+## @skip envVars.YOLOX_INFER_PROTOCOL
 ## @skip envVars.DEPLOT_GRPC_ENDPOINT
 ## @skip envVars.DEPLOT_HTTP_ENDPOINT
+## @skip envVars.DEPLOT_INFER_PROTOCOL
 envVars:
-  MESSAGE_CLIENT_HOST: "nv-ingest-ms-runtime"
-  MESSAGE_CLIENT_PORT: "7670"
+  MESSAGE_CLIENT_HOST: "nv-ingest-redis-master"
+  MESSAGE_CLIENT_PORT: "6379"
   REDIS_MORPHEUS_TASK_QUEUE: "morpheus_task_queue"

   NV_INGEST_DEFAULT_TIMEOUT_MS: "1234"
@@ -270,13 +276,17 @@
   MINIO_BUCKET: nv-ingest

   CACHED_GRPC_ENDPOINT: nv-ingest-cached:8001
-  CACHED_HTTP_ENDPOINT: ""
+  CACHED_HTTP_ENDPOINT: http://nv-ingest-cached:8000/v1/infer
+  CACHED_INFER_PROTOCOL: grpc
   PADDLE_GRPC_ENDPOINT: nv-ingest-paddle:8001
-  PADDLE_HTTP_ENDPOINT: ""
+  PADDLE_HTTP_ENDPOINT: http://nv-ingest-paddle:8000/v1/infer
+  PADDLE_INFER_PROTOCOL: grpc
   YOLOX_GRPC_ENDPOINT: nv-ingest-yolox:8001
-  YOLOX_HTTP_ENDPOINT: ""
+  YOLOX_HTTP_ENDPOINT: http://nv-ingest-yolox:8000/v1/infer
+  YOLOX_INFER_PROTOCOL: grpc
   DEPLOT_GRPC_ENDPOINT: ""
   DEPLOT_HTTP_ENDPOINT: http://nv-ingest-deplot:8000/v1/chat/completions
+  DEPLOT_INFER_PROTOCOL: http

   EMBEDDING_NIM_ENDPOINT: "http://nv-ingest-embedding:8000/v1"
   MILVUS_ENDPOINT: "http://nv-ingest-milvus:19530"
diff --git a/src/nv_ingest/api/main.py b/src/nv_ingest/api/main.py
index 7800a61e..9beba4f6 100644
--- a/src/nv_ingest/api/main.py
+++ b/src/nv_ingest/api/main.py
@@ -9,6 +9,7 @@
 # its affiliates is strictly prohibited.

 import logging
+import os

 from opentelemetry import trace
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
@@ -24,7 +25,8 @@
 trace.set_tracer_provider(TracerProvider())
 tracer = trace.get_tracer(__name__)

-exporter = OTLPSpanExporter(endpoint="otel-collector:4317", insecure=True)
+otel_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "otel-collector:4317")
+exporter = OTLPSpanExporter(endpoint=otel_endpoint, insecure=True)
 span_processor = BatchSpanProcessor(exporter)
 trace.get_tracer_provider().add_span_processor(span_processor)
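Taken together, the first patch gives each NIM both a gRPC and an HTTP inference endpoint, with a companion *_INFER_PROTOCOL variable (grpc or http) selecting which one a deployment should use. As a rough sketch only, here is how a consumer of these environment variables might resolve an endpoint; the helper name and the grpc fallback are assumptions for illustration, not nv-ingest's actual resolution logic:

import os

# Hypothetical helper, not part of nv-ingest: pick the endpoint that matches
# a service's *_INFER_PROTOCOL setting (assumed to default to gRPC when unset).
def resolve_inference_endpoint(service: str) -> tuple[str, str]:
    protocol = os.getenv(f"{service}_INFER_PROTOCOL", "grpc").lower()
    suffix = "HTTP_ENDPOINT" if protocol == "http" else "GRPC_ENDPOINT"
    return protocol, os.getenv(f"{service}_{suffix}", "")

# With the chart defaults above: PADDLE -> ("grpc", "nv-ingest-paddle:8001"),
# DEPLOT -> ("http", "http://nv-ingest-deplot:8000/v1/chat/completions").
print(resolve_inference_endpoint("PADDLE"))
print(resolve_inference_endpoint("DEPLOT"))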
From e2955fcf589c34beb73093dd53d2d01fed0ce854 Mon Sep 17 00:00:00 2001
From: Jeremy Dyer
Date: Tue, 22 Oct 2024 17:23:58 -0400
Subject: [PATCH 2/2] Increase memory footprint

---
 helm/values.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/helm/values.yaml b/helm/values.yaml
index 32aef7d0..d32d5575 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -73,12 +73,12 @@ replicaCount: 1
 ## @param resources.requests.memory [default: 16Gi] Specify request for memory
 resources:
   limits:
-    memory: 32Gi
+    memory: 90Gi
     nvidia.com/gpu: 1
     cpu: "36000m"
   requests:
-    memory: 16Gi
-    cpu: "12000m"
+    memory: 24Gi
+    cpu: "16000m"

 ## @param tmpDirSize [default: 8Gi] Specify the amount of space to reserve for temporary storage
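Beyond the Helm-side resource tuning, the other runtime-visible change in this series is in src/nv_ingest/api/main.py, where the OTLP collector endpoint now honors OTEL_EXPORTER_OTLP_ENDPOINT. A minimal standalone sketch of that override, assuming the standard opentelemetry-sdk and opentelemetry-exporter-otlp-proto-grpc packages are installed (the span name is illustrative):

import os

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Same override the patched main.py uses: fall back to the in-cluster
# collector when the environment variable is not set.
otel_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "otel-collector:4317")

provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint=otel_endpoint, insecure=True))
)
trace.set_tracer_provider(provider)

# Illustrative span; it is exported to whichever collector the env var points at.
with trace.get_tracer(__name__).start_as_current_span("otel-endpoint-smoke-test"):
    pass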