Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bedrock): support metrics for bedrock #1957

Merged
merged 12 commits into from
Oct 2, 2024

Large diffs are not rendered by default.

41 changes: 10 additions & 31 deletions packages/opentelemetry-instrumentation-bedrock/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,10 @@
import os
import pytest
import boto3
from opentelemetry import trace
from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

pytest_plugins = []


@pytest.fixture(scope="session")
def exporter():
    """Session-wide in-memory span exporter wired into the global tracer provider."""
    span_exporter = InMemorySpanExporter()

    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    return span_exporter


@pytest.fixture(scope="session", autouse=True)
def instrument(exporter):
    """Instrument Bedrock once for the whole session; release the exporter afterwards."""
    BedrockInstrumentor(enrich_token_usage=True).instrument()
    try:
        yield
    finally:
        exporter.shutdown()


@pytest.fixture(autouse=True)
def clear_exporter(exporter):
    """Drop spans captured by earlier tests so each test asserts only on its own."""
    exporter.clear()


@pytest.fixture(autouse=True)
def environment():
if os.getenv("AWS_SECRET_ACCESS_KEY") is None:
Expand All @@ -55,6 +24,16 @@ def brt():
)


@pytest.fixture
def brt2():
    """Bedrock runtime client for the metrics tests, built from env credentials."""
    credentials = {
        "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
    }
    return boto3.client(
        service_name="bedrock-runtime",
        region_name="us-west-2",
        **credentials,
    )


@pytest.fixture(scope="module")
def vcr_config():
    """Keep AWS credentials out of recorded cassettes."""
    filtered = ["authorization"]
    return {"filter_headers": filtered}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""unit tests."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
interactions:
- request:
body: '{"inputText": "Tell me a joke about opentelemetry", "textGenerationConfig":
{"maxTokenCount": 200, "temperature": 0.5, "topP": 0.5}}'
headers:
Accept:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
Content-Length:
- '132'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- !!binary |
Qm90bzMvMS4zNC4xNjIgbWQvQm90b2NvcmUjMS4zNC4xNjIgdWEvMi4wIG9zL21hY29zIzIzLjYu
MCBtZC9hcmNoI2FybTY0IGxhbmcvcHl0aG9uIzMuMTEuNSBtZC9weWltcGwjQ1B5dGhvbiBjZmcv
cmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zNC4xNjI=
X-Amz-Date:
- !!binary |
MjAyNDA5MTlUMDgxNTMyWg==
amz-sdk-invocation-id:
- !!binary |
YmNjNTcxNjYtYjRmOC00MzQzLTg4YmYtNWE4YzBhODBmZTM5
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
method: POST
uri: https://bedrock-runtime.us-west-2.amazonaws.com/model/amazon.titan-text-express-v1/invoke
response:
body:
string: '{"inputTextTokenCount":9,"results":[{"tokenCount":17,"outputText":"\nWhat
do you call a bear with no teeth?\nA gummy bear.","completionReason":"FINISH"}]}'
headers:
Connection:
- keep-alive
Content-Length:
- '154'
Content-Type:
- application/json
Date:
- Thu, 19 Sep 2024 08:15:34 GMT
X-Amzn-Bedrock-Input-Token-Count:
- '9'
X-Amzn-Bedrock-Invocation-Latency:
- '1229'
X-Amzn-Bedrock-Output-Token-Count:
- '17'
x-amzn-RequestId:
- 6278b8bf-c1a7-46d7-822b-9f85ee7805b6
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Unit tests configuration module."""

import pytest
from opentelemetry import metrics
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.instrumentation.bedrock import BedrockInstrumentor

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter


@pytest.fixture(scope="session")
def metrics_test_context():
    """Install global meter/tracer providers and instrument Bedrock.

    Returns the (MeterProvider, InMemoryMetricReader) pair so tests can pull
    the metrics collected during a call.
    """
    reader = InMemoryMetricReader()
    meter_provider = MeterProvider(
        metric_readers=[reader],
        resource=Resource.create(),
    )
    metrics.set_meter_provider(meter_provider)

    # A recording tracer provider is required: with the default no-op provider
    # span.is_recording() is False, so _handle_call and _handle_stream_call
    # would be skipped and no metrics would be emitted.
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(InMemorySpanExporter()))
    trace.set_tracer_provider(tracer_provider)

    BedrockInstrumentor(enrich_token_usage=True).instrument()

    return meter_provider, reader


@pytest.fixture(scope="session", autouse=True)
def clear_metrics_test_context(metrics_test_context):
    """Shut down the metric reader and provider at session teardown.

    The ``yield`` is essential: without it the fixture body runs at session
    *setup*, shutting the reader/provider down before any test executes.
    """
    provider, reader = metrics_test_context

    yield

    reader.shutdown()
    provider.shutdown()
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import json

import pytest
from opentelemetry.semconv_ai import Meters, SpanAttributes


@pytest.mark.vcr
def test_invoke_model_metrics(metrics_test_context, brt2):
    """Invoke amazon.titan-text-express-v1 (via VCR cassette) and verify the
    Bedrock instrumentation records token-usage and operation-duration metrics."""
    if brt2 is None:
        # Skip visibly in the pytest report instead of silently passing
        # via print-and-return.
        pytest.skip("bedrock-runtime client is unavailable")

    _, reader = metrics_test_context

    body = json.dumps(
        {
            "inputText": "Tell me a joke about opentelemetry",
            "textGenerationConfig": {
                "maxTokenCount": 200,
                "temperature": 0.5,
                "topP": 0.5,
            },
        }
    )

    brt2.invoke_model(
        body=body,
        modelId="amazon.titan-text-express-v1",
        accept="application/json",
        contentType="application/json",
    )

    metrics_data = reader.get_metrics_data()
    resource_metrics = metrics_data.resource_metrics
    assert len(resource_metrics) > 0

    found_token_metric = False
    found_duration_metric = False

    for rm in resource_metrics:
        for sm in rm.scope_metrics:
            for metric in sm.metrics:
                if metric.name == Meters.LLM_TOKEN_USAGE:
                    found_token_metric = True
                    for data_point in metric.data.data_points:
                        assert data_point.attributes[
                            SpanAttributes.LLM_TOKEN_TYPE
                        ] in ["output", "input"]
                        assert data_point.sum > 0

                if metric.name == Meters.LLM_OPERATION_DURATION:
                    found_duration_metric = True
                    assert any(
                        data_point.count > 0
                        for data_point in metric.data.data_points
                    )
                    assert any(
                        data_point.sum > 0
                        for data_point in metric.data.data_points
                    )

                # Every emitted metric must be attributed to the bedrock system.
                assert (
                    metric.data.data_points[0].attributes[SpanAttributes.LLM_SYSTEM]
                    == "bedrock"
                )

    assert found_token_metric is True
    assert found_duration_metric is True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""unit tests."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Unit tests configuration module."""

import pytest
from opentelemetry import trace
from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter


@pytest.fixture(scope="session")
def exporter():
    """Create a session-scoped in-memory exporter and make it the global tracer sink."""
    in_memory_exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(in_memory_exporter))
    trace.set_tracer_provider(provider)
    return in_memory_exporter


@pytest.fixture(scope="session", autouse=True)
def instrument(exporter):
    """Enable Bedrock instrumentation for the session, then shut the exporter down."""
    instrumentor = BedrockInstrumentor(enrich_token_usage=True)
    instrumentor.instrument()

    yield

    exporter.shutdown()


@pytest.fixture(autouse=True)
def clear_exporter(exporter):
    """Start every test with an empty span buffer."""
    exporter.clear()
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json


@pytest.mark.vcr()
@pytest.mark.vcr
def test_ai21_j2_completion_string_content(exporter, brt):
body = json.dumps(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json


@pytest.mark.vcr()
@pytest.mark.vcr
def test_meta_llama2_completion_string_content(exporter, brt):
model_id = "meta.llama2-13b-chat-v1"
prompt = """<s>[INST] <<SYS>>
Expand Down Expand Up @@ -44,7 +44,7 @@ def test_meta_llama2_completion_string_content(exporter, brt):
)


@pytest.mark.vcr()
@pytest.mark.vcr
def test_meta_llama3_completion(exporter, brt):
model_id = "meta.llama3-70b-instruct-v1:0"
prompt = "Tell me a joke about opentelemetry"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json


@pytest.mark.vcr()
@pytest.mark.vcr
def test_titan_completion(exporter, brt):
body = json.dumps(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class Meters:
LLM_WATSONX_COMPLETIONS_RESPONSES = "llm.watsonx.completions.responses"
LLM_WATSONX_COMPLETIONS_TOKENS = "llm.watsonx.completions.tokens"

LLM_BEDROCK_COMPLETIONS_EXCEPTIONS = "llm.bedrock.completions.exceptions"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this required?

Copy link
Contributor Author

@jinsongo jinsongo Sep 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's an open issue. I think it's better to use a common name, for example llm.completions.exceptions, for all AI systems to count exceptions. If so, I would like to use another PR to fix all the related code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jinsongo if we're doing this here I'd try to use the new genAI conventions - https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics
I think they're already released so you can even use them directly from the original otel semconv package

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(in this case - it means exceptions are counted in the duration metric)

Copy link
Contributor Author

@jinsongo jinsongo Sep 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nirga I cannot find an appropriate metric name for the exceptions counter of Bedrock. As you know, version 0.4.1 of opentelemetry-semantic-conventions-ai does not include LLM_BEDROCK_COMPLETIONS_EXCEPTIONS although it's already included in this current PR's commits, which causes the build to fail when I use it in the Bedrock instrumentation code. Actually, that's the same approach used for OpenAI, Anthropic, and Watsonx.

Copy link
Member

@nirga nirga Sep 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jinsongo this should be in the standard opentelemetry-semantic-conventions package. And you should count exceptions on the duration metric as specified in the spec.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jinsongo If this parameter is not required, I think we can ignore it for now, and we can fix it in the future as a consolidation of the semantic conventions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gyliu513 I have removed LLM_BEDROCK_COMPLETIONS_EXCEPTIONS from semconv_ai, and added a TODO comment about fixing in future as a consolidation for semantic convention.



class SpanAttributes:
# Semantic Conventions for LLM requests, this needs to be removed after
Expand Down
Loading