Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Allow custom prometheus info metric #83

Merged
merged 7 commits into from
Mar 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@
</argLine>
<!-- required to workaround issue with openjdk 8u181-b13-2 -->
<useSystemClassLoader>false</useSystemClassLoader>
<environmentVariables>
<MM_INFO_METRICS>assistant_deployment_info:relabel;deployment=DEPLOYMENT_NAME,slot=SLOT_NAME,component=COMPONENT_NAME,group=GROUP_NAME</MM_INFO_METRICS>
<DEPLOYMENT_NAME>ga-tf-mm</DEPLOYMENT_NAME>
<SLOT_NAME>ga</SLOT_NAME>
<COMPONENT_NAME>tf-mm</COMPONENT_NAME>
<GROUP_NAME>clu</GROUP_NAME>
</environmentVariables>
</configuration>
</plugin>

Expand Down
33 changes: 26 additions & 7 deletions src/main/java/com/ibm/watson/modelmesh/Metrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,20 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.lang.reflect.Array;
import java.net.SocketAddress;
import java.nio.channels.DatagramChannel;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.*;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;

import static com.ibm.watson.modelmesh.Metric.*;
import static com.ibm.watson.modelmesh.ModelMesh.M;
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_CUSTOM_ENV_VAR;
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_METRICS_ENV_VAR;
import static java.util.concurrent.TimeUnit.*;

/**
Expand Down Expand Up @@ -150,12 +149,14 @@ final class PrometheusMetrics implements Metrics {
5000, 10000, 20000, 60000, 120000, 300000
};

private static final int INFO_METRICS_MAX = 5;

private final CollectorRegistry registry;
private final NettyServer metricServer;
private final boolean shortNames;
private final EnumMap<Metric, Collector> metricsMap = new EnumMap<>(Metric.class);

public PrometheusMetrics(Map<String, String> params) throws Exception {
public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMetricParams) throws Exception {
int port = 2112;
boolean shortNames = true;
boolean https = true;
Expand Down Expand Up @@ -230,6 +231,24 @@ public PrometheusMetrics(Map<String, String> params) throws Exception {
}
}

if (infoMetricParams != null && !infoMetricParams.isEmpty()){
if (infoMetricParams.size() > INFO_METRICS_MAX) {
throw new Exception("Too many info metrics provided in env var " + MMESH_CUSTOM_ENV_VAR + ": \""
+ infoMetricParams+ "\". The max is " + INFO_METRICS_MAX);
}

String metric_name = infoMetricParams.remove("metric_name");
String[] labelNames = infoMetricParams.keySet().toArray(String[]::new);
String[] labelValues = Stream.of(labelNames).map(infoMetricParams::get).toArray(String[]::new);
Gauge infoMetricsGauge = Gauge.build()
.name(metric_name)
.help("Info Metrics")
.labelNames(labelNames)
.create();
infoMetricsGauge.labels(labelValues).set(1.0);
registry.register(infoMetricsGauge);
}
rafvasq marked this conversation as resolved.
Show resolved Hide resolved

this.metricServer = new NettyServer(registry, port, https);
this.shortNames = shortNames;

Expand Down
32 changes: 30 additions & 2 deletions src/main/java/com/ibm/watson/modelmesh/ModelMesh.java
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,9 @@ protected final TProcessor initialize() throws Exception {
// }

// "type" or "type:p1=v1;p2=v2;...;pn=vn"
private static final Pattern METRICS_CONFIG_PATT = Pattern.compile("([a-z]+)(:\\w+=[^;]+(?:;\\w+=[^;]+)*)?");
private static final Pattern METRICS_CONFIG_PATT = Pattern.compile("([a-z;]+)(:\\w+=[^;]+(?:;\\w+=[^;]+)*)?");
// "metric_name" or "metric:name;l1=v1,l2=v2,...,ln=vn,"
private static final Pattern CUSTOM_METRIC_CONFIG_PATT = Pattern.compile("([a-z_:]+);(\\w+=[^;]+(?:;\\w+=[^,]+)*)?");

private static Metrics setUpMetrics() throws Exception {
if (System.getenv("MM_METRICS_STATSD_PORT") != null || System.getenv("MM_METRICS_PROMETHEUS_PORT") != null) {
Expand Down Expand Up @@ -916,12 +918,38 @@ private static Metrics setUpMetrics() throws Exception {
params.put(kv[0], kv[1]);
}
}
String infoMetricConfig = getStringParameter(MMESH_CUSTOM_ENV_VAR, null);
Map<String, String> infoMetricParams;
if (infoMetricConfig == null) {
logger.info("{} returned null", MMESH_CUSTOM_ENV_VAR);
infoMetricParams = null;
} else {
logger.info("{} set to \"{}\"", MMESH_CUSTOM_ENV_VAR, infoMetricConfig);
Matcher infoMetricMatcher = CUSTOM_METRIC_CONFIG_PATT.matcher(infoMetricConfig);
if (!infoMetricMatcher.matches()) {
throw new Exception("Invalid metrics configuration provided in env var " + MMESH_CUSTOM_ENV_VAR + ": \""
+ infoMetricConfig + "\"");
}
String infoMetricName = infoMetricMatcher.group(1);
String infoMetricParamString = infoMetricMatcher.group(2);
infoMetricParams = new HashMap<>();
infoMetricParams.put("metric_name", infoMetricName);
for (String infoMetricParam : infoMetricParamString.substring(0).split(",")) {
String[] kv = infoMetricParam.split("=");
String value = System.getenv(kv[1]);
if (value == null) {
throw new Exception("Env var " + kv[1] + " is unresolved in " + MMESH_CUSTOM_ENV_VAR + ": \""
+ infoMetricConfig + "\"");
}
infoMetricParams.put(kv[0], value);
}
}
try {
switch (type.toLowerCase()) {
case "statsd":
return new Metrics.StatsDMetrics(params);
case "prometheus":
return new Metrics.PrometheusMetrics(params);
return new Metrics.PrometheusMetrics(params, infoMetricParams);
case "disabled":
logger.info("Metrics publishing is disabled (env var {}={})", MMESH_METRICS_ENV_VAR, metricsConfig);
return Metrics.NO_OP_METRICS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ private ModelMeshEnvVars() {}
public static final String LOAD_FAILURE_EXPIRY_ENV_VAR = "MM_LOAD_FAILURE_EXPIRY_TIME_MS";

public static final String MMESH_METRICS_ENV_VAR = "MM_METRICS";
public static final String MMESH_CUSTOM_ENV_VAR = "MM_INFO_METRICS";

public static final String LOG_EACH_INVOKE_ENV_VAR = "MM_LOG_EACH_INVOKE";
public static final String SEND_DEST_ID_ENV_VAR = "MM_SEND_DEST_ID";
Expand Down
16 changes: 13 additions & 3 deletions src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ protected int requestCount() {

static final String SCHEME = "https"; // or http

static final String METRIC_NAME = "assistant_deployment_info:relabel";
static final String DEPLOYMENT_NAME = "ga-tf-mm";
static final String SLOT_NAME = "ga";
static final String COMPONENT_NAME = "tf-mm";
static final String GROUP_NAME = "clu";

@Override
protected Map<String, String> extraEnvVars() {
return ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME);
Expand All @@ -84,7 +90,7 @@ public void metricsTest() throws Exception {

// verify not found status
ModelStatusInfo status = manageModels.getModelStatus(GetStatusRequest.newBuilder()
.setModelId("i don't exist").build());
.setModelId("I don't exist").build());

assertEquals(ModelStatus.NOT_FOUND, status.getStatus());
assertEquals(0, status.getErrorsCount());
Expand Down Expand Up @@ -166,7 +172,6 @@ public void verifyMetrics() throws Exception {
.filter(Matcher::matches)
.collect(Collectors.toMap(m -> m.group(1), m -> Double.parseDouble(m.group(2))));


System.out.println(metrics.size() + " metrics scraped");

// Spot check some expected metrics and values
Expand Down Expand Up @@ -198,5 +203,10 @@ public void verifyMetrics() throws Exception {
assertEquals(0.0, metrics.get("jvm_buffer_pool_used_buffers{pool=\"mapped\",}")); // mmapped memory not used
assertTrue(metrics.containsKey("jvm_gc_collection_seconds_sum{gc=\"G1 Young Generation\",}"));
assertTrue(metrics.containsKey("jvm_memory_bytes_committed{area=\"heap\",}"));

// Info metrics
assertEquals(1.0, metrics.get(METRIC_NAME + "{component=\"" + COMPONENT_NAME
+ "\",slot=\"" + SLOT_NAME + "\",deployment=\"" + DEPLOYMENT_NAME + "\",group=\"" + GROUP_NAME + "\",}"));
}
}

}