Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Refine metrics plan #1352

Merged
merged 13 commits into from
Jul 20, 2022
Original file line number Diff line number Diff line change
@@ -1,4 +1,47 @@
import { TransformNullValue } from '@lib/utils/prometheus'
import { QueryData } from '@lib/components/MetricChart/seriesRenderer'
import { ColorType, TransformNullValue } from '@lib/utils/prometheus'

/**
 * Resolves the series color for a legend label produced by a
 * `by (sql_type)` query.
 *
 * @param legendLabel - The formatted legend label of the series.
 * @returns The dedicated color for the label, or `undefined` when the label
 *   has no dedicated color.
 */
function transformColorBySQLType(legendLabel: string) {
  // A Map only matches the keys listed here, so labels such as "toString"
  // fall through to `undefined` exactly like an unmatched switch case.
  const colorBySQLType = new Map([
    ['Select', ColorType.BLUE_3],
    ['Commit', ColorType.GREEN_2],
    ['Insert', ColorType.GREEN_3],
    ['Update', ColorType.GREEN_4],
    ['general', ColorType.PINK]
  ])

  return colorBySQLType.get(legendLabel)
}

/**
 * Resolves the series color for a legend label shown in the execute-time
 * overview chart.
 *
 * @param legendLabel - The formatted legend label of the series.
 * @returns The dedicated color for the label, or `undefined` when the label
 *   has no dedicated color.
 */
function transformColorByExecTimeOverview(legendLabel: string) {
  // A Map only matches the keys listed here, so unknown labels (including
  // names inherited by plain objects) resolve to `undefined`.
  const colorByLabel = new Map([
    ['tso_wait', ColorType.RED_5],
    ['Commit', ColorType.GREEN_4],
    ['Prewrite', ColorType.GREEN_3],
    ['PessimisticLock', ColorType.RED_4],
    ['Get', ColorType.BLUE_3],
    ['BatchGet', ColorType.BLUE_4],
    ['Cop', ColorType.BLUE_1],
    ['Scan', ColorType.PURPLE],
    ['execute time', ColorType.YELLOW]
  ])

  return colorByLabel.get(legendLabel)
}

const metricsItems = [
{
Expand Down Expand Up @@ -58,9 +101,11 @@ const metricsItems = [
queries: [
{
query: `sum(rate(tidb_server_handle_query_duration_seconds_sum{sql_type!="internal"}[$__rate_interval]))`,
name: 'database time'
name: 'database time',
color: ColorType.YELLOW
}
],
nullValue: TransformNullValue.AS_ZERO,
unit: 's',
type: 'line'
},
Expand All @@ -69,7 +114,8 @@ const metricsItems = [
queries: [
{
query: `sum(rate(tidb_server_handle_query_duration_seconds_sum{sql_type!="internal"}[$__rate_interval])) by (sql_type)`,
name: '{sql_type}'
name: '{sql_type}',
color: (qd: QueryData) => transformColorBySQLType(qd.name)
}
],
unit: 's',
Expand All @@ -80,19 +126,48 @@ const metricsItems = [
queries: [
{
query: `sum(rate(tidb_session_parse_duration_seconds_sum{sql_type="general"}[$__rate_interval]))`,
name: 'parse'
name: 'parse',
color: ColorType.RED_2
},
{
query: `sum(rate(tidb_session_compile_duration_seconds_sum{sql_type="general"}[$__rate_interval]))`,
name: 'compile'
name: 'compile',
color: ColorType.ORANGE
},
{
query: `sum(rate(tidb_session_execute_duration_seconds_sum{sql_type="general"}[$__rate_interval]))`,
name: 'execute'
name: 'execute',
color: ColorType.GREEN_3
},
{
query: `sum(rate(tidb_server_get_token_duration_seconds_sum{sql_type="general"}[$__rate_interval]))/1000000`,
name: 'get token'
name: 'get token',
color: ColorType.RED_3
}
],
unit: 's',
type: 'bar_stacked'
},
{
title: 'Database Execute Time',
queries: [
{
query:
'sum(rate(tidb_tikvclient_request_seconds_sum{store!="0"}[$__rate_interval])) by (type)',
name: '{type}',
color: (qd: QueryData) => transformColorByExecTimeOverview(qd.name)
},
{
query:
'sum(rate(pd_client_cmd_handle_cmds_duration_seconds_sum{type="wait"}[$__rate_interval]))',
name: 'tso_wait',
color: ColorType.RED_5
},
{
query:
'sum(rate(tidb_session_execute_duration_seconds_sum{sql_type="general"}[$__rate_interval]))',
name: 'execute time',
color: ColorType.YELLOW
}
],
unit: 's',
Expand Down Expand Up @@ -157,7 +232,12 @@ const metricsItems = [
{
query:
'sum(rate(tidb_server_plan_cache_total[$__rate_interval])) by (type)',
name: 'avg'
name: 'avg - hit'
},
{
query:
'sum(rate(tidb_server_plan_cache_miss_total[$__rate_interval]))',
name: 'avg - miss'
}
],
unit: 'short',
Expand Down Expand Up @@ -283,6 +363,7 @@ const metricsItems = [
name: '{type}-{txn_mode}'
}
],
nullValue: TransformNullValue.AS_ZERO,
unit: 's',
type: 'line'
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,6 @@ export default function Metrics() {
</Typography.Title>
<MetricChart
queries={item.queries}
yDomain={
item.yDomain
? { min: item.yDomain.min, max: item.yDomain.max }
: undefined
}
type={item.type as GraphType}
unit={item.unit!}
range={chartRange}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,87 +1,90 @@
import { TransformNullValue } from '@lib/utils/prometheus'

const overviewMetrics = [
{
title: 'total_requests',
queries: [
{
query: 'sum(rate(tidb_executor_statement_total[$__rate_interval]))',
name: 'Total'
},
{
query:
'sum(rate(tidb_executor_statement_total[$__rate_interval])) by (type)',
name: '{type}'
}
],
unit: null,
nullValue: TransformNullValue.AS_ZERO,
unit: 'qps',
type: 'bar_stacked'
},
{
title: 'latency',
queries: [
{
query:
'histogram_quantile(0.9, sum(rate(tidb_server_handle_query_duration_seconds_bucket[$__rate_interval])) by (le))',
name: '95%'
'sum(rate(tidb_server_handle_query_duration_seconds_sum{sql_type!="internal"}[$__rate_interval])) / sum(rate(tidb_server_handle_query_duration_seconds_count{sql_type!="internal"}[$__rate_interval]))',
name: 'avg'
},
{
query:
'histogram_quantile(0.99, sum(rate(tidb_server_handle_query_duration_seconds_bucket[$__rate_interval])) by (le))',
name: '99%'
'histogram_quantile(0.99, sum(rate(tidb_server_handle_query_duration_seconds_bucket{sql_type!="internal"}[$__rate_interval])) by (le))',
name: '99'
},
{
query:
'histogram_quantile(0.999, sum(rate(tidb_server_handle_query_duration_seconds_bucket[$__rate_interval])) by (le))',
name: '99.9%'
'sum(rate(tidb_server_handle_query_duration_seconds_sum{sql_type!="internal"}[$__rate_interval])) by (sql_type) / sum(rate(tidb_server_handle_query_duration_seconds_count{sql_type!="internal"}[$__rate_interval])) by (sql_type)',
name: 'avg-{sql_type}'
},
{
query:
'histogram_quantile(0.99, sum(rate(tidb_server_handle_query_duration_seconds_bucket{sql_type!="internal"}[$__rate_interval])) by (le,sql_type))',
name: '99-{sql_type}'
}
],
nullValue: TransformNullValue.AS_ZERO,
unit: 's',
type: 'line'
},
{
title: 'cpu',
queries: [
{
query:
'100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[$__rate_interval]) ) * 100',
query: 'rate(process_cpu_seconds_total{job="tidb"}[$__rate_interval])',
name: '{instance}'
}
],
yDomain: {
min: 0,
max: 100
},
nullValue: TransformNullValue.AS_ZERO,
unit: 'percent',
type: 'line'
},
{
title: 'memory',
queries: [
{
query: `100 - (
avg_over_time(node_memory_MemAvailable_bytes[$__rate_interval]) or
(
avg_over_time(node_memory_Buffers_bytes[$__rate_interval]) +
avg_over_time(node_memory_Cached_bytes[$__rate_interval]) +
avg_over_time(node_memory_MemFree_bytes[$__rate_interval]) +
avg_over_time(node_memory_Slab_bytes[$__rate_interval])
)
) /
avg_over_time(node_memory_MemTotal_bytes[$__rate_interval]) * 100`,
query: 'process_resident_memory_bytes{job="tidb"}',
name: '{instance}'
}
],
yDomain: {
min: 0,
max: 100
},
unit: 'percent',
nullValue: TransformNullValue.AS_ZERO,
unit: 'decbytes',
type: 'line'
},
{
title: 'io',
queries: [
{
query: 'irate(node_disk_io_time_seconds_total[$__rate_interval]) * 100',
name: '{instance} - {device}'
query:
'sum(rate(tikv_engine_flow_bytes{db="raft", type="wal_file_bytes"}[$__rate_interval])) by (instance) + sum(rate(raft_engine_write_size_sum[$__rate_interval])) by (instance)',
name: '{instance}-write'
},
{
query:
'sum(rate(tikv_engine_flow_bytes{db="kv", type=~"bytes_read|iter_bytes_read"}[$__rate_interval])) by (instance)',
name: '{instance}-read'
}
],
unit: 'decbytes',
unit: 'KBs',
type: 'line'
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ overview:
metrics:
total_requests: QPS
latency: Latency
cpu: CPU Utilization
memory: Memory Utilization
io: IO Utilization
cpu: TiDB CPU Usage
memory: TiDB Memory Usage
io: IO Usage
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ overview:
metrics:
total_requests: QPS
latency: 延迟
cpu: CPU 使用率
memory: 内存使用率
io: IO 使用率
cpu: TiDB CPU 使用
memory: TiDB 内存使用
io: IO 使用
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,13 @@ for (const key in translations) {
/**
 * Describes one query of a metric chart and how its resulting series are
 * labelled and colored.
 */
export interface IQueryOption {
  /** Query expression that is evaluated to produce the series data. */
  query: string
  /**
   * Legend label template; it is formatted against each result's metric
   * labels, so placeholders such as `{sql_type}` are filled in per series.
   */
  name: string
  /**
   * Series color: either a fixed color string, or a resolver invoked with the
   * series' query data. A resolver may return `undefined` when it has no
   * dedicated color for the series.
   */
  color?: string | ((qd: QueryData) => string | undefined)
}

export interface IMetricChartProps {
// When object ref changed, there will be a data reload.
range: TimeRangeValue

yDomain?: DomainRange
queries: IQueryOption[]
unit?: string
type: GraphType
Expand Down Expand Up @@ -122,7 +121,6 @@ type Data = {
export default function MetricChart({
queries,
range,
yDomain,
unit,
type,
height = 200,
Expand Down Expand Up @@ -220,12 +218,15 @@ export default function MetricChart({
})
: data

sd.push({
const d: QueryData = {
id: `${queryIdx}_${seriesIdx}`,
name: format(queries[queryIdx].name, promResult.metric),
data: transformedData,
color: queries[queryIdx].color
})
data: transformedData
}
const colorOrFn = queries[queryIdx].color

d.color = typeof colorOrFn === 'function' ? colorOrFn(d) : colorOrFn
sd.push(d)
})
})
setData({
Expand Down Expand Up @@ -301,7 +302,6 @@ export default function MetricChart({
unit ? getValueFormat(unit)(v, 2) : Number(v).toFixed(0)
}
ticks={5}
domain={yDomain}
/>
{data?.values.map((qd) => renderQueryData(type, qd))}
{data && (
Expand Down
20 changes: 20 additions & 0 deletions ui/packages/tidb-dashboard-lib/src/utils/prometheus/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,26 @@ export enum TransformNullValue {
AS_ZERO = 'as_zero'
}

/**
 * Named hex color values available for coloring chart series.
 *
 * Within a numbered family (BLUE, GREEN, RED) a higher suffix is a darker
 * shade of the same hue.
 */
export enum ColorType {
// Blues, lightest to darkest.
BLUE_1 = '#C0D8FF',
BLUE_2 = '#8AB8FF',
BLUE_3 = '#3274D9',
BLUE_4 = '#1F60C4',
// Greens, lightest to darkest.
GREEN_1 = '#C8F2C2',
GREEN_2 = '#96D98D',
GREEN_3 = '#56A64B',
GREEN_4 = '#37872D',
// Reds, lightest to darkest.
RED_1 = '#FFA6B0',
RED_2 = '#FF7383',
RED_3 = '#E02F44',
RED_4 = '#C4162A',
RED_5 = '#701313',
// Single-shade colors.
PURPLE = '#8778ee',
ORANGE = '#FF9830',
YELLOW = '#FADE2A',
PINK = '#F2495C'
}

// Our customized types

export interface QueryOptions {
Expand Down