Skip to content

Add statistical I/O metrics #137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 32 additions & 9 deletions collector/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,14 @@ var (
})
)

// metricsAggStageCounters is a counter vector, partitioned by the "type"
// label, reporting how many times aggregation pipeline stages have executed.
var metricsAggStageCounters = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: Namespace,
		Name:      "metrics_agg_stage_counters",
		Help:      "the number of times the aggregate pipeline stage has been executed.",
	},
	[]string{"type"},
)

// DocumentStats are the stats associated to a document.
type DocumentStats struct {
Deleted float64 `bson:"deleted"`
Expand Down Expand Up @@ -414,17 +422,27 @@ func (cursorStats *CursorStats) Export(ch chan<- prometheus.Metric) {
metricsCursorOpen.WithLabelValues("total").Set(cursorStats.Open.Total)
}

// AggStageCounterStats carries the aggregation stage counters. Only the
// counter stored under the "$lookup" key is mapped.
type AggStageCounterStats struct {
	// Lookup is the value decoded from the "$lookup" key.
	Lookup float64 `bson:"$lookup"`
}

// Export records the $lookup stage count on metricsAggStageCounters under the
// "lookup" label value. The channel is accepted for signature parity with the
// other Export methods; nothing is sent on it here.
func (s *AggStageCounterStats) Export(ch chan<- prometheus.Metric) {
	lookup := metricsAggStageCounters.WithLabelValues("lookup")
	lookup.Set(s.Lookup)
}
// MetricsStats are all stats associated with metrics of the system.
//
// Each field maps one sub-document by its bson key. The fields are pointers,
// and Export checks them for nil before exporting a section.
type MetricsStats struct {
	Document        *DocumentStats        `bson:"document"`
	GetLastError    *GetLastErrorStats    `bson:"getLastError"`
	Operation       *OperationStats       `bson:"operation"`
	QueryExecutor   *QueryExecutorStats   `bson:"queryExecutor"`
	Record          *RecordStats          `bson:"record"`
	Repl            *ReplStats            `bson:"repl"`
	Storage         *StorageStats         `bson:"storage"`
	Cursor          *CursorStats          `bson:"cursor"`
	Ttl             *TtlStats             `bson:"ttl"`
	AggStageCounter *AggStageCounterStats `bson:"aggStageCounters"`
}

// Export exports the metrics stats.
Expand Down Expand Up @@ -456,6 +474,9 @@ func (metricsStats *MetricsStats) Export(ch chan<- prometheus.Metric) {
if metricsStats.Cursor != nil {
metricsStats.Cursor.Export(ch)
}
if metricsStats.AggStageCounter != nil {
metricsStats.AggStageCounter.Export(ch)
}

metricsCursorTimedOutTotal.Collect(ch)
metricsCursorOpen.Collect(ch)
Expand Down Expand Up @@ -487,6 +508,7 @@ func (metricsStats *MetricsStats) Export(ch chan<- prometheus.Metric) {
metricsStorageFreelistSearchTotal.Collect(ch)
metricsTTLDeletedDocumentsTotal.Collect(ch)
metricsTTLPassesTotal.Collect(ch)
metricsAggStageCounters.Collect(ch)
}

// Describe describes the metrics for prometheus
Expand Down Expand Up @@ -521,4 +543,5 @@ func (metricsStats *MetricsStats) Describe(ch chan<- *prometheus.Desc) {
metricsStorageFreelistSearchTotal.Describe(ch)
metricsTTLDeletedDocumentsTotal.Describe(ch)
metricsTTLPassesTotal.Describe(ch)
metricsAggStageCounters.Describe(ch)
}
144 changes: 129 additions & 15 deletions collector/wiredtiger.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,30 @@ import (
)

var (
wtBlockManagerBlocksTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
wtBlockManagerReadBlocksTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: Namespace,
Subsystem: "wiredtiger_blockmanager",
Name: "blocks_total",
Name: "read_blocks_total",
Help: "The total number of blocks read by the WiredTiger BlockManager",
}, []string{"type"})
wtBlockManagerBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
wtBlockManagerReadBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: Namespace,
Subsystem: "wiredtiger_blockmanager",
Name: "bytes_total",
Name: "read_bytes_total",
Help: "The total number of bytes read by the WiredTiger BlockManager",
}, []string{"type"})
// wtBlockManagerWriteBlocksTotal counts blocks written by the WiredTiger
// block manager, partitioned by the "type" label.
wtBlockManagerWriteBlocksTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
	Namespace: Namespace,
	Subsystem: "wiredtiger_blockmanager",
	Name:      "write_blocks_total",
	Help:      "The total number of blocks written by the WiredTiger BlockManager",
}, []string{"type"})
// wtBlockManagerWriteBytesTotal counts bytes written by the WiredTiger block
// manager, partitioned by the "type" label.
wtBlockManagerWriteBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
	Namespace: Namespace,
	Subsystem: "wiredtiger_blockmanager",
	Name:      "write_bytes_total",
	Help:      "The total number of bytes written by the WiredTiger BlockManager",
}, []string{"type"})
)

var (
Expand Down Expand Up @@ -76,6 +88,18 @@ var (
Name: "overhead_percent",
Help: "The percentage overhead of the WiredTiger Cache",
})
// wtCacheReadTimeUsecs is a gauge in the wiredtiger_cache subsystem. Per its
// help text it reports the time application threads spent reading pages from
// disk into the cache, in microseconds.
wtCacheReadTimeUsecs = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "wiredtiger_cache",
Name: "read_time_usecs",
Help: "application threads page read from disk to cache time (usecs)",
})
// wtCacheWriteTimeUsecs is a gauge in the wiredtiger_cache subsystem. Per its
// help text it reports the time application threads spent writing pages from
// the cache to disk, in microseconds.
wtCacheWriteTimeUsecs = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "wiredtiger_cache",
Name: "write_time_usecs",
Help: "application threads page write from cache to disk time (usecs)",
})
)

var (
Expand Down Expand Up @@ -168,6 +192,30 @@ var (
}, []string{"type"})
)

// Counters for the WiredTiger connection I/O totals. Both live in the
// wiredtiger_connection subsystem and are set from WTConnectionStats.
var (
	// wtConnectionReadIOS is the total number of read I/Os.
	wtConnectionReadIOS = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "wiredtiger_connection",
		Name:      "read_IO_s",
		Help:      "Wt connection total read IOS",
	})
	// wtConnectionWriteIOS is the total number of write I/Os. Its help text
	// previously described reads (copy-paste from the counter above).
	wtConnectionWriteIOS = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "wiredtiger_connection",
		Name:      "write_IO_s",
		Help:      "Wt connection total write IOS",
	})
)

// wtCursor is a counter vector in the wiredtiger_cursor subsystem that
// reports, per "type" label value, how many times a cursor operation was
// called.
var wtCursor = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: "wiredtiger_cursor",
		Name:      "calls",
		Help:      "Number of times each operation was called",
	},
	[]string{"type"},
)

// blockmanager stats
type WTBlockManagerStats struct {
MappedBytesRead float64 `bson:"mapped bytes read"`
Expand All @@ -180,18 +228,21 @@ type WTBlockManagerStats struct {
}

// Export copies the block-manager statistics onto the split read/write
// counter vectors.
//
// Read-side values (plain, mapped and pre-loaded blocks; plain and mapped
// bytes) go to the read vectors, and written blocks/bytes go to the write
// vectors. Pre-loaded blocks are reported once, under the read vector only;
// they are no longer repeated under the write vector. Nothing is sent on ch
// here.
func (stats *WTBlockManagerStats) Export(ch chan<- prometheus.Metric) {
	// Blocks.
	wtBlockManagerReadBlocksTotal.WithLabelValues("read").Set(stats.BlocksRead)
	wtBlockManagerReadBlocksTotal.WithLabelValues("read_mapped").Set(stats.MappedBlocksRead)
	wtBlockManagerReadBlocksTotal.WithLabelValues("pre_loaded").Set(stats.BlocksPreLoaded)
	wtBlockManagerWriteBlocksTotal.WithLabelValues("written").Set(stats.BlocksWritten)

	// Bytes.
	wtBlockManagerReadBytesTotal.WithLabelValues("read").Set(stats.BytesRead)
	wtBlockManagerReadBytesTotal.WithLabelValues("read_mapped").Set(stats.MappedBytesRead)
	wtBlockManagerWriteBytesTotal.WithLabelValues("written").Set(stats.BytesWritten)
}

// Describe sends the descriptors of the four block-manager counter vectors
// (read/write blocks and read/write bytes) on ch. It only touches the vectors
// that Export populates, not the pre-split names.
func (stats *WTBlockManagerStats) Describe(ch chan<- *prometheus.Desc) {
	wtBlockManagerReadBlocksTotal.Describe(ch)
	wtBlockManagerWriteBlocksTotal.Describe(ch)
	wtBlockManagerReadBytesTotal.Describe(ch)
	wtBlockManagerWriteBytesTotal.Describe(ch)
}

// cache stats
Expand All @@ -210,6 +261,8 @@ type WTCacheStats struct {
PagesReadInto float64 `bson:"pages read into cache"`
PagesWrittenFrom float64 `bson:"pages written from cache"`
PagesDirty float64 `bson:"tracked dirty pages in the cache"`
ReadTime float64 `bson:"application threads page read from disk to cache time (usecs)"`
WriteTime float64 `bson:"application threads page write from cache to disk time (usecs)"`
}

func (stats *WTCacheStats) Export(ch chan<- prometheus.Metric) {
Expand All @@ -227,6 +280,8 @@ func (stats *WTCacheStats) Export(ch chan<- prometheus.Metric) {
wtCacheBytes.WithLabelValues("leaf_pages").Set(stats.BytesLeafPages)
wtCacheMaxBytes.Set(stats.MaxBytes)
wtCachePercentOverhead.Set(stats.PercentOverhead)
wtCacheReadTimeUsecs.Set(stats.ReadTime)
wtCacheWriteTimeUsecs.Set(stats.WriteTime)
}

func (stats *WTCacheStats) Describe(ch chan<- *prometheus.Desc) {
Expand All @@ -236,6 +291,8 @@ func (stats *WTCacheStats) Describe(ch chan<- *prometheus.Desc) {
wtCacheBytes.Describe(ch)
wtCacheMaxBytes.Describe(ch)
wtCachePercentOverhead.Describe(ch)
wtCacheReadTimeUsecs.Describe(ch)
wtCacheWriteTimeUsecs.Describe(ch)
}

// log stats
Expand Down Expand Up @@ -354,6 +411,40 @@ func (stats *WTConcurrentTransactionsStats) Describe(ch chan<- *prometheus.Desc)
wtConcurrentTransactionsTotalTickets.Describe(ch)
}

// WTConnectionStats holds the connection I/O totals, decoded from the
// "total read I/Os" and "total write I/Os" keys.
type WTConnectionStats struct {
	ReadIOS  float64 `bson:"total read I/Os"`
	WriteIOS float64 `bson:"total write I/Os"`
}

// Export sets the connection read and write I/O counters from the decoded
// totals. Nothing is sent on ch in this method.
func (c *WTConnectionStats) Export(ch chan<- prometheus.Metric) {
	reads, writes := c.ReadIOS, c.WriteIOS
	wtConnectionReadIOS.Set(reads)
	wtConnectionWriteIOS.Set(writes)
}

// Describe sends the descriptors of the connection read and write I/O
// counters on ch, read first.
func (c *WTConnectionStats) Describe(ch chan<- *prometheus.Desc) {
	wtConnectionReadIOS.Describe(ch)
	wtConnectionWriteIOS.Describe(ch)
}

// WTCursorStats holds the cursor call counts, one field per
// "cursor <op> calls" key.
type WTCursorStats struct {
	Create float64 `bson:"cursor create calls"`
	Insert float64 `bson:"cursor insert calls"`
	Modify float64 `bson:"cursor modify calls"`
	Remove float64 `bson:"cursor remove calls"`
}

// Export sets one wtCursor series per cursor operation ("create", "insert",
// "modify", "remove") from the matching field. Nothing is sent on ch in this
// method.
func (c *WTCursorStats) Export(ch chan<- prometheus.Metric) {
	// A slice, not a map, so the series are set in a fixed order.
	calls := []struct {
		label string
		count float64
	}{
		{"create", c.Create},
		{"insert", c.Insert},
		{"modify", c.Modify},
		{"remove", c.Remove},
	}
	for _, call := range calls {
		wtCursor.WithLabelValues(call.label).Set(call.count)
	}
}

// Describe sends the descriptor of the wtCursor counter vector on ch.
func (c *WTCursorStats) Describe(ch chan<- *prometheus.Desc) {
	wtCursor.Describe(ch)
}
// WiredTiger stats
type WiredTigerStats struct {
BlockManager *WTBlockManagerStats `bson:"block-manager"`
Expand All @@ -362,6 +453,8 @@ type WiredTigerStats struct {
Session *WTSessionStats `bson:"session"`
Transaction *WTTransactionStats `bson:"transaction"`
ConcurrentTransactions *WTConcurrentTransactionsStats `bson:"concurrentTransactions"`
Connection *WTConnectionStats `bson:"connection"`
Cursor *WTCursorStats `bson:"cursor"`
}

func (stats *WiredTigerStats) Describe(ch chan<- *prometheus.Desc) {
Expand All @@ -383,6 +476,12 @@ func (stats *WiredTigerStats) Describe(ch chan<- *prometheus.Desc) {
if stats.ConcurrentTransactions != nil {
stats.ConcurrentTransactions.Describe(ch)
}
if stats.Connection != nil {
stats.Connection.Describe(ch)
}
if stats.Cursor != nil {
stats.Cursor.Describe(ch)
}
}

func (stats *WiredTigerStats) Export(ch chan<- prometheus.Metric) {
Expand All @@ -404,9 +503,17 @@ func (stats *WiredTigerStats) Export(ch chan<- prometheus.Metric) {
if stats.ConcurrentTransactions != nil {
stats.ConcurrentTransactions.Export(ch)
}
if stats.Connection != nil {
stats.Connection.Export(ch)
}
if stats.Cursor != nil {
stats.Cursor.Export(ch)
}

wtBlockManagerBlocksTotal.Collect(ch)
wtBlockManagerBytesTotal.Collect(ch)
wtBlockManagerReadBlocksTotal.Collect(ch)
wtBlockManagerWriteBlocksTotal.Collect(ch)
wtBlockManagerReadBytesTotal.Collect(ch)
wtBlockManagerWriteBytesTotal.Collect(ch)

wtCachePagesTotal.Collect(ch)
wtCacheBytesTotal.Collect(ch)
Expand All @@ -415,6 +522,8 @@ func (stats *WiredTigerStats) Export(ch chan<- prometheus.Metric) {
wtCacheBytes.Collect(ch)
wtCacheMaxBytes.Collect(ch)
wtCachePercentOverhead.Collect(ch)
wtCacheReadTimeUsecs.Collect(ch)
wtCacheWriteTimeUsecs.Collect(ch)

wtTransactionsTotal.Collect(ch)
wtTransactionsTotalCheckpointMs.Collect(ch)
Expand All @@ -432,4 +541,9 @@ func (stats *WiredTigerStats) Export(ch chan<- prometheus.Metric) {
wtConcurrentTransactionsOut.Collect(ch)
wtConcurrentTransactionsAvailable.Collect(ch)
wtConcurrentTransactionsTotalTickets.Collect(ch)

wtConnectionReadIOS.Collect(ch)
wtConnectionWriteIOS.Collect(ch)

wtCursor.Collect(ch)
}