thanos-io · kakkoyun · Sep 18, 2020 · Sep 2, 2020 · Sep 2, 2020 · Sep 2, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,8 @@ We use *breaking* word for marking changes that are not backward compatible (rel
 
 ## Unreleased
 
+- [#3032](https://github.com/thanos-io/thanos/pull/3032) Query Frontend: It now fully uses the Cortex module and supports all backends that are supported there.
+
 ## [v0.15.0](https://github.com/thanos-io/thanos/releases) - in release process.
 
 :warning: **WARNING** :warning: Thanos Rule's `/api/v1/rules` endpoint no longer returns the old, deprecated `partial_response_strategy`. The old, deprecated value has been fixed to `WARN` for quite some time. _Please_ use `partialResponseStrategy`.

diff --git a/cmd/thanos/query-frontend.go b/cmd/thanos/query-frontend.go
@@ -16,93 +16,73 @@ import (
 	"github.com/opentracing/opentracing-go"
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/common/model"
 	"github.com/weaveworks/common/user"
 	"gopkg.in/alecthomas/kingpin.v2"
 
+	"github.com/cortexproject/cortex/pkg/util/validation"
 	"github.com/thanos-io/thanos/pkg/component"
 	"github.com/thanos-io/thanos/pkg/extflag"
 	"github.com/thanos-io/thanos/pkg/extprom"
 	extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http"
 	"github.com/thanos-io/thanos/pkg/logging"
 	"github.com/thanos-io/thanos/pkg/prober"
 	"github.com/thanos-io/thanos/pkg/queryfrontend"
-	"github.com/thanos-io/thanos/pkg/queryfrontend/cache"
 	httpserver "github.com/thanos-io/thanos/pkg/server/http"
 	"github.com/thanos-io/thanos/pkg/server/http/middleware"
 	"github.com/thanos-io/thanos/pkg/tracing"
 )
 
-type queryFrontendConfig struct {
-	http             httpConfig
-	queryRangeConfig queryRangeConfig
-
-	downstreamURL        string
-	compressResponses    bool
-	LogQueriesLongerThan time.Duration
+type config struct {
+	http httpConfig
+	queryfrontend.Config
 
+	contentOrPath          extflag.PathOrContent
 	requestLoggingDecision string
-}
-
-type queryRangeConfig struct {
-	respCacheConfig     extflag.PathOrContent
-	cacheMaxFreshness   time.Duration
-	splitInterval       model.Duration
-	maxRetries          int
-	maxQueryParallelism int
-	maxQueryLength      model.Duration
-
 	// partialResponseStrategy is the default strategy used
 	// when parsing thanos query request.
 	partialResponseStrategy bool
 }
 
-func (c *queryRangeConfig) registerFlag(cmd *kingpin.CmdClause) {
-	cmd.Flag("query-range.split-interval", "Split queries by an interval and execute in parallel, 0 disables it.").
-		Default("24h").SetValue(&c.splitInterval)
+func registerQueryFrontend(m map[string]setupFunc, app *kingpin.Application) {
+	comp := component.QueryFrontend
+	cmd := app.Command(comp.String(), "query frontend")
+	cfg := &config{}
+
+	cmd.Flag("query-range.split-queries-by-interval", "Split queries by an interval and execute in parallel, 0 disables it.").
+		Default("24h").DurationVar(&cfg.QueryRange.SplitQueriesByInterval)
 
 	cmd.Flag("query-range.max-retries-per-request", "Maximum number of retries for a single request; beyond this, the downstream error is returned.").
-		Default("5").IntVar(&c.maxRetries)
+		Default("5").IntVar(&cfg.QueryRange.MaxRetries)
 
 	cmd.Flag("query-range.max-query-length", "Limit the query time range (end - start time) in the query-frontend, 0 disables it.").
-		Default("0").SetValue(&c.maxQueryLength)
+		Default("0").DurationVar(&cfg.Limits.MaxQueryLength)
 
-	cmd.Flag("query-range.max-query-parallelism", "Maximum number of queries will be scheduled in parallel by the frontend.").
-		Default("14").IntVar(&c.maxQueryParallelism)
+	cmd.Flag("query-range.max-query-parallelism", "Maximum number of queries will be scheduled in parallel by the Frontend.").
+		Default("14").IntVar(&cfg.Limits.MaxQueryParallelism)
 
-	cmd.Flag("query-range.response-cache-max-freshness", "Most recent allowed cacheable result, to prevent caching very recent results that might still be in flux.").
-		Default("1m").DurationVar(&c.cacheMaxFreshness)
+	cmd.Flag("query-range.max-cache-freshness", "Most recent allowed cacheable result, to prevent caching very recent results that might still be in flux.").
+		Default("1m").DurationVar(&cfg.Limits.MaxCacheFreshness)
 
 	cmd.Flag("query-range.partial-response", "Enable partial response for queries if no partial_response param is specified. --no-query-range.partial-response for disabling.").
-		Default("true").BoolVar(&c.partialResponseStrategy)
+		Default("true").BoolVar(&cfg.partialResponseStrategy)
 
-	c.respCacheConfig = *extflag.RegisterPathOrContent(cmd, "query-range.response-cache-config", "YAML file that contains response cache configuration.", false)
-}
+	cfg.contentOrPath = *extflag.RegisterPathOrContent(cmd, "query-range.cache-config", "YAML file that contains response cache configuration.", false)
 
-func (c *queryFrontendConfig) registerFlag(cmd *kingpin.CmdClause) {
-	c.queryRangeConfig.registerFlag(cmd)
-	c.http.registerFlag(cmd)
+	cfg.http.registerFlag(cmd)
 
 	cmd.Flag("query-frontend.downstream-url", "URL of downstream Prometheus Query compatible API.").
-		Default("http://localhost:9090").StringVar(&c.downstreamURL)
+		Default("http://localhost:9090").StringVar(&cfg.Frontend.DownstreamURL)
 
-	cmd.Flag("query-frontend.compress-responses", "Compress HTTP responses.").
-		Default("false").BoolVar(&c.compressResponses)
+	cmd.Flag("query-frontend.compress-http-responses", "Compress HTTP responses.").
+		Default("false").BoolVar(&cfg.Frontend.CompressResponses)
 
-	cmd.Flag("query-frontend.log_queries_longer_than", "Log queries that are slower than the specified duration. "+
-		"Set to 0 to disable. Set to < 0 to enable on all queries.").Default("0").DurationVar(&c.LogQueriesLongerThan)
-
-	cmd.Flag("log.request.decision", "Request Logging for logging the start and end of requests. LogFinishCall is enabled by default. LogFinishCall : Logs the finish call of the requests. LogStartAndFinishCall : Logs the start and finish call of the requests. NoLogCall : Disable request logging.").Default("LogFinishCall").EnumVar(&c.requestLoggingDecision, "NoLogCall", "LogFinishCall", "LogStartAndFinishCall")
-}
+	cmd.Flag("query-frontend.log-queries-longer-than", "Log queries that are slower than the specified duration. "+
+		"Set to 0 to disable. Set to < 0 to enable on all queries.").Default("0").DurationVar(&cfg.Frontend.LogQueriesLongerThan)
 
-func registerQueryFrontend(m map[string]setupFunc, app *kingpin.Application) {
-	comp := component.QueryFrontend
-	cmd := app.Command(comp.String(), "query frontend")
-	conf := &queryFrontendConfig{}
-	conf.registerFlag(cmd)
+	cmd.Flag("log.request.decision", "Request Logging for logging the start and end of requests. LogFinishCall is enabled by default. LogFinishCall : Logs the finish call of the requests. LogStartAndFinishCall : Logs the start and finish call of the requests. NoLogCall : Disable request logging.").Default("LogFinishCall").EnumVar(&cfg.requestLoggingDecision, "NoLogCall", "LogFinishCall", "LogStartAndFinishCall")
 
 	m[comp.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
-		return runQueryFrontend(g, logger, reg, tracer, conf, comp)
+		return runQueryFrontend(g, logger, reg, tracer, cfg, comp)
 	}
 }
 
@@ -111,51 +91,42 @@ func runQueryFrontend(
 	logger log.Logger,
 	reg *prometheus.Registry,
 	tracer opentracing.Tracer,
-	conf *queryFrontendConfig,
+	cfg *config,
 	comp component.Component,
 ) error {
-
-	if len(conf.downstreamURL) == 0 {
+	if len(cfg.Frontend.DownstreamURL) == 0 {
 		return errors.New("downstream URL should be configured")
 	}
+	err := cfg.QueryRange.Validate(logger)
+	if err != nil {
+		return errors.Wrap(err, "error validating query range config")
+	}
 
 	fe, err := frontend.New(frontend.Config{
-		DownstreamURL:        conf.downstreamURL,
-		CompressResponses:    conf.compressResponses,
-		LogQueriesLongerThan: conf.LogQueriesLongerThan,
+		DownstreamURL:        cfg.Frontend.DownstreamURL,
+		CompressResponses:    cfg.Frontend.CompressResponses,
+		LogQueriesLongerThan: cfg.Frontend.LogQueriesLongerThan,
 	}, logger, reg)
 	if err != nil {
 		return errors.Wrap(err, "setup query frontend")
 	}
 	defer fe.Close()
 
-	limits := queryfrontend.NewLimits(
-		conf.queryRangeConfig.maxQueryParallelism,
-		time.Duration(conf.queryRangeConfig.maxQueryLength),
-		conf.queryRangeConfig.cacheMaxFreshness,
-	)
-
-	respCacheContentYaml, err := conf.queryRangeConfig.respCacheConfig.Content()
+	limits, err := validation.NewOverrides(validation.Limits{
+		MaxQueryLength:      cfg.Limits.MaxQueryLength,
+		MaxQueryParallelism: cfg.Limits.MaxQueryParallelism,
+		MaxCacheFreshness:   cfg.Limits.MaxCacheFreshness,
+	}, nil)
 	if err != nil {
-		return errors.Wrap(err, "get content of response cache configuration")
-	}
-
-	var cacheConfig *queryrange.ResultsCacheConfig
-	if len(respCacheContentYaml) > 0 {
-		cacheConfig, err = cache.NewResponseCacheConfig(respCacheContentYaml)
-		if err != nil {
-			return errors.Wrap(err, "create response cache")
-		}
+		return errors.Wrap(err, "initialiase limits")
 	}
 
-	codec := queryfrontend.NewThanosCodec(conf.queryRangeConfig.partialResponseStrategy)
+	codec := queryfrontend.NewThanosCodec(cfg.partialResponseStrategy)
 	tripperWare, err := queryfrontend.NewTripperWare(
+		cfg.QueryRange,
 		limits,
-		cacheConfig,
 		codec,
 		queryrange.PrometheusResponseExtractor{},
-		time.Duration(conf.queryRangeConfig.splitInterval),
-		conf.queryRangeConfig.maxRetries,
 		reg,
 		logger,
 	)
@@ -173,16 +144,16 @@ func runQueryFrontend(
 
 	// Configure Request Logging for HTTP calls.
 	opts := []logging.Option{logging.WithDecider(func() logging.Decision {
-		return logging.LogDecision[conf.requestLoggingDecision]
+		return logging.LogDecision[cfg.requestLoggingDecision]
 	})}
 	logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...)
 	ins := extpromhttp.NewInstrumentationMiddleware(reg)
 
 	// Start metrics HTTP server.
 	{
 		srv := httpserver.New(logger, reg, comp, httpProbe,
-			httpserver.WithListen(conf.http.bindAddress),
-			httpserver.WithGracePeriod(time.Duration(conf.http.gracePeriod)),
+			httpserver.WithListen(cfg.http.bindAddress),
+			httpserver.WithGracePeriod(time.Duration(cfg.http.gracePeriod)),
 		)
 
 		instr := func(f http.HandlerFunc) http.HandlerFunc {

diff --git a/docs/components/query-frontend.md b/docs/components/query-frontend.md
@@ -45,21 +45,68 @@ Query Frontend supports a retry mechanism to retry query when HTTP requests are
 
 Query Frontend supports caching query results and reuses them on subsequent queries. If the cached results are incomplete,
 Query Frontend calculates the required subqueries and executes them in parallel on downstream queriers. Query Frontend can optionally align queries with their step parameter to improve the cacheability of the query results.
+It uses the Cortex cache module, so it supports every cache backend that is supported there.
 
-Currently, only in-memory cache (fifo cache) is supported. An example config:
-
-[embedmd]:# (../flags/config_response_cache_in_memory.txt yaml)
+[embedmd]:# (../flags/config_frontend_cache.txt yaml)
 ```yaml
-type: IN-MEMORY
-config:
-  max_size: ""
-  max_size_items: 0
-  validity: 0s
+- limits:
+    maxquerylength: 0s
+    maxqueryparallelism: 0
+    maxcachefreshness: 0s
+  queryrange:
+    split_queries_by_interval: 0s
+    split_queries_by_day: false
+    align_queries_with_step: false
+    results_cache:
+      cache:
+        enable_fifocache: false
+        default_validity: 0s
+        background:
+          writeback_goroutines: 0
+          writeback_buffer: 0
+        memcached:
+          expiration: 0s
+          batch_size: 0
+          parallelism: 0
+        memcached_client:
+          host: ""
+          service: ""
+          addresses: ""
+          timeout: 0s
+          max_idle_conns: 0
+          update_interval: 0s
+          consistent_hash: false
+        redis:
+          endpoint: ""
+          timeout: 0s
+          expiration: 0s
+          max_idle_conns: 0
+          max_active_conns: 0
+          password: ""
+          enable_tls: false
+          idle_timeout: 0s
+          wait_on_pool_exhaustion: false
+          max_conn_lifetime: 0s
+        fifocache:
+          max_size_bytes: ""
+          max_size_items: 0
+          validity: 0s
+          size: 0
+        prefix: ""
+      max_freshness: 0s
+    cache_results: false
+    max_retries: 0
+    parallelise_shardable_queries: false
+  frontend:
+    max_outstanding_per_tenant: 0
+    compress_responses: false
+    downstream_url: ""
+    log_queries_longer_than: 0s
 ```
 
 ### Slow Query Log
 
-Query Frontend supports `--query-frontend.log_queries_longer_than` flag to log queries running longer some duration.
+Query Frontend supports the `--query-frontend.log-queries-longer-than` flag to log queries running longer than some duration.
 
 ## Naming
 
@@ -89,7 +136,7 @@ Flags:
                               priority). Content of YAML file with tracing
                               configuration. See format details:
                               https://thanos.io/tip/tracing.md/#configuration
-      --query-range.split-interval=24h
+      --query-range.split-queries-by-interval=24h
                               Split queries by an interval and execute in
                               parallel, 0 disables it.
       --query-range.max-retries-per-request=5
@@ -100,32 +147,31 @@ Flags:
                               the query-frontend, 0 disables it.
       --query-range.max-query-parallelism=14
                               Maximum number of queries will be scheduled in
-                              parallel by the frontend.
-      --query-range.response-cache-max-freshness=1m
+                              parallel by the Frontend.
+      --query-range.max-cache-freshness=1m
                               Most recent allowed cacheable result, to prevent
                               caching very recent results that might still be in
                               flux.
       --query-range.partial-response
                               Enable partial response for queries if no
                               partial_response param is specified.
                               --no-query-range.partial-response for disabling.
-      --query-range.response-cache-config-file=<file-path>
+      --query-range.cache-config-file=<file-path>
                               Path to YAML file that contains response cache
                               configuration.
-      --query-range.response-cache-config=<content>
-                              Alternative to
-                              'query-range.response-cache-config-file' flag
-                              (lower priority). Content of YAML file that
+      --query-range.cache-config=<content>
+                              Alternative to 'query-range.cache-config-file'
+                              flag (lower priority). Content of YAML file that
                               contains response cache configuration.
       --http-address="0.0.0.0:10902"
                               Listen host:port for HTTP endpoints.
       --http-grace-period=2m  Time to wait after an interrupt received for HTTP
                               Server.
       --query-frontend.downstream-url="http://localhost:9090"
                               URL of downstream Prometheus Query compatible API.
-      --query-frontend.compress-responses
+      --query-frontend.compress-http-responses
                               Compress HTTP responses.
-      --query-frontend.log_queries_longer_than=0
+      --query-frontend.log-queries-longer-than=0
                               Log queries that are slower than the specified
                               duration. Set to 0 to disable. Set to < 0 to
                               enable on all queries.

diff --git a/go.mod b/go.mod
@@ -8,6 +8,7 @@ require (
 	github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d
 	github.com/aliyun/aliyun-oss-go-sdk v2.0.4+incompatible
 	github.com/armon/go-metrics v0.3.3
+	github.com/baiyubin/aliyun-sts-go-sdk v0.0.0-20180326062324-cfa1a18b161f // indirect
 	github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b
 	github.com/cespare/xxhash v1.1.0
 	github.com/chromedp/cdproto v0.0.0-20200424080200-0de008e41fa0
@@ -70,6 +71,7 @@ require (
 )
 
 replace (
+	github.com/cortexproject/cortex => ../../cortexproject/cortex
 	// Update to v1.1.1 to make sure windows CI pass.
 	github.com/elastic/go-sysinfo => github.com/elastic/go-sysinfo v1.1.1
 	// Make sure Prometheus version is pinned as Prometheus semver does not include Go APIs.