From 20ee9d7162ef568c3441595fbb51397f653f3379 Mon Sep 17 00:00:00 2001
From: Omar Abdulaziz
Date: Wed, 31 Jan 2024 12:02:47 +0200
Subject: [PATCH 01/19] convert indexers to watchers in a mono indexer

* separate commit for easier review
*
---
 grid-proxy/cmds/proxy_server/main.go | 15 ++++---
 .../gpuindexer.go => indexer/gpu.go} | 2 +-
 .../healthindexer.go => indexer/health.go} | 2 +-
 grid-proxy/internal/indexer/indexer.go | 45 +++++++++++++++++++
 4 files changed, 55 insertions(+), 9 deletions(-)
 rename grid-proxy/internal/{gpuindexer/gpuindexer.go => indexer/gpu.go} (99%)
 rename grid-proxy/internal/{healthindexer/healthindexer.go => indexer/health.go} (99%)
 create mode 100644 grid-proxy/internal/indexer/indexer.go

diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go
index 9431c8de3..21d8a7c17 100644
--- a/grid-proxy/cmds/proxy_server/main.go
+++ b/grid-proxy/cmds/proxy_server/main.go
@@ -16,8 +16,7 @@ import (
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/certmanager"
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer"
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db"
-	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/gpuindexer"
-	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/healthindexer"
+	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/indexer"
 	logging "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg"
 	rmb "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go"
 	"github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer"
@@ -128,7 +127,7 @@ func main() {

 	dbClient := explorer.DBClient{DB: &db}

-	indexer, err := gpuindexer.NewNodeGPUIndexer(
+	gpuWatcher, err := indexer.NewNodeGPUIndexer(
 		ctx,
 		f.relayURL,
 		f.mnemonics,
@@ -142,13 +141,15 @@ func main() {
 		log.Fatal().Err(err).Msg("failed to create GPU indexer")
 	}

-	indexer.Start(ctx)
-
-	healthIndexer, err := healthindexer.NewNodeHealthIndexer(ctx, &db, subManager, f.mnemonics, f.relayURL, f.healthIndexerWorkers, f.healthIndexerInterval)
+	healthWatcher, err := indexer.NewNodeHealthIndexer(ctx, &db, subManager, f.mnemonics, f.relayURL, f.healthIndexerWorkers, f.healthIndexerInterval)
 	if err != nil {
 		log.Fatal().Err(err).Msg("failed to create health indexer")
 	}
-	healthIndexer.Start(ctx)
+
+	indexer := indexer.NewIndexer(ctx, true)
+	indexer.RegisterWatcher("GPU", gpuWatcher)
+	indexer.RegisterWatcher("Health", healthWatcher)
+	indexer.Start()

 	s, err := createServer(f, dbClient, GitCommit, relayRPCClient)
 	if err != nil {
diff --git a/grid-proxy/internal/gpuindexer/gpuindexer.go b/grid-proxy/internal/indexer/gpu.go
similarity index 99%
rename from grid-proxy/internal/gpuindexer/gpuindexer.go
rename to grid-proxy/internal/indexer/gpu.go
index 2f0884a17..945bf174d 100644
--- a/grid-proxy/internal/gpuindexer/gpuindexer.go
+++ b/grid-proxy/internal/indexer/gpu.go
@@ -1,4 +1,4 @@
-package gpuindexer
+package indexer

 import (
 	"context"
diff --git a/grid-proxy/internal/healthindexer/healthindexer.go b/grid-proxy/internal/indexer/health.go
similarity index 99%
rename from grid-proxy/internal/healthindexer/healthindexer.go
rename to grid-proxy/internal/indexer/health.go
index b27ffa9af..cfee99a05 100644
--- a/grid-proxy/internal/healthindexer/healthindexer.go
+++ b/grid-proxy/internal/indexer/health.go
@@ -1,4 +1,4 @@
-package healthindexer
+package indexer

 import (
 	"context"
diff --git a/grid-proxy/internal/indexer/indexer.go b/grid-proxy/internal/indexer/indexer.go
new file mode 100644
index 000000000..3dd12c85e
--- /dev/null
+++ b/grid-proxy/internal/indexer/indexer.go @@ -0,0 +1,45 @@ +package indexer + +import ( + "context" + + "github.com/rs/zerolog/log" +) + +type Watcher interface { + Start(ctx context.Context) +} + +type Indexer struct { + Watchers map[string]Watcher + Paused bool + Context context.Context +} + +func NewIndexer( + ctx context.Context, + paused bool, +) *Indexer { + return &Indexer{ + Watchers: make(map[string]Watcher), + Paused: paused, + Context: ctx, + } +} + +func (i *Indexer) RegisterWatcher(name string, watcher Watcher) { + i.Watchers[name] = watcher +} + +func (i *Indexer) Start() { + if i.Paused { + log.Info().Msg("Indexer paused") + return + } + + log.Info().Msg("Starting indexer...") + for name, watcher := range i.Watchers { + watcher.Start(i.Context) + log.Info().Msgf("%s watcher started", name) + } +} From 5e4787d83c6678d07212095e80d3bd1bbf4e0d5a Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Wed, 31 Jan 2024 15:23:02 +0200 Subject: [PATCH 02/19] use a single rmb client of type peer.RpcClient across the project --- grid-proxy/cmds/proxy_server/main.go | 46 ++++++------ grid-proxy/internal/indexer/gpu.go | 97 ++++++++++++-------------- grid-proxy/internal/indexer/health.go | 31 +++----- grid-proxy/internal/indexer/indexer.go | 16 +++-- 4 files changed, 85 insertions(+), 105 deletions(-) diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index 21d8a7c17..99eb9c260 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -7,8 +7,10 @@ import ( "fmt" "net/http" "os" + "strings" "time" + "github.com/google/uuid" "github.com/gorilla/mux" "github.com/pkg/errors" "github.com/rs/zerolog/log" @@ -111,11 +113,6 @@ func main() { subManager := substrate.NewManager(f.tfChainURL) - relayRPCClient, err := createRPCRMBClient(ctx, f.relayURL, f.mnemonics, subManager) - if err != nil { - log.Fatal().Err(err).Msg("failed to create relay client") - } - db, err := db.NewPostgresDatabase(f.postgresHost, f.postgresPort, f.postgresUser, f.postgresPassword, f.postgresDB, f.maxPoolOpenConnections, logger.LogLevel(f.sqlLogLevel)) if err != nil { log.Fatal().Err(err).Msg("couldn't get postgres client") @@ -126,32 +123,35 @@ func main() { } dbClient := explorer.DBClient{DB: &db} + rpcRmbClient, err := createRPCRMBClient(ctx, f.relayURL, f.mnemonics, subManager) + if err != nil { + log.Fatal().Err(err).Msg("failed to create relay client") + } + idxr := indexer.NewIndexer(ctx, false, rpcRmbClient) - gpuWatcher, err := indexer.NewNodeGPUIndexer( + gpuWatcher := indexer.NewNodeGPUIndexer( ctx, - f.relayURL, - f.mnemonics, - subManager, &db, + rpcRmbClient, + &db, f.gpuIndexerCheckIntervalMins, f.gpuIndexerBatchSize, f.gpuIndexerResultWorkers, f.gpuIndexerBatchWorkers, ) - if err != nil { - log.Fatal().Err(err).Msg("failed to create GPU indexer") - } + idxr.RegisterWatcher("GPU", gpuWatcher) - healthWatcher, err := indexer.NewNodeHealthIndexer(ctx, &db, subManager, f.mnemonics, f.relayURL, f.healthIndexerWorkers, f.healthIndexerInterval) - if err != nil { - log.Fatal().Err(err).Msg("failed to create health indexer") - } + healthWatcher := indexer.NewNodeHealthIndexer( + ctx, + rpcRmbClient, + &db, + f.healthIndexerWorkers, + f.healthIndexerInterval, + ) + idxr.RegisterWatcher("Health", healthWatcher) - indexer := indexer.NewIndexer(ctx, true) - indexer.RegisterWatcher("GPU", gpuWatcher) - indexer.RegisterWatcher("Health", healthWatcher) - indexer.Start() + idxr.Start() - s, err := createServer(f, dbClient, GitCommit, 
relayRPCClient) + s, err := createServer(f, dbClient, GitCommit, rpcRmbClient) if err != nil { log.Fatal().Err(err).Msg("failed to create mux server") } @@ -207,8 +207,8 @@ func app(s *http.Server, f flags) error { return nil } -func createRPCRMBClient(ctx context.Context, relayURL, mnemonics string, subManager substrate.Manager) (rmb.Client, error) { - sessionId := fmt.Sprintf("tfgrid_proxy-%d", os.Getpid()) +func createRPCRMBClient(ctx context.Context, relayURL, mnemonics string, subManager substrate.Manager) (*peer.RpcClient, error) { + sessionId := fmt.Sprintf("tfgrid-proxy-%s", strings.Split(uuid.NewString(), "-")[0]) client, err := peer.NewRpcClient(ctx, peer.KeyTypeSr25519, mnemonics, relayURL, sessionId, subManager, true) if err != nil { return nil, fmt.Errorf("failed to create direct RPC RMB client: %w", err) diff --git a/grid-proxy/internal/indexer/gpu.go b/grid-proxy/internal/indexer/gpu.go index 945bf174d..849d0f292 100644 --- a/grid-proxy/internal/indexer/gpu.go +++ b/grid-proxy/internal/indexer/gpu.go @@ -2,18 +2,12 @@ package indexer import ( "context" - "encoding/json" - "fmt" - "os" "time" - "github.com/google/uuid" "github.com/rs/zerolog/log" - substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" - rmbTypes "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer/types" ) const ( @@ -25,7 +19,7 @@ const ( type NodeGPUIndexer struct { db db.Database - relayPeer *peer.Peer + rpcClient *peer.RpcClient checkInterval time.Duration batchSize uint nodesGPUResultsChan chan []types.NodeGPU @@ -37,16 +31,15 @@ type NodeGPUIndexer struct { func NewNodeGPUIndexer( ctx context.Context, - relayURL, - mnemonics string, - subManager substrate.Manager, + rpcClient *peer.RpcClient, db db.Database, indexerCheckIntervalMins, batchSize, nodesGPUResultsWorkers, - nodesGPUBufferWorkers uint) (*NodeGPUIndexer, error) { - indexer := &NodeGPUIndexer{ + nodesGPUBufferWorkers uint) *NodeGPUIndexer { + return &NodeGPUIndexer{ db: db, + rpcClient: rpcClient, nodesGPUResultsChan: make(chan []types.NodeGPU), nodesGPUBatchesChan: make(chan []types.NodeGPU), newNodeTwinIDChan: make(chan []uint32), @@ -55,22 +48,6 @@ func NewNodeGPUIndexer( nodesGPUResultsWorkers: nodesGPUResultsWorkers, nodesGPUBufferWorkers: nodesGPUBufferWorkers, } - - sessionId := fmt.Sprintf("tfgrid_proxy_indexer-%d", os.Getpid()) - client, err := peer.NewPeer( - ctx, - mnemonics, - subManager, - indexer.relayCallback, - peer.WithRelay(relayURL), - peer.WithSession(sessionId), - ) - if err != nil { - return nil, fmt.Errorf("failed to create direct RMB client: %w", err) - } - indexer.relayPeer = client - - return indexer, nil } func (n *NodeGPUIndexer) queryGridNodes(ctx context.Context) { @@ -130,11 +107,25 @@ func (n *NodeGPUIndexer) runQueryGridNodes(ctx context.Context) { } func (n *NodeGPUIndexer) getNodeGPUInfo(ctx context.Context, nodeTwinID uint32) error { - ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + subCtx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() - id := uuid.NewString() - return n.relayPeer.SendRequest(ctx, id, nodeTwinID, nil, "zos.gpu.list", nil) + var nodesGPU []types.NodeGPU + err := n.rpcClient.Call(subCtx, nodeTwinID, "zos.gpu.list", nil, &nodesGPU) + if err != nil { + return err + } + log.Debug().Msgf("gpu indexer: %+v", nodesGPU) + + for i := range nodesGPU { + 
nodesGPU[i].NodeTwinID = nodeTwinID + } + + if len(nodesGPU) != 0 { + n.nodesGPUResultsChan <- nodesGPU + } + + return nil } func (n *NodeGPUIndexer) getNodes(ctx context.Context, filter types.NodeFilter, limit types.Limit) ([]db.Node, error) { @@ -204,8 +195,6 @@ func (n *NodeGPUIndexer) Start(ctx context.Context) { go n.watchNodeTable(ctx) - log.Info().Msg("GPU indexer started") - } func (n *NodeGPUIndexer) watchNodeTable(ctx context.Context) { @@ -238,24 +227,24 @@ func (n *NodeGPUIndexer) watchNodeTable(ctx context.Context) { } } -func (n *NodeGPUIndexer) relayCallback(ctx context.Context, p peer.Peer, response *rmbTypes.Envelope, callBackErr error) { - output, err := peer.Json(response, callBackErr) - if err != nil { - log.Error().Err(err) - return - } - - var nodesGPU []types.NodeGPU - err = json.Unmarshal(output, &nodesGPU) - if err != nil { - log.Error().Err(err).RawJSON("data", output).Msg("failed to unmarshal GPU information response") - return - - } - for i := range nodesGPU { - nodesGPU[i].NodeTwinID = response.Source.Twin - } - if len(nodesGPU) != 0 { - n.nodesGPUResultsChan <- nodesGPU - } -} +// func (n *NodeGPUIndexer) relayCallback(ctx context.Context, p peer.Peer, response *rmbTypes.Envelope, callBackErr error) { +// output, err := peer.Json(response, callBackErr) +// if err != nil { +// log.Error().Err(err) +// return +// } + +// var nodesGPU []types.NodeGPU +// err = json.Unmarshal(output, &nodesGPU) +// if err != nil { +// log.Error().Err(err).RawJSON("data", output).Msg("failed to unmarshal GPU information response") +// return + +// } +// for i := range nodesGPU { +// nodesGPU[i].NodeTwinID = response.Source.Twin +// } +// if len(nodesGPU) != 0 { +// n.nodesGPUResultsChan <- nodesGPU +// } +// } diff --git a/grid-proxy/internal/indexer/health.go b/grid-proxy/internal/indexer/health.go index cfee99a05..005b45dc5 100644 --- a/grid-proxy/internal/indexer/health.go +++ b/grid-proxy/internal/indexer/health.go @@ -2,16 +2,11 @@ package indexer import ( "context" - "fmt" - "strings" "time" - "github.com/google/uuid" "github.com/rs/zerolog/log" - substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" - "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" ) @@ -22,7 +17,7 @@ const ( type NodeHealthIndexer struct { db db.Database - relayClient rmb.Client + relayClient *peer.RpcClient nodeTwinIdsChan chan uint32 indexerInterval time.Duration indexerWorkers uint @@ -30,26 +25,18 @@ type NodeHealthIndexer struct { func NewNodeHealthIndexer( ctx context.Context, + rpcClient *peer.RpcClient, db db.Database, - subManager substrate.Manager, - mnemonic string, - relayUrl string, indexerWorkers uint, indexerInterval uint, -) (*NodeHealthIndexer, error) { - sessionId := generateSessionId() - rpcClient, err := peer.NewRpcClient(ctx, peer.KeyTypeSr25519, mnemonic, relayUrl, sessionId, subManager, true) - if err != nil { - return nil, fmt.Errorf("failed to create rmb client: %w", err) - } - +) *NodeHealthIndexer { return &NodeHealthIndexer{ db: db, relayClient: rpcClient, nodeTwinIdsChan: make(chan uint32), indexerWorkers: indexerWorkers, indexerInterval: time.Duration(indexerInterval) * time.Minute, - }, nil + } } func (c *NodeHealthIndexer) Start(ctx context.Context) { @@ -62,8 +49,6 @@ func (c *NodeHealthIndexer) Start(ctx context.Context) { go c.checkNodeHealth(ctx) } - 
log.Info().Msg("Node health indexer started")
-
 }

 func (c *NodeHealthIndexer) startNodeQuerier(ctx context.Context) {
@@ -134,6 +119,8 @@ func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) {
 			err := c.relayClient.Call(subCtx, twinId, indexerCallCommand, nil, &result)
 			cancel()

+			log.Debug().Msgf("health indexer: %+v", result)
+
 			healthReport := types.HealthReport{
 				NodeTwinId: twinId,
 				Healthy: isHealthy(err),
@@ -152,6 +139,6 @@ func isHealthy(err error) bool {
 	return err == nil
 }

-func generateSessionId() string {
-	return fmt.Sprintf("node-health-indexer-%s", strings.Split(uuid.NewString(), "-")[0])
-}
+// func generateSessionId() string {
+// 	return fmt.Sprintf("node-health-indexer-%s", strings.Split(uuid.NewString(), "-")[0])
+// }
diff --git a/grid-proxy/internal/indexer/indexer.go b/grid-proxy/internal/indexer/indexer.go
index 3dd12c85e..9903d44d6 100644
--- a/grid-proxy/internal/indexer/indexer.go
+++ b/grid-proxy/internal/indexer/indexer.go
@@ -4,6 +4,7 @@ import (
 	"context"

 	"github.com/rs/zerolog/log"
+	"github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer"
 )

 type Watcher interface {
@@ -11,19 +12,22 @@
 type Indexer struct {
-	Watchers map[string]Watcher
-	Paused   bool
-	Context  context.Context
+	Watchers  map[string]Watcher
+	Paused    bool
+	Context   context.Context
+	RmbClient *peer.RpcClient
 }

 func NewIndexer(
 	ctx context.Context,
 	paused bool,
+	rmbClient *peer.RpcClient,
 ) *Indexer {
 	return &Indexer{
-		Watchers: make(map[string]Watcher),
-		Paused:   paused,
-		Context:  ctx,
+		Watchers:  make(map[string]Watcher),
+		Paused:    paused,
+		Context:   ctx,
+		RmbClient: rmbClient,
 	}
 }

From 7cde9d7cd3750f22a267840be78c70e1de4808ce Mon Sep 17 00:00:00 2001
From: Omar Abdulaziz
Date: Wed, 31 Jan 2024 15:41:25 +0200
Subject: [PATCH 03/19] configure starting/stopping indexer/modifier from flag

---
 .github/workflows/grid-proxy-integration.yml | 2 +-
 grid-proxy/Makefile | 3 +++
 grid-proxy/cmds/proxy_server/main.go | 9 +++++----
 grid-proxy/tests/queries/main_test.go | 20 ++++++++++++--------
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/grid-proxy-integration.yml b/.github/workflows/grid-proxy-integration.yml
index bfb55a49a..e9d853586 100644
--- a/.github/workflows/grid-proxy-integration.yml
+++ b/.github/workflows/grid-proxy-integration.yml
@@ -52,7 +52,7 @@ jobs:
           pushd tools/db
           go run . 
--seed 13 --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --reset popd - go run cmds/proxy_server/main.go -no-cert --address :8080 --log-level debug --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --health-indexer-workers 0 --mnemonics "$MNEMONICS" & + go run cmds/proxy_server/main.go -no-cert -no-indexer --address :8080 --log-level debug --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --mnemonics "$MNEMONICS" & sleep 10 pushd tests/queries go test -v --seed 13 --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --endpoint http://localhost:8080 diff --git a/grid-proxy/Makefile b/grid-proxy/Makefile index 1bedce54d..2a93f500f 100644 --- a/grid-proxy/Makefile +++ b/grid-proxy/Makefile @@ -46,6 +46,7 @@ db-refill: db-stop db-start sleep db-fill server-start: ## Start the proxy server (Args: `m=`) @go run cmds/proxy_server/main.go \ -no-cert \ + -no-indexer \ --address :8080 \ --log-level debug \ --postgres-host $(PQ_HOST) \ @@ -63,6 +64,7 @@ test-queries: ## Run all queries tests @cd tests/queries/ &&\ go test -v \ -parallel 20 \ + -no-modify \ --seed 13 \ --postgres-host $(PQ_HOST) \ --postgres-db tfgrid-graphql \ @@ -75,6 +77,7 @@ test-query: ## Run specific test query (Args: `t=TestName`). @cd tests/queries/ &&\ go test -v \ -parallel 10 \ + -no-modify \ --seed 13 \ --postgres-host $(PQ_HOST) \ --postgres-db tfgrid-graphql \ diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index 99eb9c260..efca8eb29 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -58,9 +58,9 @@ type flags struct { gpuIndexerResultWorkers uint gpuIndexerBatchWorkers uint maxPoolOpenConnections int - // being 0 is helpful of making the data persistent while testing - healthIndexerWorkers uint - healthIndexerInterval uint + healthIndexerWorkers uint + healthIndexerInterval uint + noIndexer bool // true to stop the indexer, useful on running for testing } func main() { @@ -89,6 +89,7 @@ func main() { flag.IntVar(&f.maxPoolOpenConnections, "max-open-conns", 80, "max number of db connection pool open connections") flag.UintVar(&f.healthIndexerWorkers, "health-indexer-workers", 100, "number of workers checking on node health") flag.UintVar(&f.healthIndexerInterval, "health-indexer-interval", 5, "node health check interval in min") + flag.BoolVar(&f.noIndexer, "no-indexer", false, "do not start the indexer") flag.Parse() // shows version and exit @@ -127,7 +128,7 @@ func main() { if err != nil { log.Fatal().Err(err).Msg("failed to create relay client") } - idxr := indexer.NewIndexer(ctx, false, rpcRmbClient) + idxr := indexer.NewIndexer(ctx, f.noIndexer, rpcRmbClient) gpuWatcher := indexer.NewNodeGPUIndexer( ctx, diff --git a/grid-proxy/tests/queries/main_test.go b/grid-proxy/tests/queries/main_test.go index 5ca9b16da..bd30e0015 100644 --- a/grid-proxy/tests/queries/main_test.go +++ b/grid-proxy/tests/queries/main_test.go @@ -30,6 +30,7 @@ var ( SEED int STATUS_DOWN = "down" STATUS_UP = "up" + NO_MODIFY = false mockClient proxyclient.Client data mock.DBData @@ -45,6 +46,7 @@ func parseCmdline() { flag.StringVar(&POSTGRES_PASSSWORD, "postgres-password", "", "postgres password") flag.StringVar(&ENDPOINT, "endpoint", "", "the grid proxy endpoint to test against") flag.IntVar(&SEED, "seed", 0, "seed used for the 
random generation of tests") + flag.BoolVar(&NO_MODIFY, "no-modify", false, "stop modify the dump data") flag.Parse() } @@ -81,16 +83,18 @@ func TestMain(m *testing.M) { panic(err) } - err = modifyDataToFireTriggers(db, data) - if err != nil { - panic(err) - } - data, err = mock.Load(db) - if err != nil { - panic(err) + if !NO_MODIFY { + err = modifyDataToFireTriggers(db, data) + if err != nil { + panic(err) + } + data, err = mock.Load(db) + if err != nil { + panic(err) + } } - mockClient = mock.NewGridProxyMockClient(data) + mockClient = mock.NewGridProxyMockClient(data) exitCode = m.Run() os.Exit(exitCode) } From 4c14d01ef46efbe270e1edc72d20726143cd1434 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Wed, 31 Jan 2024 17:47:52 +0200 Subject: [PATCH 04/19] wip: introducing the dmi watcher --- grid-proxy/cmds/proxy_server/main.go | 9 +++ grid-proxy/internal/indexer/dmi.go | 101 +++++++++++++++++++++++++ grid-proxy/internal/indexer/gpu.go | 23 +----- grid-proxy/internal/indexer/health.go | 46 ++--------- grid-proxy/internal/indexer/indexer.go | 5 ++ grid-proxy/internal/indexer/types.go | 47 ++++++++++++ grid-proxy/internal/indexer/utils.go | 32 ++++++++ 7 files changed, 200 insertions(+), 63 deletions(-) create mode 100644 grid-proxy/internal/indexer/dmi.go create mode 100644 grid-proxy/internal/indexer/types.go create mode 100644 grid-proxy/internal/indexer/utils.go diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index efca8eb29..99e3c27ff 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -150,6 +150,15 @@ func main() { ) idxr.RegisterWatcher("Health", healthWatcher) + dmiWatcher := indexer.NewDmiWatcher( + ctx, + &db, + rpcRmbClient, + 5, + 1, + ) + idxr.RegisterWatcher("DMI", dmiWatcher) + idxr.Start() s, err := createServer(f, dbClient, GitCommit, rpcRmbClient) diff --git a/grid-proxy/internal/indexer/dmi.go b/grid-proxy/internal/indexer/dmi.go new file mode 100644 index 000000000..c76617bf4 --- /dev/null +++ b/grid-proxy/internal/indexer/dmi.go @@ -0,0 +1,101 @@ +package indexer + +import ( + "context" + "time" + + "github.com/rs/zerolog/log" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" + "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" +) + +const ( + DmiCallCmd = "zos.system.dmi" +) + +type DmiWatcher struct { + database db.Database + rmbClient *peer.RpcClient + nodeTwinIdsChan chan uint32 + resultChan chan DMI + interval time.Duration + workers uint +} + +func NewDmiWatcher( + ctx context.Context, + database db.Database, + rmbClient *peer.RpcClient, + interval uint, + workers uint, +) *DmiWatcher { + return &DmiWatcher{ + database: database, + rmbClient: rmbClient, + nodeTwinIdsChan: make(chan uint32), + resultChan: make(chan DMI), + interval: time.Duration(interval) * time.Minute, + workers: workers, + } +} + +func (w *DmiWatcher) Start(ctx context.Context) { + go w.startNodeQuerier(ctx) + + for i := uint(0); i < w.workers; i++ { + go w.startNodeCaller(ctx) + } + + go w.startUpserter(ctx, w.database) +} + +func (w *DmiWatcher) startNodeQuerier(ctx context.Context) { + ticker := time.NewTicker(w.interval) + queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) + for { + select { + case <-ticker.C: + queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) + case <-ctx.Done(): + return + } + } +} + +func (w *DmiWatcher) startNodeCaller(ctx context.Context) { + for { + select { + case twinId := <-w.nodeTwinIdsChan: + w.resultChan <- w.callNode(ctx, twinId) + case 
<-ctx.Done(): + return + } + } +} + +// TODO: make it generic and then assert the result in each watcher +func (w *DmiWatcher) callNode(ctx context.Context, twinId uint32) DMI { + var result DMI + subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) + defer cancel() + + err := w.rmbClient.Call(subCtx, twinId, DmiCallCmd, nil, &result) + if err != nil { + log.Error().Err(err).Uint32("twinId", twinId).Msg("failed to call node") + } + + return result +} + +func (w *DmiWatcher) startUpserter(ctx context.Context, database db.Database) { + for { + select { + case dmiData := <-w.resultChan: + log.Debug().Msgf("received: %+v", dmiData) + // collect in batch + // upsert in db + case <-ctx.Done(): + return + } + } +} diff --git a/grid-proxy/internal/indexer/gpu.go b/grid-proxy/internal/indexer/gpu.go index 849d0f292..40b4008e7 100644 --- a/grid-proxy/internal/indexer/gpu.go +++ b/grid-proxy/internal/indexer/gpu.go @@ -72,6 +72,7 @@ func (n *NodeGPUIndexer) queryNewNodes(ctx context.Context, twinIDs []uint32) { } } +// TODO: use the node in utils func (n *NodeGPUIndexer) runQueryGridNodes(ctx context.Context) { status := "up" filter := types.NodeFilter{ @@ -226,25 +227,3 @@ func (n *NodeGPUIndexer) watchNodeTable(ctx context.Context) { } } } - -// func (n *NodeGPUIndexer) relayCallback(ctx context.Context, p peer.Peer, response *rmbTypes.Envelope, callBackErr error) { -// output, err := peer.Json(response, callBackErr) -// if err != nil { -// log.Error().Err(err) -// return -// } - -// var nodesGPU []types.NodeGPU -// err = json.Unmarshal(output, &nodesGPU) -// if err != nil { -// log.Error().Err(err).RawJSON("data", output).Msg("failed to unmarshal GPU information response") -// return - -// } -// for i := range nodesGPU { -// nodesGPU[i].NodeTwinID = response.Source.Twin -// } -// if len(nodesGPU) != 0 { -// n.nodesGPUResultsChan <- nodesGPU -// } -// } diff --git a/grid-proxy/internal/indexer/health.go b/grid-proxy/internal/indexer/health.go index 005b45dc5..5069d20b9 100644 --- a/grid-proxy/internal/indexer/health.go +++ b/grid-proxy/internal/indexer/health.go @@ -11,8 +11,7 @@ import ( ) const ( - indexerCallTimeout = 10 * time.Second - indexerCallCommand = "zos.system.version" + healthCallCmd = "zos.system.version" ) type NodeHealthIndexer struct { @@ -54,12 +53,12 @@ func (c *NodeHealthIndexer) Start(ctx context.Context) { func (c *NodeHealthIndexer) startNodeQuerier(ctx context.Context) { ticker := time.NewTicker(c.indexerInterval) c.queryHealthyNodes(ctx) - c.queryGridNodes(ctx) + queryUpNodes(ctx, c.db, c.nodeTwinIdsChan) for { select { case <-ticker.C: c.queryHealthyNodes(ctx) - c.queryGridNodes(ctx) + queryUpNodes(ctx, c.db, c.nodeTwinIdsChan) case <-ctx.Done(): return } @@ -78,45 +77,13 @@ func (c *NodeHealthIndexer) queryHealthyNodes(ctx context.Context) { } } -func (c *NodeHealthIndexer) queryGridNodes(ctx context.Context) { - status := "up" - filter := types.NodeFilter{ - Status: &status, - } - - limit := types.Limit{ - Size: 100, - RetCount: true, - Page: 1, - } - - hasNext := true - for hasNext { - nodes, _, err := c.db.GetNodes(ctx, filter, limit) - if err != nil { - log.Error().Err(err).Msg("failed to query grid nodes") - } - - if len(nodes) < int(limit.Size) { - hasNext = false - } - - for _, node := range nodes { - c.nodeTwinIdsChan <- uint32(node.TwinID) - } - - limit.Page++ - } - -} - func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) { var result interface{} for { select { case twinId := <-c.nodeTwinIdsChan: subCtx, cancel := context.WithTimeout(ctx, 
indexerCallTimeout) - err := c.relayClient.Call(subCtx, twinId, indexerCallCommand, nil, &result) + err := c.relayClient.Call(subCtx, twinId, healthCallCmd, nil, &result) cancel() log.Debug().Msgf("health indexer: %+v", result) @@ -125,6 +92,7 @@ func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) { NodeTwinId: twinId, Healthy: isHealthy(err), } + // TODO: separate this on a different channel err = c.db.UpsertNodeHealth(ctx, healthReport) if err != nil { log.Error().Err(err).Msgf("failed to update health report for node with twin id %d", twinId) @@ -138,7 +106,3 @@ func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) { func isHealthy(err error) bool { return err == nil } - -// func generateSessionId() string { -// return fmt.Sprintf("node-health-indexer-%s", strings.Split(uuid.NewString(), "-")[0]) -// } diff --git a/grid-proxy/internal/indexer/indexer.go b/grid-proxy/internal/indexer/indexer.go index 9903d44d6..8535f2657 100644 --- a/grid-proxy/internal/indexer/indexer.go +++ b/grid-proxy/internal/indexer/indexer.go @@ -2,11 +2,16 @@ package indexer import ( "context" + "time" "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" ) +const ( + indexerCallTimeout = 10 * time.Second +) + type Watcher interface { Start(ctx context.Context) } diff --git a/grid-proxy/internal/indexer/types.go b/grid-proxy/internal/indexer/types.go new file mode 100644 index 000000000..f861c4684 --- /dev/null +++ b/grid-proxy/internal/indexer/types.go @@ -0,0 +1,47 @@ +package indexer + +// DMI represents a map of SectionTypeStr to Section parsed from dmidecode output, +// as well as information about the tool used to get these sections +// Property in section is in the form of key value pairs where values are optional +// and may include a list of items as well. +// k: [v] +// +// [ +// item1 +// item2 +// ... 
+// ] +type DMI struct { + Tooling Tooling `json:"tooling"` + Sections []Section `json:"sections"` +} + +// Tooling holds the information and version about the tool used to +// read DMI information +type Tooling struct { + Aggregator string `json:"aggregator"` + Decoder string `json:"decoder"` +} + +// Section represents a complete section like BIOS or Baseboard +type Section struct { + HandleLine string `json:"handleline"` + TypeStr string `json:"typestr,omitempty"` + Type Type `json:"typenum"` + SubSections []SubSection `json:"subsections"` +} + +// Type (allowed types 0 -> 42) +type Type int + +// SubSection represents part of a section, identified by a title +type SubSection struct { + Title string `json:"title"` + Properties map[string]PropertyData `json:"properties,omitempty"` +} + +// PropertyData represents a key value pair with optional list of items +type PropertyData struct { + Val string `json:"value"` + Items []string `json:"items,omitempty"` +} diff --git a/grid-proxy/internal/indexer/utils.go b/grid-proxy/internal/indexer/utils.go new file mode 100644 index 000000000..fe0409810 --- /dev/null +++ b/grid-proxy/internal/indexer/utils.go @@ -0,0 +1,32 @@ +package indexer + +import ( + "context" + + "github.com/rs/zerolog/log" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" +) + +func queryUpNodes(ctx context.Context, database db.Database, nodeTwinIdChan chan uint32) { + // status := "up" + filter := types.NodeFilter{} + limit := types.Limit{Size: 100, Page: 1} + hasNext := true + for hasNext { + nodes, _, err := database.GetNodes(ctx, filter, limit) + if err != nil { + log.Error().Err(err).Msg("failed to query grid nodes") + } + + if len(nodes) < int(limit.Size) { + hasNext = false + } + + for _, node := range nodes { + nodeTwinIdChan <- uint32(node.TwinID) + } + + limit.Page++ + } +} From fb323105a2a216d801e4633cd5ecf29278740054 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Sun, 4 Feb 2024 16:34:20 +0200 Subject: [PATCH 05/19] upsert dmi on database --- grid-proxy/internal/explorer/db/postgres.go | 28 +++++-- grid-proxy/internal/explorer/db/types.go | 3 + grid-proxy/internal/indexer/dmi.go | 93 +++++++++++++++++++-- grid-proxy/pkg/types/indexer.go | 73 ++++++++++++++++ 4 files changed, 179 insertions(+), 18 deletions(-) create mode 100644 grid-proxy/pkg/types/indexer.go diff --git a/grid-proxy/internal/explorer/db/postgres.go b/grid-proxy/internal/explorer/db/postgres.go index 07d21a769..999f658c6 100644 --- a/grid-proxy/internal/explorer/db/postgres.go +++ b/grid-proxy/internal/explorer/db/postgres.go @@ -62,14 +62,6 @@ func NewPostgresDatabase(host string, port int, user, password, dbname string, m return PostgresDatabase{}, errors.Wrap(err, "failed to configure DB connection") } - err = gormDB.AutoMigrate(&NodeGPU{}) - if err != nil { - return PostgresDatabase{}, errors.Wrap(err, "failed to migrate node_gpu table") - } - err = gormDB.AutoMigrate(&HealthReport{}) - if err != nil { - return PostgresDatabase{}, errors.Wrap(err, "failed to migrate health_report table") - } sql.SetMaxIdleConns(3) sql.SetMaxOpenConns(maxConns) @@ -87,7 +79,17 @@ func (d *PostgresDatabase) Close() error { } func (d *PostgresDatabase) Initialize() error { - return d.gormDB.Exec(setupFile).Error + err := d.gormDB.AutoMigrate(&NodeGPU{}, &HealthReport{}, &types.DmiInfo{}) + if err != nil { + return errors.Wrap(err, "failed to migrate indexer tables") + } + + err = d.gormDB.Exec(setupFile).Error + if err != 
nil { + return errors.Wrap(err, "failed to setup cache tables") + } + + return nil } // GetStats returns aggregate info about the grid @@ -905,3 +907,11 @@ func (p *PostgresDatabase) GetHealthyNodeTwinIds(ctx context.Context) ([]int64, err := p.gormDB.Table("health_report").Select("node_twin_id").Where("healthy = true").Scan(&nodeTwinIDs).Error return nodeTwinIDs, err } + +func (p *PostgresDatabase) UpsertNodeDmi(ctx context.Context, dmi []types.DmiInfo) error { + conflictClause := clause.OnConflict{ + Columns: []clause.Column{{Name: "node_twin_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"bios", "baseboard", "processor", "memory"}), + } + return p.gormDB.WithContext(ctx).Table("dmi_infos").Clauses(conflictClause).Create(&dmi).Error +} diff --git a/grid-proxy/internal/explorer/db/types.go b/grid-proxy/internal/explorer/db/types.go index 6c4c7401e..a832e448e 100644 --- a/grid-proxy/internal/explorer/db/types.go +++ b/grid-proxy/internal/explorer/db/types.go @@ -8,6 +8,7 @@ import ( // Database interface for storing and fetching grid info type Database interface { + // TODO: separate the setter/getter GetStats(ctx context.Context, filter types.StatsFilter) (types.Stats, error) GetNode(ctx context.Context, nodeID uint32) (Node, error) GetFarm(ctx context.Context, farmID uint32) (Farm, error) @@ -23,6 +24,8 @@ type Database interface { UpsertNodeHealth(ctx context.Context, healthReport types.HealthReport) error GetHealthyNodeTwinIds(ctx context.Context) ([]int64, error) GetConnectionString() string + + UpsertNodeDmi(ctx context.Context, dmi []types.DmiInfo) error } type ContractBilling types.ContractBilling diff --git a/grid-proxy/internal/indexer/dmi.go b/grid-proxy/internal/indexer/dmi.go index c76617bf4..5183ba773 100644 --- a/grid-proxy/internal/indexer/dmi.go +++ b/grid-proxy/internal/indexer/dmi.go @@ -6,20 +6,23 @@ import ( "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" ) const ( - DmiCallCmd = "zos.system.dmi" + DmiCallCmd = "zos.system.dmi" + flushingInterval = 60 * time.Second ) type DmiWatcher struct { database db.Database rmbClient *peer.RpcClient nodeTwinIdsChan chan uint32 - resultChan chan DMI + resultChan chan types.DmiInfo interval time.Duration workers uint + batchSize uint } func NewDmiWatcher( @@ -28,14 +31,16 @@ func NewDmiWatcher( rmbClient *peer.RpcClient, interval uint, workers uint, + batchSize uint, ) *DmiWatcher { return &DmiWatcher{ database: database, rmbClient: rmbClient, nodeTwinIdsChan: make(chan uint32), - resultChan: make(chan DMI), + resultChan: make(chan types.DmiInfo), interval: time.Duration(interval) * time.Minute, workers: workers, + batchSize: batchSize, } } @@ -49,6 +54,7 @@ func (w *DmiWatcher) Start(ctx context.Context) { go w.startUpserter(ctx, w.database) } +// TODO: not only on interval but also on any node goes from down>up or newly added nodes func (w *DmiWatcher) startNodeQuerier(ctx context.Context) { ticker := time.NewTicker(w.interval) queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) @@ -66,7 +72,13 @@ func (w *DmiWatcher) startNodeCaller(ctx context.Context) { for { select { case twinId := <-w.nodeTwinIdsChan: - w.resultChan <- w.callNode(ctx, twinId) + response, err := w.callNode(ctx, twinId) + if err != nil { + continue + } + parsedDmi := parseDmiResponse(response) + parsedDmi.NodeTwinId = twinId + w.resultChan <- parsedDmi case <-ctx.Done(): return } @@ 
-74,7 +86,7 @@ func (w *DmiWatcher) startNodeCaller(ctx context.Context) { } // TODO: make it generic and then assert the result in each watcher -func (w *DmiWatcher) callNode(ctx context.Context, twinId uint32) DMI { +func (w *DmiWatcher) callNode(ctx context.Context, twinId uint32) (DMI, error) { var result DMI subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) defer cancel() @@ -84,18 +96,81 @@ func (w *DmiWatcher) callNode(ctx context.Context, twinId uint32) DMI { log.Error().Err(err).Uint32("twinId", twinId).Msg("failed to call node") } - return result + return result, err } func (w *DmiWatcher) startUpserter(ctx context.Context, database db.Database) { + buffer := make([]types.DmiInfo, w.batchSize) + + ticker := time.NewTicker(flushingInterval) for { select { case dmiData := <-w.resultChan: - log.Debug().Msgf("received: %+v", dmiData) - // collect in batch - // upsert in db + buffer = append(buffer, dmiData) + if len(buffer) >= int(w.batchSize) { + err := w.database.UpsertNodeDmi(ctx, buffer) + if err != nil { + log.Error().Err(err).Msgf("failed") + } + buffer = nil + } + case <-ticker.C: + if len(buffer) != 0 { + err := w.database.UpsertNodeDmi(ctx, buffer) + if err != nil { + log.Error().Err(err).Msgf("failed") + } + buffer = nil + } case <-ctx.Done(): return } } } + +func parseDmiResponse(dmiResponse DMI) types.DmiInfo { + var info types.DmiInfo + for _, sec := range dmiResponse.Sections { + if sec.TypeStr == "Processor" { + for _, subSec := range sec.SubSections { + if subSec.Title == "Processor Information" { + info.Processor = append(info.Processor, types.Processor{ + Version: subSec.Properties["Version"].Val, + ThreadCount: subSec.Properties["Thread Count"].Val, + }) + } + } + } + if sec.TypeStr == "MemoryDevice" { + for _, subSec := range sec.SubSections { + if subSec.Title == "Memory Device" { + if subSec.Properties["Type"].Val == "Unknown" { + continue + } + info.Memory = append(info.Memory, types.Memory{ + Type: subSec.Properties["Type"].Val, + Manufacturer: subSec.Properties["Manufacturer"].Val, + }) + } + } + } + if sec.TypeStr == "Baseboard" { + for _, subSec := range sec.SubSections { + if subSec.Title == "Base Board Information" { + info.Baseboard.Manufacturer = subSec.Properties["Manufacturer"].Val + info.Baseboard.ProductName = subSec.Properties["Product Name"].Val + } + } + } + if sec.TypeStr == "BIOS" { + for _, subSec := range sec.SubSections { + if subSec.Title == "BIOS Information" { + info.BIOS.Vendor = subSec.Properties["Vendor"].Val + info.BIOS.Version = subSec.Properties["Version"].Val + } + } + } + } + + return info +} diff --git a/grid-proxy/pkg/types/indexer.go b/grid-proxy/pkg/types/indexer.go new file mode 100644 index 000000000..ef6140b47 --- /dev/null +++ b/grid-proxy/pkg/types/indexer.go @@ -0,0 +1,73 @@ +package types + +import ( + "database/sql/driver" + "encoding/json" + "errors" +) + +type DmiInfo struct { + NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` + BIOS BIOS `json:"bios" gorm:"type:jsonb"` + Baseboard BaseBoard `json:"baseboard" gorm:"type:jsonb"` + Processor ProcessorArray `json:"processor" gorm:"type:jsonb"` + Memory MemoryArray `json:"memory" gorm:"type:jsonb"` +} + +func (HealthReport) TableName() string { + return "dmi_info" +} + +type BIOS struct { + Vendor string `json:"vendor"` + Version string `json:"version"` +} + +type BaseBoard struct { + Manufacturer string `json:"manufacturer"` + ProductName string `json:"product_name"` +} + +type Processor struct { + Version string `json:"version"` + ThreadCount 
string `json:"thread_count"` +} + +type Memory struct { + Manufacturer string `json:"manufacturer"` + Type string `json:"type"` +} + +type ProcessorArray []Processor + +type MemoryArray []Memory + +func (p ProcessorArray) Value() (driver.Value, error) { + return json.Marshal(p) +} + +func (p *ProcessorArray) Scan(value interface{}) error { + if value == nil { + return nil + } + bytes, ok := value.([]byte) + if !ok { + return errors.New("Invalid data type for Processor") + } + return json.Unmarshal(bytes, &p) +} + +func (p MemoryArray) Value() (driver.Value, error) { + return json.Marshal(p) +} + +func (p *MemoryArray) Scan(value interface{}) error { + if value == nil { + return nil + } + bytes, ok := value.([]byte) + if !ok { + return errors.New("Invalid data type for Processor") + } + return json.Unmarshal(bytes, &p) +} From 606f301b23773ba697d2301d59f4b8df904861ed Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Sun, 4 Feb 2024 22:10:14 +0200 Subject: [PATCH 06/19] add network speed watcher --- grid-proxy/internal/explorer/db/postgres.go | 10 +- grid-proxy/internal/explorer/db/types.go | 1 + grid-proxy/internal/indexer/network_speed.go | 144 +++++++++++++++++++ grid-proxy/pkg/types/indexer.go | 11 ++ 4 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 grid-proxy/internal/indexer/network_speed.go diff --git a/grid-proxy/internal/explorer/db/postgres.go b/grid-proxy/internal/explorer/db/postgres.go index 999f658c6..687c94edc 100644 --- a/grid-proxy/internal/explorer/db/postgres.go +++ b/grid-proxy/internal/explorer/db/postgres.go @@ -79,7 +79,7 @@ func (d *PostgresDatabase) Close() error { } func (d *PostgresDatabase) Initialize() error { - err := d.gormDB.AutoMigrate(&NodeGPU{}, &HealthReport{}, &types.DmiInfo{}) + err := d.gormDB.AutoMigrate(&NodeGPU{}, &HealthReport{}, &types.DmiInfo{}, &types.NetworkTestResult{}) if err != nil { return errors.Wrap(err, "failed to migrate indexer tables") } @@ -915,3 +915,11 @@ func (p *PostgresDatabase) UpsertNodeDmi(ctx context.Context, dmi []types.DmiInf } return p.gormDB.WithContext(ctx).Table("dmi_infos").Clauses(conflictClause).Create(&dmi).Error } + +func (p *PostgresDatabase) UpsertNetworkSpeed(ctx context.Context, report []types.NetworkTestResult) error { + conflictClause := clause.OnConflict{ + Columns: []clause.Column{{Name: "node_twin_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"download_speed", "upload_speed"}), + } + return p.gormDB.WithContext(ctx).Table("network_test_results").Clauses(conflictClause).Create(&report).Error +} diff --git a/grid-proxy/internal/explorer/db/types.go b/grid-proxy/internal/explorer/db/types.go index a832e448e..2815bee6f 100644 --- a/grid-proxy/internal/explorer/db/types.go +++ b/grid-proxy/internal/explorer/db/types.go @@ -26,6 +26,7 @@ type Database interface { GetConnectionString() string UpsertNodeDmi(ctx context.Context, dmi []types.DmiInfo) error + UpsertNetworkSpeed(ctx context.Context, speed []types.NetworkTestResult) error } type ContractBilling types.ContractBilling diff --git a/grid-proxy/internal/indexer/network_speed.go b/grid-proxy/internal/indexer/network_speed.go new file mode 100644 index 000000000..fb6fa9b6d --- /dev/null +++ b/grid-proxy/internal/indexer/network_speed.go @@ -0,0 +1,144 @@ +package indexer + +import ( + "context" + "time" + + "github.com/rs/zerolog/log" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" + "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" 
+) + +const ( + perfTestCallCmd = "zos.perf.get" + testName = "iperf" +) + +type SpeedWatcher struct { + database db.Database + rmbClient *peer.RpcClient + nodeTwinIdsChan chan uint32 + resultChan chan types.NetworkTestResult + interval time.Duration + workers uint + batchSize uint +} + +func NewSpeedWatcher( + ctx context.Context, + database db.Database, + rmbClient *peer.RpcClient, + interval uint, + workers uint, + batchSize uint, +) *SpeedWatcher { + return &SpeedWatcher{ + database: database, + rmbClient: rmbClient, + nodeTwinIdsChan: make(chan uint32), + resultChan: make(chan types.NetworkTestResult), + interval: time.Duration(interval) * time.Minute, + workers: workers, + batchSize: batchSize, + } +} + +func (w *SpeedWatcher) Start(ctx context.Context) { + go w.startNodeQuerier(ctx) + + for i := uint(0); i < w.workers; i++ { + go w.startNodeCaller(ctx) + } + + go w.startUpserter(ctx, w.database) +} + +// TODO: not only on interval but also on any node goes from down>up or newly added nodes +func (w *SpeedWatcher) startNodeQuerier(ctx context.Context) { + ticker := time.NewTicker(w.interval) + queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) + for { + select { + case <-ticker.C: + queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) + case <-ctx.Done(): + return + } + } +} + +func (w *SpeedWatcher) startNodeCaller(ctx context.Context) { + for { + select { + case twinId := <-w.nodeTwinIdsChan: + response, err := w.callNode(ctx, twinId) + if err != nil { + continue + } + parsed := parse(response, twinId) + log.Info().Msgf("got: %+v", parsed) + w.resultChan <- parsed + case <-ctx.Done(): + return + } + } +} + +// TODO: make it generic and then assert the result in each watcher +func (w *SpeedWatcher) callNode(ctx context.Context, twinId uint32) (types.PerfResult, error) { + var result types.PerfResult + subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) + defer cancel() + + payload := struct { + Name string + }{ + Name: testName, + } + err := w.rmbClient.Call(subCtx, twinId, perfTestCallCmd, payload, &result) + if err != nil { + log.Error().Err(err).Uint32("twinId", twinId).Msg("failed to call node") + } + + return result, err +} + +func (w *SpeedWatcher) startUpserter(ctx context.Context, database db.Database) { + buffer := make([]types.NetworkTestResult, w.batchSize) + + ticker := time.NewTicker(flushingInterval) + for { + select { + case report := <-w.resultChan: + buffer = append(buffer, report) + if len(buffer) >= int(w.batchSize) { + err := w.database.UpsertNetworkSpeed(ctx, buffer) + if err != nil { + log.Error().Err(err) + } + buffer = nil + } + case <-ticker.C: + if len(buffer) != 0 { + err := w.database.UpsertNetworkSpeed(ctx, buffer) + if err != nil { + log.Error().Err(err) + } + buffer = nil + } + case <-ctx.Done(): + return + } + } +} + +func parse(res types.PerfResult, twinId uint32) types.NetworkTestResult { + for _, report := range res.Result { + if report.DownloadSpeed != 0 { + report.NodeTwinId = twinId + return report + } + } + return types.NetworkTestResult{} +} diff --git a/grid-proxy/pkg/types/indexer.go b/grid-proxy/pkg/types/indexer.go index ef6140b47..58cd1b6be 100644 --- a/grid-proxy/pkg/types/indexer.go +++ b/grid-proxy/pkg/types/indexer.go @@ -71,3 +71,14 @@ func (p *MemoryArray) Scan(value interface{}) error { } return json.Unmarshal(bytes, &p) } + +type NetworkTestResult struct { + NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` + UploadSpeed float64 `json:"upload_speed"` // in bit/sec + DownloadSpeed float64 `json:"download_speed"` 
// in bit/sec +} + +type PerfResult struct { + NodeTwinId uint32 `json:"node_twin_id"` + Result []NetworkTestResult `json:"result"` +} From 8ae0a14001702d45086c23c5edcb7a9c3b200b9c Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Wed, 7 Feb 2024 13:35:07 +0200 Subject: [PATCH 07/19] implement an Scanner/Valuer interfaces for custom gorm jsonb types in dmi model --- grid-proxy/internal/explorer/converters.go | 20 +++++ grid-proxy/internal/explorer/db/postgres.go | 6 ++ grid-proxy/internal/explorer/db/setup.sql | 26 +++++- grid-proxy/internal/explorer/db/types.go | 6 ++ grid-proxy/pkg/types/indexer.go | 87 +++++++++++++++------ grid-proxy/pkg/types/nodes.go | 9 +++ 6 files changed, 125 insertions(+), 29 deletions(-) diff --git a/grid-proxy/internal/explorer/converters.go b/grid-proxy/internal/explorer/converters.go index 86a3bb952..cd8ec7019 100644 --- a/grid-proxy/internal/explorer/converters.go +++ b/grid-proxy/internal/explorer/converters.go @@ -59,6 +59,16 @@ func nodeFromDBNode(info db.Node) types.Node { NumGPU: info.NumGPU, ExtraFee: info.ExtraFee, Healthy: info.Healthy, + Dmi: types.DmiInfo{ + Processor: info.Processor, + Memory: info.Memory, + BIOS: types.BIOS(info.Bios), + Baseboard: info.Baseboard, + }, + Speed: types.Speed{ + Upload: info.UploadSpeed, + Download: info.DownloadSpeed, + }, } node.Status = nodestatus.DecideNodeStatus(node.Power, node.UpdatedAt) node.Dedicated = info.FarmDedicated || info.NodeContractsCount == 0 || info.Renter != 0 @@ -133,6 +143,16 @@ func nodeWithNestedCapacityFromDBNode(info db.Node) types.NodeWithNestedCapacity NumGPU: info.NumGPU, ExtraFee: info.ExtraFee, Healthy: info.Healthy, + Dmi: types.DmiInfo{ + Processor: info.Processor, + Memory: info.Memory, + BIOS: types.BIOS(info.Bios), + Baseboard: info.Baseboard, + }, + Speed: types.Speed{ + Upload: info.UploadSpeed, + Download: info.DownloadSpeed, + }, } node.Status = nodestatus.DecideNodeStatus(node.Power, node.UpdatedAt) node.Dedicated = info.FarmDedicated || info.NodeContractsCount == 0 || info.Renter != 0 diff --git a/grid-proxy/internal/explorer/db/postgres.go b/grid-proxy/internal/explorer/db/postgres.go index 687c94edc..67ee4472f 100644 --- a/grid-proxy/internal/explorer/db/postgres.go +++ b/grid-proxy/internal/explorer/db/postgres.go @@ -303,6 +303,12 @@ func (d *PostgresDatabase) nodeTableQuery(ctx context.Context, filter types.Node "resources_cache.node_contracts_count", "resources_cache.node_gpu_count AS num_gpu", "health_report.healthy", + "resources_cache.bios", + "resources_cache.baseboard", + "resources_cache.memory", + "resources_cache.processor", + "resources_cache.upload_speed", + "resources_cache.download_speed", ). 
Joins(` LEFT JOIN resources_cache ON node.node_id = resources_cache.node_id diff --git a/grid-proxy/internal/explorer/db/setup.sql b/grid-proxy/internal/explorer/db/setup.sql index 9c0a84432..cee8d5634 100644 --- a/grid-proxy/internal/explorer/db/setup.sql +++ b/grid-proxy/internal/explorer/db/setup.sql @@ -51,7 +51,13 @@ SELECT count(node_contract.contract_id) as node_contracts_count, COALESCE(node_gpu.node_gpu_count, 0) as node_gpu_count, node.country as country, - country.region as region + country.region as region, + COALESCE(dmi_infos.bios, '{}') as bios, + COALESCE(dmi_infos.baseboard, '{}') as baseboard, + COALESCE(dmi_infos.processor, '[]') as processor, + COALESCE(dmi_infos.memory, '[]') as memory, + COALESCE(network_test_results.upload_speed, 0) as upload_speed, + COALESCE(network_test_results.download_speed, 0) as download_speed FROM node LEFT JOIN node_contract ON node.node_id = node_contract.node_id AND node_contract.state IN ('Created', 'GracePeriod') LEFT JOIN contract_resources ON node_contract.resources_used_id = contract_resources.id @@ -66,6 +72,8 @@ FROM node node_twin_id ) AS node_gpu ON node.twin_id = node_gpu.node_twin_id LEFT JOIN country ON LOWER(node.country) = LOWER(country.name) + LEFT JOIN network_test_results ON node.twin_id = network_test_results.node_twin_id + LEFT JOIN dmi_infos ON node.twin_id = dmi_infos.node_twin_id GROUP BY node.node_id, node_resources_total.mru, @@ -77,7 +85,13 @@ GROUP BY rent_contract.twin_id, COALESCE(node_gpu.node_gpu_count, 0), node.country, - country.region; + country.region, + COALESCE(dmi_infos.bios, '{}'), + COALESCE(dmi_infos.baseboard, '{}'), + COALESCE(dmi_infos.processor, '[]'), + COALESCE(dmi_infos.memory, '[]'), + COALESCE(network_test_results.upload_speed, 0), + COALESCE(network_test_results.download_speed, 0); DROP TABLE IF EXISTS resources_cache; CREATE TABLE IF NOT EXISTS resources_cache( @@ -99,7 +113,13 @@ CREATE TABLE IF NOT EXISTS resources_cache( node_contracts_count INTEGER NOT NULL, node_gpu_count INTEGER NOT NULL, country TEXT, - region TEXT + region TEXT, + bios jsonb, + baseboard jsonb, + processor jsonb, + memory jsonb, + upload_speed numeric, + download_speed numeric ); INSERT INTO resources_cache diff --git a/grid-proxy/internal/explorer/db/types.go b/grid-proxy/internal/explorer/db/types.go index 2815bee6f..6ec2ba869 100644 --- a/grid-proxy/internal/explorer/db/types.go +++ b/grid-proxy/internal/explorer/db/types.go @@ -84,6 +84,12 @@ type Node struct { ExtraFee uint64 NodeContractsCount uint64 `gorm:"node_contracts_count"` Healthy bool + Bios types.BIOS `gorm:"type:jsonb"` + Baseboard types.Baseboard `gorm:"type:jsonb"` + Memory types.Memories `gorm:"type:jsonb"` + Processor types.Processors `gorm:"type:jsonb"` + UploadSpeed uint64 + DownloadSpeed uint64 } // NodePower struct is the farmerbot report for node status diff --git a/grid-proxy/pkg/types/indexer.go b/grid-proxy/pkg/types/indexer.go index 58cd1b6be..a9ebaaebb 100644 --- a/grid-proxy/pkg/types/indexer.go +++ b/grid-proxy/pkg/types/indexer.go @@ -6,16 +6,23 @@ import ( "errors" ) -type DmiInfo struct { - NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` - BIOS BIOS `json:"bios" gorm:"type:jsonb"` - Baseboard BaseBoard `json:"baseboard" gorm:"type:jsonb"` - Processor ProcessorArray `json:"processor" gorm:"type:jsonb"` - Memory MemoryArray `json:"memory" gorm:"type:jsonb"` +type NetworkTestResult struct { + NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` + UploadSpeed float64 `json:"upload_speed"` // in bit/sec + 
DownloadSpeed float64 `json:"download_speed"` // in bit/sec +} + +type PerfResult struct { + NodeTwinId uint32 `json:"node_twin_id"` + Result []NetworkTestResult `json:"result"` } -func (HealthReport) TableName() string { - return "dmi_info" +type DmiInfo struct { + NodeTwinId uint32 `json:"node_twin_id,omitempty" gorm:"unique;not null"` + BIOS BIOS `json:"bios" gorm:"type:jsonb"` + Baseboard Baseboard `json:"baseboard" gorm:"type:jsonb"` + Processor Processors `json:"processor" gorm:"type:jsonb"` + Memory Memories `json:"memory" gorm:"type:jsonb"` } type BIOS struct { @@ -23,7 +30,7 @@ type BIOS struct { Version string `json:"version"` } -type BaseBoard struct { +type Baseboard struct { Manufacturer string `json:"manufacturer"` ProductName string `json:"product_name"` } @@ -32,21 +39,30 @@ type Processor struct { Version string `json:"version"` ThreadCount string `json:"thread_count"` } +type Processors []Processor type Memory struct { Manufacturer string `json:"manufacturer"` Type string `json:"type"` } +type Memories []Memory -type ProcessorArray []Processor +/* +GORM directly maps the structs to tables. These structs can contain fields with basic Go types, +pointers or aliases of these types, or even custom types, as long as they implement the Scanner +and Valuer interfaces from the database/sql package. -type MemoryArray []Memory +Notes: + - For simple types like the BIOS struct, we can directly implement the Scan/Value methods. + However, for types like []Processor, we need to create an alias, Processors, + so we have method receivers. +*/ -func (p ProcessorArray) Value() (driver.Value, error) { - return json.Marshal(p) +func (c Processors) Value() (driver.Value, error) { + return json.Marshal(c) } -func (p *ProcessorArray) Scan(value interface{}) error { +func (c *Processors) Scan(value interface{}) error { if value == nil { return nil } @@ -54,14 +70,14 @@ func (p *ProcessorArray) Scan(value interface{}) error { if !ok { return errors.New("Invalid data type for Processor") } - return json.Unmarshal(bytes, &p) + return json.Unmarshal(bytes, &c) } -func (p MemoryArray) Value() (driver.Value, error) { - return json.Marshal(p) +func (c Memories) Value() (driver.Value, error) { + return json.Marshal(c) } -func (p *MemoryArray) Scan(value interface{}) error { +func (c *Memories) Scan(value interface{}) error { if value == nil { return nil } @@ -69,16 +85,35 @@ func (p *MemoryArray) Scan(value interface{}) error { if !ok { return errors.New("Invalid data type for Processor") } - return json.Unmarshal(bytes, &p) + return json.Unmarshal(bytes, &c) } -type NetworkTestResult struct { - NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` - UploadSpeed float64 `json:"upload_speed"` // in bit/sec - DownloadSpeed float64 `json:"download_speed"` // in bit/sec +func (c *BIOS) Value() (driver.Value, error) { + return json.Marshal(c) } -type PerfResult struct { - NodeTwinId uint32 `json:"node_twin_id"` - Result []NetworkTestResult `json:"result"` +func (c *BIOS) Scan(value interface{}) error { + if value == nil { + return nil + } + bytes, ok := value.([]byte) + if !ok { + return errors.New("Invalid data type for Processor") + } + return json.Unmarshal(bytes, &c) +} + +func (c *Baseboard) Value() (driver.Value, error) { + return json.Marshal(c) +} + +func (c *Baseboard) Scan(value interface{}) error { + if value == nil { + return nil + } + bytes, ok := value.([]byte) + if !ok { + return errors.New("Invalid data type for Processor") + } + return json.Unmarshal(bytes, &c) } diff --git 
a/grid-proxy/pkg/types/nodes.go b/grid-proxy/pkg/types/nodes.go index f49a42f74..73511c8b8 100644 --- a/grid-proxy/pkg/types/nodes.go +++ b/grid-proxy/pkg/types/nodes.go @@ -45,6 +45,8 @@ type Node struct { NumGPU int `json:"num_gpu" sort:"num_gpu"` ExtraFee uint64 `json:"extraFee" sort:"extra_fee"` Healthy bool `json:"healthy"` + Dmi DmiInfo `json:"dmi"` + Speed Speed `json:"speed"` } // CapacityResult is the NodeData capacity results to unmarshal json in it @@ -53,6 +55,11 @@ type CapacityResult struct { Used Capacity `json:"used_resources"` } +type Speed struct { + Upload uint64 `json:"upload"` + Download uint64 `json:"download"` +} + // Node to be compatible with old view type NodeWithNestedCapacity struct { ID string `json:"id"` @@ -81,6 +88,8 @@ type NodeWithNestedCapacity struct { NumGPU int `json:"num_gpu"` ExtraFee uint64 `json:"extraFee"` Healthy bool `json:"healthy"` + Dmi DmiInfo `json:"dmi"` + Speed Speed `json:"speed"` } // PublicConfig node public config From 6f01097942ae5bc1fa4ae030133f17df72939a46 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Wed, 7 Feb 2024 13:55:17 +0200 Subject: [PATCH 08/19] fix network speed types --- grid-proxy/internal/explorer/db/types.go | 4 ++-- grid-proxy/internal/indexer/network_speed.go | 3 +++ grid-proxy/pkg/types/nodes.go | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/grid-proxy/internal/explorer/db/types.go b/grid-proxy/internal/explorer/db/types.go index 6ec2ba869..9b88eb32b 100644 --- a/grid-proxy/internal/explorer/db/types.go +++ b/grid-proxy/internal/explorer/db/types.go @@ -88,8 +88,8 @@ type Node struct { Baseboard types.Baseboard `gorm:"type:jsonb"` Memory types.Memories `gorm:"type:jsonb"` Processor types.Processors `gorm:"type:jsonb"` - UploadSpeed uint64 - DownloadSpeed uint64 + UploadSpeed float64 + DownloadSpeed float64 } // NodePower struct is the farmerbot report for node status diff --git a/grid-proxy/internal/indexer/network_speed.go b/grid-proxy/internal/indexer/network_speed.go index fb6fa9b6d..bf51402aa 100644 --- a/grid-proxy/internal/indexer/network_speed.go +++ b/grid-proxy/internal/indexer/network_speed.go @@ -134,6 +134,9 @@ func (w *SpeedWatcher) startUpserter(ctx context.Context, database db.Database) } func parse(res types.PerfResult, twinId uint32) types.NetworkTestResult { + // TODO: better parsing + // we have four speeds tcp/udp for ipv4/ipv6. 
+ // now, we just pick the first non-zero for _, report := range res.Result { if report.DownloadSpeed != 0 { report.NodeTwinId = twinId diff --git a/grid-proxy/pkg/types/nodes.go b/grid-proxy/pkg/types/nodes.go index 73511c8b8..b71b4e016 100644 --- a/grid-proxy/pkg/types/nodes.go +++ b/grid-proxy/pkg/types/nodes.go @@ -56,8 +56,8 @@ type CapacityResult struct { } type Speed struct { - Upload uint64 `json:"upload"` - Download uint64 `json:"download"` + Upload float64 `json:"upload"` + Download float64 `json:"download"` } // Node to be compatible with old view From a2ec7282ea76d0a49df15a6697b6432b414cf3f6 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Thu, 8 Feb 2024 12:41:11 +0200 Subject: [PATCH 09/19] add triggers for dmi/speed tables --- grid-proxy/internal/explorer/db/setup.sql | 57 ++++++++++++++++++++ grid-proxy/internal/indexer/dmi.go | 2 +- grid-proxy/internal/indexer/network_speed.go | 3 +- grid-proxy/internal/indexer/utils.go | 6 ++- 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/grid-proxy/internal/explorer/db/setup.sql b/grid-proxy/internal/explorer/db/setup.sql index cee8d5634..31fb0e96e 100644 --- a/grid-proxy/internal/explorer/db/setup.sql +++ b/grid-proxy/internal/explorer/db/setup.sql @@ -396,6 +396,63 @@ CREATE OR REPLACE TRIGGER tg_rent_contract AFTER INSERT OR UPDATE OF state ON rent_contract FOR EACH ROW EXECUTE PROCEDURE reflect_rent_contract_changes(); +/* + Dmi trigger + - Insert new record/Update > update resources_cache +*/ +CREATE OR REPLACE FUNCTION reflect_dmi_changes() RETURNS TRIGGER AS +$$ +BEGIN + BEGIN + UPDATE resources_cache + SET bios = NEW.bios, + baseboard = NEW.baseboard, + processor = NEW.processor, + memory = NEW.memory + WHERE resources_cache.node_id = ( + SELECT node_id from node where node.twin_id = NEW.node_twin_id + ); + EXCEPTION + WHEN OTHERS THEN + RAISE NOTICE 'Error updating resources_cache dmi fields %', SQLERRM; + END; +RETURN NULL; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE TRIGGER tg_dmi + AFTER INSERT OR UPDATE ON dmi_infos FOR EACH ROW + EXECUTE PROCEDURE reflect_dmi_changes(); + + + +/* + speed trigger + - Insert new record/Update > update resources_cache +*/ +CREATE OR REPLACE FUNCTION reflect_speed_changes() RETURNS TRIGGER AS +$$ +BEGIN + BEGIN + UPDATE resources_cache + SET upload_speed = NEW.upload_speed, + download_speed = NEW.download_speed + WHERE resources_cache.node_id = ( + SELECT node_id from node where node.twin_id = NEW.node_twin_id + ); + EXCEPTION + WHEN OTHERS THEN + RAISE NOTICE 'Error updating resources_cache speed fields %', SQLERRM; + END; +RETURN NULL; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE TRIGGER tg_speed + AFTER INSERT OR UPDATE ON network_test_results FOR EACH ROW + EXECUTE PROCEDURE reflect_speed_changes(); + + /* Public ips trigger - Insert new ip > increment free/total ips + re-aggregate ips object diff --git a/grid-proxy/internal/indexer/dmi.go b/grid-proxy/internal/indexer/dmi.go index 5183ba773..e48ef2faa 100644 --- a/grid-proxy/internal/indexer/dmi.go +++ b/grid-proxy/internal/indexer/dmi.go @@ -100,7 +100,7 @@ func (w *DmiWatcher) callNode(ctx context.Context, twinId uint32) (DMI, error) { } func (w *DmiWatcher) startUpserter(ctx context.Context, database db.Database) { - buffer := make([]types.DmiInfo, w.batchSize) + buffer := make([]types.DmiInfo, 0, w.batchSize) ticker := time.NewTicker(flushingInterval) for { diff --git a/grid-proxy/internal/indexer/network_speed.go b/grid-proxy/internal/indexer/network_speed.go index bf51402aa..dca50dc8d 100644 --- 
a/grid-proxy/internal/indexer/network_speed.go +++ b/grid-proxy/internal/indexer/network_speed.go @@ -77,7 +77,6 @@ func (w *SpeedWatcher) startNodeCaller(ctx context.Context) { continue } parsed := parse(response, twinId) - log.Info().Msgf("got: %+v", parsed) w.resultChan <- parsed case <-ctx.Done(): return @@ -105,7 +104,7 @@ func (w *SpeedWatcher) callNode(ctx context.Context, twinId uint32) (types.PerfR } func (w *SpeedWatcher) startUpserter(ctx context.Context, database db.Database) { - buffer := make([]types.NetworkTestResult, w.batchSize) + buffer := make([]types.NetworkTestResult, 0, w.batchSize) ticker := time.NewTicker(flushingInterval) for { diff --git a/grid-proxy/internal/indexer/utils.go b/grid-proxy/internal/indexer/utils.go index fe0409810..5071a1c61 100644 --- a/grid-proxy/internal/indexer/utils.go +++ b/grid-proxy/internal/indexer/utils.go @@ -9,8 +9,10 @@ import ( ) func queryUpNodes(ctx context.Context, database db.Database, nodeTwinIdChan chan uint32) { - // status := "up" - filter := types.NodeFilter{} + status := "up" + filter := types.NodeFilter{ + Status: &status, + } limit := types.Limit{Size: 100, Page: 1} hasNext := true for hasNext { From fae4ee791c7dd3a7e518d89f8f30bacc8bfba845 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Thu, 8 Feb 2024 16:13:00 +0200 Subject: [PATCH 10/19] add generators: - organize the schema file & add new indexer tables - add generator for the new indexer tables - use gorm instead of plain sql in the crafter --- grid-proxy/tests/queries/main_test.go | 23 ++++- grid-proxy/tools/db/crafter/generator.go | 50 ++++++++++ grid-proxy/tools/db/crafter/test_values.go | 51 +++++++++++ grid-proxy/tools/db/crafter/types.go | 30 ++---- grid-proxy/tools/db/db.go | 20 +++- grid-proxy/tools/db/generate.go | 14 ++- grid-proxy/tools/db/schema.sql | 101 ++++++++++++++------- 7 files changed, 224 insertions(+), 65 deletions(-) create mode 100644 grid-proxy/tools/db/crafter/test_values.go diff --git a/grid-proxy/tests/queries/main_test.go b/grid-proxy/tests/queries/main_test.go index bd30e0015..f3037b29a 100644 --- a/grid-proxy/tests/queries/main_test.go +++ b/grid-proxy/tests/queries/main_test.go @@ -17,6 +17,8 @@ import ( proxyclient "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/client" mock "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/tests/queries/mock_client" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/tools/db/crafter" + "gorm.io/driver/postgres" + "gorm.io/gorm" "gorm.io/gorm/logger" ) @@ -65,7 +67,20 @@ func TestMain(m *testing.M) { if err != nil { panic(errors.Wrap(err, "failed to open db")) } - defer db.Close() + gormDB, err := gorm.Open(postgres.Open(psqlInfo), &gorm.Config{ + Logger: logger.Default.LogMode(4), + }) + if err != nil { + panic(fmt.Errorf("failed to generate gorm db: %w", err)) + } + defer func() { + db.Close() + db_gorm, err := gormDB.DB() + if err != nil { + panic(fmt.Errorf("failed to get gorm db: %w", err)) + } + db_gorm.Close() + }() // proxy client gridProxyClient = proxyclient.NewClient(ENDPOINT) @@ -84,7 +99,7 @@ func TestMain(m *testing.M) { } if !NO_MODIFY { - err = modifyDataToFireTriggers(db, data) + err = modifyDataToFireTriggers(db, gormDB, data) if err != nil { panic(err) } @@ -99,7 +114,7 @@ func TestMain(m *testing.M) { os.Exit(exitCode) } -func modifyDataToFireTriggers(db *sql.DB, data mock.DBData) error { +func modifyDataToFireTriggers(db *sql.DB, gormDB *gorm.DB, data mock.DBData) error { twinStart := len(data.Twins) + 1 farmStart := len(data.Farms) + 1 nodeStart := len(data.Nodes) + 1 
@@ -117,7 +132,7 @@ func modifyDataToFireTriggers(db *sql.DB, data mock.DBData) error { RentContractCount = 1 ) - generator := crafter.NewCrafter(db, + generator := crafter.NewCrafter(db, gormDB, SEED, NodeCount, FarmCount, diff --git a/grid-proxy/tools/db/crafter/generator.go b/grid-proxy/tools/db/crafter/generator.go index 3e01534a6..a4bd20594 100644 --- a/grid-proxy/tools/db/crafter/generator.go +++ b/grid-proxy/tools/db/crafter/generator.go @@ -2,10 +2,12 @@ package crafter import ( "fmt" + "math/rand" "strings" "time" "github.com/google/uuid" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/zos/pkg/gridtypes" ) @@ -859,3 +861,51 @@ func (c *Crafter) GenerateCountries() error { return nil } + +func (c *Crafter) GenerateSpeedReports() error { + start := c.NodeStart + end := c.NodeStart + c.NodeCount + nodeTwinsStart := c.TwinStart + (c.FarmStart + c.FarmCount) + + var speedReports []types.NetworkTestResult + for i := start; i < end; i += 2 { + speedReport := types.NetworkTestResult{ + NodeTwinId: uint32(nodeTwinsStart + i), + UploadSpeed: rand.Float64() * float64(rand.Intn(9999999)), + DownloadSpeed: rand.Float64() * float64(rand.Intn(9999999)), + } + speedReports = append(speedReports, speedReport) + } + + if err := c.gormDB.Create(speedReports).Error; err != nil { + return fmt.Errorf("failed to insert speed: %w", err) + } + fmt.Println("speed reports generated") + + return nil +} + +func (c *Crafter) GenerateDmi() error { + start := c.NodeStart + end := c.NodeStart + c.NodeCount + nodeTwinsStart := c.TwinStart + (c.FarmStart + c.FarmCount) + + var dmis []types.DmiInfo + for i := start; i < end; i++ { + dmi := types.DmiInfo{ + NodeTwinId: uint32(nodeTwinsStart + i), + BIOS: bios[rand.Intn(len(bios))], + Baseboard: baseboard[rand.Intn(len(baseboard))], + Processor: processor[:rand.Intn(len(processor))], + Memory: memory[:rand.Intn(len(memory))], + } + dmis = append(dmis, dmi) + } + + if err := c.gormDB.Create(dmis).Error; err != nil { + return fmt.Errorf("failed to insert dmi: %w", err) + } + fmt.Println("dmi reports generated") + + return nil +} diff --git a/grid-proxy/tools/db/crafter/test_values.go b/grid-proxy/tools/db/crafter/test_values.go new file mode 100644 index 000000000..767efd1c6 --- /dev/null +++ b/grid-proxy/tools/db/crafter/test_values.go @@ -0,0 +1,51 @@ +package crafter + +import "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" + +var ( + countries = []string{"Belgium", "United States", "Egypt", "United Kingdom"} + regions = map[string]string{ + "Belgium": "Europe", + "United States": "Americas", + "Egypt": "Africa", + "United Kingdom": "Europe", + } + countriesCodes = map[string]string{ + "Belgium": "BG", + "United States": "US", + "Egypt": "EG", + "United Kingdom": "UK", + } + cities = map[string][]string{ + "Belgium": {"Brussels", "Antwerp", "Ghent", "Charleroi"}, + "United States": {"New York", "Chicago", "Los Angeles", "San Francisco"}, + "Egypt": {"Cairo", "Giza", "October", "Nasr City"}, + "United Kingdom": {"London", "Liverpool", "Manchester", "Cambridge"}, + } + bios = []types.BIOS{ + {Vendor: "SeaBIOS", Version: "Arch Linux 1.16.3-1-1"}, + {Vendor: "American Megatrends Inc.", Version: "3.2"}, + {Vendor: "American Megatrends Inc.", Version: "F4"}, + {Vendor: "American Megatrends Inc.", Version: "P3.60"}, + } + + baseboard = []types.Baseboard{ + {Manufacturer: "Supermicro", ProductName: "X9DRi-LN4+/X9DR3-LN4+"}, + {Manufacturer: "GIGABYTE", ProductName: "MCMLUEB-00"}, + {Manufacturer: "INTEL Corporation", 
ProductName: "SKYBAY"}, + } + + processor = []types.Processor{ + {Version: "pc-i440fx-7.0", ThreadCount: "1"}, + {Version: "Intel(R) Core(TM) i5-10210U CPU @ 1.60GHz", ThreadCount: "8"}, + {Version: "AMD Ryzen 3 3200G with Radeon Vega Graphics", ThreadCount: "4"}, + {Version: "Intel(R) Xeon(R) CPU E5-2620 0 @ 2.00GHz", ThreadCount: "12"}, + } + + memory = []types.Memory{ + {Manufacturer: "Kingston", Type: "DDR4"}, + {Manufacturer: "SK Hynix", Type: "DDR3"}, + {Manufacturer: "Hynix/Hyundai", Type: "DDR3"}, + {Manufacturer: "Hynix Semiconductor", Type: "DDR3"}, + } +) diff --git a/grid-proxy/tools/db/crafter/types.go b/grid-proxy/tools/db/crafter/types.go index 23af838aa..006a72ab0 100644 --- a/grid-proxy/tools/db/crafter/types.go +++ b/grid-proxy/tools/db/crafter/types.go @@ -3,6 +3,8 @@ package crafter import ( "database/sql" "math/rand" + + "gorm.io/gorm" ) const ( @@ -21,30 +23,11 @@ const ( var ( r *rand.Rand - - countries = []string{"Belgium", "United States", "Egypt", "United Kingdom"} - regions = map[string]string{ - "Belgium": "Europe", - "United States": "Americas", - "Egypt": "Africa", - "United Kingdom": "Europe", - } - countriesCodes = map[string]string{ - "Belgium": "BG", - "United States": "US", - "Egypt": "EG", - "United Kingdom": "UK", - } - cities = map[string][]string{ - "Belgium": {"Brussels", "Antwerp", "Ghent", "Charleroi"}, - "United States": {"New York", "Chicago", "Los Angeles", "San Francisco"}, - "Egypt": {"Cairo", "Giza", "October", "Nasr City"}, - "United Kingdom": {"London", "Liverpool", "Manchester", "Cambridge"}, - } ) type Crafter struct { - db *sql.DB + db *sql.DB + gormDB *gorm.DB nodesMRU map[uint64]uint64 nodesSRU map[uint64]uint64 @@ -72,7 +55,7 @@ type Crafter struct { PublicIPStart uint } -func NewCrafter(db *sql.DB, +func NewCrafter(db *sql.DB, gormDB *gorm.DB, seed int, nodeCount, farmCount, @@ -91,7 +74,8 @@ func NewCrafter(db *sql.DB, r = rand.New(rand.NewSource(int64(seed))) return Crafter{ - db: db, + db: db, + gormDB: gormDB, nodesMRU: make(map[uint64]uint64), nodesSRU: make(map[uint64]uint64), diff --git a/grid-proxy/tools/db/db.go b/grid-proxy/tools/db/db.go index f688c1cbb..af0606dfc 100644 --- a/grid-proxy/tools/db/db.go +++ b/grid-proxy/tools/db/db.go @@ -9,6 +9,9 @@ import ( _ "github.com/lib/pq" "github.com/pkg/errors" + "gorm.io/driver/postgres" + "gorm.io/gorm" + "gorm.io/gorm/logger" ) type flags struct { @@ -43,7 +46,20 @@ func main() { if err != nil { panic(errors.Wrap(err, "failed to open db")) } - defer db.Close() + gormDB, err := gorm.Open(postgres.Open(psqlInfo), &gorm.Config{ + Logger: logger.Default.LogMode(4), + }) + if err != nil { + panic(fmt.Errorf("failed to generate gorm db: %w", err)) + } + defer func() { + db.Close() + db_gorm, err := gormDB.DB() + if err != nil { + panic(fmt.Errorf("failed to get gorm db: %w", err)) + } + db_gorm.Close() + }() if f.reset { if err := reset(db); err != nil { @@ -62,7 +78,7 @@ func main() { } // ---- - if err := generateData(db, f.seed); err != nil { + if err := generateData(db, gormDB, f.seed); err != nil { panic(err) } } diff --git a/grid-proxy/tools/db/generate.go b/grid-proxy/tools/db/generate.go index bea47f153..69b0ffc65 100644 --- a/grid-proxy/tools/db/generate.go +++ b/grid-proxy/tools/db/generate.go @@ -6,6 +6,7 @@ import ( "os" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/tools/db/crafter" + "gorm.io/gorm" ) const ( @@ -71,8 +72,8 @@ func initSchema(db *sql.DB) error { return nil } -func generateData(db *sql.DB, seed int) error { - generator := crafter.NewCrafter(db, +func 
generateData(db *sql.DB, gormDB *gorm.DB, seed int) error { + generator := crafter.NewCrafter(db, gormDB, seed, NodeCount, FarmCount, @@ -115,5 +116,14 @@ func generateData(db *sql.DB, seed int) error { if err := generator.GenerateCountries(); err != nil { return fmt.Errorf("failed to generate countries: %w", err) } + + if err := generator.GenerateSpeedReports(); err != nil { + return fmt.Errorf("failed to generate speed reports: %w", err) + } + + if err := generator.GenerateDmi(); err != nil { + return fmt.Errorf("failed to generate dmi reports: %w", err) + } + return nil } diff --git a/grid-proxy/tools/db/schema.sql b/grid-proxy/tools/db/schema.sql index e944dd28b..978a3fdfa 100644 --- a/grid-proxy/tools/db/schema.sql +++ b/grid-proxy/tools/db/schema.sql @@ -443,31 +443,6 @@ CREATE TABLE public.public_ip ( ); ALTER TABLE public.public_ip OWNER TO postgres; --- --- Name: node_gpu; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE IF NOT EXISTS public.node_gpu ( - id text NOT NULL, - node_twin_id bigint NOT NULL, - vendor text, - device text, - contract bigint -); - -ALTER TABLE public.node_gpu OWNER TO postgres; - --- --- Name: health_report; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE IF NOT EXISTS public.health_report ( - node_twin_id bigint NOT NULL, - healthy boolean -); - -ALTER TABLE public.health_report OWNER TO postgres; - -- -- Name: refund_transaction; Type: TABLE; Schema: public; Owner: postgres -- @@ -875,15 +850,6 @@ ALTER TABLE ONLY public.node_resources_total ADD CONSTRAINT "REL_fd430c3a2645c8f409f859c2aa" UNIQUE (node_id); --- --- Name: node_gpu node_gpu_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.node_gpu - ADD CONSTRAINT node_gpu_pkey PRIMARY KEY (id); - - - -- -- Name: status status_pkey; Type: CONSTRAINT; Schema: substrate_threefold_status; Owner: postgres -- @@ -1046,3 +1012,70 @@ ALTER TABLE ONLY public.node_resources_total -- PostgreSQL database dump complete -- + +-- +-- +-- Indexer tables +-- +-- + + +-- +-- Name: node_gpu; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE IF NOT EXISTS public.node_gpu ( + id text NOT NULL, + node_twin_id bigint NOT NULL, + vendor text, + device text, + contract bigint +); + +ALTER TABLE public.node_gpu + OWNER TO postgres; + +ALTER TABLE ONLY public.node_gpu + ADD CONSTRAINT node_gpu_pkey PRIMARY KEY (id); + + +-- +-- Name: health_report; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE IF NOT EXISTS public.health_report ( + node_twin_id bigint NOT NULL, + healthy boolean +); + +ALTER TABLE public.health_report + OWNER TO postgres; + + +-- +-- Name: dmi_infos; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.dmi_infos( + node_twin_id bigint PRIMARY KEY, + bios jsonb, + baseboard jsonb, + processor jsonb, + memory jsonb +); + +ALTER TABLE public.dmi_infos + OWNER TO postgres; + +-- +-- Name: network_test_results; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.network_test_results( + node_twin_id bigint PRIMARY KEY, + upload_speed numeric, + download_speed numeric +); + +ALTER TABLE public.network_test_results + OWNER TO postgres; From 23645c4498fe32b4e91b1bc22a71bf0f82eec017 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Thu, 8 Feb 2024 16:51:19 +0200 Subject: [PATCH 11/19] add loader/tests for the new dmi/speed data --- grid-proxy/cmds/proxy_server/main.go | 13 +++- .../tests/queries/mock_client/loader.go | 72 ++++++++++++++++++- 
grid-proxy/tests/queries/mock_client/nodes.go | 10 +++ 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index 99e3c27ff..be9bb27a0 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -155,10 +155,21 @@ func main() { &db, rpcRmbClient, 5, - 1, + 20, + 20, ) idxr.RegisterWatcher("DMI", dmiWatcher) + speedWatcher := indexer.NewSpeedWatcher( + ctx, + &db, + rpcRmbClient, + 5, + 20, + 20, + ) + idxr.RegisterWatcher("Speed", speedWatcher) + idxr.Start() s, err := createServer(f, dbClient, GitCommit, rpcRmbClient) diff --git a/grid-proxy/tests/queries/mock_client/loader.go b/grid-proxy/tests/queries/mock_client/loader.go index 94959c664..a5d6d0ebe 100644 --- a/grid-proxy/tests/queries/mock_client/loader.go +++ b/grid-proxy/tests/queries/mock_client/loader.go @@ -5,6 +5,7 @@ import ( "math" "strings" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/zos/pkg/gridtypes" ) @@ -37,7 +38,10 @@ type DBData struct { Regions map[string]string Locations map[string]Location HealthReports map[uint64]bool - DB *sql.DB + DMIs map[uint32]types.DmiInfo + Speeds map[uint32]types.NetworkTestResult + + DB *sql.DB } func loadNodes(db *sql.DB, data *DBData) error { @@ -606,6 +610,64 @@ func loadHealthReports(db *sql.DB, data *DBData) error { return nil } +func loadDMIs(db *sql.DB, data *DBData) error { + rows, err := db.Query(` + SELECT + node_twin_id, + bios, + baseboard, + processor, + memory + FROM + dmi_infos;`) + if err != nil { + return err + } + for rows.Next() { + var dmi types.DmiInfo + if err := rows.Scan( + &dmi.NodeTwinId, + &dmi.BIOS, + &dmi.Baseboard, + &dmi.Processor, + &dmi.Memory, + ); err != nil { + return err + } + twinId := dmi.NodeTwinId + dmi.NodeTwinId = 0 // to omit it as empty, cleaner response + data.DMIs[twinId] = dmi + } + + return nil +} + +func loadSpeeds(db *sql.DB, data *DBData) error { + rows, err := db.Query(` + SELECT + node_twin_id, + upload_speed, + download_speed + FROM + network_test_results;`) + if err != nil { + return err + } + for rows.Next() { + var speed types.NetworkTestResult + if err := rows.Scan( + &speed.NodeTwinId, + &speed.UploadSpeed, + &speed.DownloadSpeed, + ); err != nil { + return err + } + data.Speeds[speed.NodeTwinId] = speed + } + + return nil +} + func Load(db *sql.DB) (DBData, error) { data := DBData{ NodeIDMap: make(map[string]uint64), @@ -633,6 +695,8 @@ func Load(db *sql.DB) (DBData, error) { Regions: make(map[string]string), Locations: make(map[string]Location), HealthReports: make(map[uint64]bool), + DMIs: make(map[uint32]types.DmiInfo), + Speeds: make(map[uint32]types.NetworkTestResult), DB: db, } if err := loadNodes(db, &data); err != nil { @@ -680,6 +744,12 @@ func Load(db *sql.DB) (DBData, error) { if err := loadHealthReports(db, &data); err != nil { return data, err } + if err := loadDMIs(db, &data); err != nil { + return data, err + } + if err := loadSpeeds(db, &data); err != nil { + return data, err + } if err := calcNodesUsedResources(&data); err != nil { return data, err } diff --git a/grid-proxy/tests/queries/mock_client/nodes.go b/grid-proxy/tests/queries/mock_client/nodes.go index f52b59b73..8cb7c9c3b 100644 --- a/grid-proxy/tests/queries/mock_client/nodes.go +++ b/grid-proxy/tests/queries/mock_client/nodes.go @@ -88,6 +88,11 @@ func (g *GridProxyMockClient) Nodes(ctx context.Context, filter types.NodeFilter NumGPU: numGPU, ExtraFee: node.ExtraFee, Healthy: 
g.data.HealthReports[node.TwinID], + Dmi: g.data.DMIs[uint32(node.TwinID)], + Speed: types.Speed{ + Upload: g.data.Speeds[uint32(node.TwinID)].UploadSpeed, + Download: g.data.Speeds[uint32(node.TwinID)].DownloadSpeed, + }, }) } } @@ -175,6 +180,11 @@ func (g *GridProxyMockClient) Node(ctx context.Context, nodeID uint32) (res type NumGPU: numGPU, ExtraFee: node.ExtraFee, Healthy: g.data.HealthReports[node.TwinID], + Dmi: g.data.DMIs[uint32(node.TwinID)], + Speed: types.Speed{ + Upload: g.data.Speeds[uint32(node.TwinID)].UploadSpeed, + Download: g.data.Speeds[uint32(node.TwinID)].DownloadSpeed, + }, } return } From 5fcf9e31694983ffc66f99d5ecac0ce43b130352 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Thu, 8 Feb 2024 17:08:19 +0200 Subject: [PATCH 12/19] make the interval/worker configurable --- grid-proxy/cmds/proxy_server/main.go | 19 +++++++++++++------ grid-proxy/internal/indexer/dmi.go | 4 ++-- grid-proxy/internal/indexer/health.go | 2 -- grid-proxy/internal/indexer/network_speed.go | 6 ++---- grid-proxy/pkg/types/indexer.go | 4 ++++ 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index be9bb27a0..a05fd0516 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -60,6 +60,10 @@ type flags struct { maxPoolOpenConnections int healthIndexerWorkers uint healthIndexerInterval uint + dmiWatcherWorkers uint + dmiWatcherInterval uint + speedWatcherWorkers uint + speedWatcherInterval uint noIndexer bool // true to stop the indexer, useful on running for testing } @@ -89,6 +93,11 @@ func main() { flag.IntVar(&f.maxPoolOpenConnections, "max-open-conns", 80, "max number of db connection pool open connections") flag.UintVar(&f.healthIndexerWorkers, "health-indexer-workers", 100, "number of workers checking on node health") flag.UintVar(&f.healthIndexerInterval, "health-indexer-interval", 5, "node health check interval in min") + flag.UintVar(&f.dmiWatcherWorkers, "dmi-watcher-workers", 1, "number of workers checking on node dmi") + flag.UintVar(&f.dmiWatcherInterval, "dmi-watcher-interval", 60*24, "node dmi check interval in min") + flag.UintVar(&f.speedWatcherWorkers, "speed-watcher-workers", 100, "number of workers checking on node speed") + flag.UintVar(&f.speedWatcherInterval, "speed-watcher-interval", 5, "node speed check interval in min") + flag.BoolVar(&f.noIndexer, "no-indexer", false, "do not start the indexer") flag.Parse() @@ -154,9 +163,8 @@ func main() { ctx, &db, rpcRmbClient, - 5, - 20, - 20, + f.dmiWatcherInterval, + f.dmiWatcherWorkers, ) idxr.RegisterWatcher("DMI", dmiWatcher) @@ -164,9 +172,8 @@ func main() { ctx, &db, rpcRmbClient, - 5, - 20, - 20, + f.speedWatcherInterval, + f.speedWatcherWorkers, ) idxr.RegisterWatcher("Speed", speedWatcher) diff --git a/grid-proxy/internal/indexer/dmi.go b/grid-proxy/internal/indexer/dmi.go index e48ef2faa..cf1cc348c 100644 --- a/grid-proxy/internal/indexer/dmi.go +++ b/grid-proxy/internal/indexer/dmi.go @@ -13,6 +13,7 @@ import ( const ( DmiCallCmd = "zos.system.dmi" flushingInterval = 60 * time.Second + dmiBatchSize = 20 ) type DmiWatcher struct { @@ -31,7 +32,6 @@ func NewDmiWatcher( rmbClient *peer.RpcClient, interval uint, workers uint, - batchSize uint, ) *DmiWatcher { return &DmiWatcher{ database: database, @@ -40,7 +40,7 @@ func NewDmiWatcher( resultChan: make(chan types.DmiInfo), interval: time.Duration(interval) * time.Minute, workers: workers, - batchSize: batchSize, + batchSize: dmiBatchSize, } } 
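The flags above only supply an interval and a worker count per watcher; what each watcher does with them follows one pattern: a ticker-driven producer feeds node twin ids into a channel, and a fixed pool of caller goroutines drains it. Below is a minimal, self-contained Go sketch of that pattern, not the project's code: runWatcher, fetchIDs, and process are hypothetical names standing in for the node querier, the RMB call, and the upsert path.

package main

import (
	"context"
	"fmt"
	"time"
)

// runWatcher sketches the assumed watcher pattern: a producer pushes node
// twin ids into a channel once per interval, and `workers` goroutines drain
// the channel concurrently.
func runWatcher(ctx context.Context, interval time.Duration, workers uint, fetchIDs func() []uint32, process func(uint32)) {
	ids := make(chan uint32)

	// producer: one pass immediately, then one pass per tick
	go func() {
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			for _, id := range fetchIDs() {
				select {
				case ids <- id:
				case <-ctx.Done():
					return
				}
			}
			select {
			case <-ticker.C:
			case <-ctx.Done():
				return
			}
		}
	}()

	// worker pool: the size corresponds to the *-watcher-workers flags
	for i := uint(0); i < workers; i++ {
		go func() {
			for {
				select {
				case id := <-ids:
					process(id)
				case <-ctx.Done():
					return
				}
			}
		}()
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	// toy stand-ins: the real watchers query up nodes from the DB and call them over RMB
	runWatcher(ctx, 500*time.Millisecond, 2,
		func() []uint32 { return []uint32{1, 2, 3} },
		func(id uint32) { fmt.Println("calling twin", id) },
	)
	<-ctx.Done()
}

The batch size and flushing interval seen in the upserters plug into the same loop on the consuming side, collecting results before writing them to the database.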
diff --git a/grid-proxy/internal/indexer/health.go b/grid-proxy/internal/indexer/health.go index 5069d20b9..dacebc2a3 100644 --- a/grid-proxy/internal/indexer/health.go +++ b/grid-proxy/internal/indexer/health.go @@ -86,8 +86,6 @@ func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) { err := c.relayClient.Call(subCtx, twinId, healthCallCmd, nil, &result) cancel() - log.Debug().Msgf("health indexer: %+v", result) - healthReport := types.HealthReport{ NodeTwinId: twinId, Healthy: isHealthy(err), diff --git a/grid-proxy/internal/indexer/network_speed.go b/grid-proxy/internal/indexer/network_speed.go index dca50dc8d..57989c271 100644 --- a/grid-proxy/internal/indexer/network_speed.go +++ b/grid-proxy/internal/indexer/network_speed.go @@ -13,6 +13,7 @@ import ( const ( perfTestCallCmd = "zos.perf.get" testName = "iperf" + speedBatchSize = 20 ) type SpeedWatcher struct { @@ -31,7 +32,6 @@ func NewSpeedWatcher( rmbClient *peer.RpcClient, interval uint, workers uint, - batchSize uint, ) *SpeedWatcher { return &SpeedWatcher{ database: database, @@ -40,7 +40,7 @@ func NewSpeedWatcher( resultChan: make(chan types.NetworkTestResult), interval: time.Duration(interval) * time.Minute, workers: workers, - batchSize: batchSize, + batchSize: speedBatchSize, } } @@ -54,7 +54,6 @@ func (w *SpeedWatcher) Start(ctx context.Context) { go w.startUpserter(ctx, w.database) } -// TODO: not only on interval but also on any node goes from down>up or newly added nodes func (w *SpeedWatcher) startNodeQuerier(ctx context.Context) { ticker := time.NewTicker(w.interval) queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) @@ -84,7 +83,6 @@ func (w *SpeedWatcher) startNodeCaller(ctx context.Context) { } } -// TODO: make it generic and then assert the result in each watcher func (w *SpeedWatcher) callNode(ctx context.Context, twinId uint32) (types.PerfResult, error) { var result types.PerfResult subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) diff --git a/grid-proxy/pkg/types/indexer.go b/grid-proxy/pkg/types/indexer.go index a9ebaaebb..a9e661002 100644 --- a/grid-proxy/pkg/types/indexer.go +++ b/grid-proxy/pkg/types/indexer.go @@ -6,6 +6,10 @@ import ( "errors" ) +// TODO: create new db_types pkg different than the server ones in pkg/types +// it will be easier to have a single type that implement Scanner/Valuer when needed +// will be used in: internal/explorer/db/types.go and tools/db/crafter/ and tests/mock_client + type NetworkTestResult struct { NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` UploadSpeed float64 `json:"upload_speed"` // in bit/sec From 2398e23dc6ddba6c856cc683bcde0f0a3269b79b Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Thu, 8 Feb 2024 17:37:01 +0200 Subject: [PATCH 13/19] stop data modification, remove unnecessary conversion, silent the gorm on tests --- .github/workflows/grid-proxy-integration.yml | 2 +- grid-proxy/internal/explorer/converters.go | 4 ++-- grid-proxy/tests/queries/main_test.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/grid-proxy-integration.yml b/.github/workflows/grid-proxy-integration.yml index e9d853586..280de9a4b 100644 --- a/.github/workflows/grid-proxy-integration.yml +++ b/.github/workflows/grid-proxy-integration.yml @@ -55,5 +55,5 @@ jobs: go run cmds/proxy_server/main.go -no-cert -no-indexer --address :8080 --log-level debug --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --mnemonics "$MNEMONICS" & sleep 10 pushd tests/queries 
- go test -v --seed 13 --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --endpoint http://localhost:8080 + go test -v --seed 13 -no-modify --postgres-host localhost --postgres-db tfgrid-graphql --postgres-password postgres --postgres-user postgres --endpoint http://localhost:8080 popd diff --git a/grid-proxy/internal/explorer/converters.go b/grid-proxy/internal/explorer/converters.go index cd8ec7019..d011ac378 100644 --- a/grid-proxy/internal/explorer/converters.go +++ b/grid-proxy/internal/explorer/converters.go @@ -62,7 +62,7 @@ func nodeFromDBNode(info db.Node) types.Node { Dmi: types.DmiInfo{ Processor: info.Processor, Memory: info.Memory, - BIOS: types.BIOS(info.Bios), + BIOS: info.Bios, Baseboard: info.Baseboard, }, Speed: types.Speed{ @@ -146,7 +146,7 @@ func nodeWithNestedCapacityFromDBNode(info db.Node) types.NodeWithNestedCapacity Dmi: types.DmiInfo{ Processor: info.Processor, Memory: info.Memory, - BIOS: types.BIOS(info.Bios), + BIOS: info.Bios, Baseboard: info.Baseboard, }, Speed: types.Speed{ diff --git a/grid-proxy/tests/queries/main_test.go b/grid-proxy/tests/queries/main_test.go index f3037b29a..28e8b6ae9 100644 --- a/grid-proxy/tests/queries/main_test.go +++ b/grid-proxy/tests/queries/main_test.go @@ -68,7 +68,7 @@ func TestMain(m *testing.M) { panic(errors.Wrap(err, "failed to open db")) } gormDB, err := gorm.Open(postgres.Open(psqlInfo), &gorm.Config{ - Logger: logger.Default.LogMode(4), + Logger: logger.Default.LogMode(0), }) if err != nil { panic(fmt.Errorf("failed to generate gorm db: %w", err)) } From dca941c00a0daf7318e032035cd8247bfcdd1d0a Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Sun, 18 Feb 2024 15:54:55 +0200 Subject: [PATCH 14/19] refactor the indexer manager to implement a unified interface for indexers: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - configure a flag to start or stop the indexer manager on startup. the `-no-indexer` flag is useful while running integration tests, to keep the loaded data in the mock client consistent with the actual data in the DB. - organize the database calls in the `postgres.go` file by separating the indexer calls into a new file. also, keep the numeric `NodeTwinId` field consistently typed as `uint32` in all methods - remove the types duplicated across the db/tests/generator pkgs and use only the ones in `pkg/types` - add a new trigger on the `node_gpu` table; it fires on insert/delete to update the `num_gpu` field on the `resources_cache` table - remove all implementations of the `Scanner/Valuer` interfaces for jsonb fields in the gorm models. instead, use the registered json serializer, which is easier and cleaner. apply this to the dmi fields and the node power object - modify the generation of the `health_report` mock data to use `gorm` functionality instead of building a plain SQL string - update and organize the schema file used for the db mock generation so it is aligned with the newly defined struct models.
Indexer pkg changes: - import types directly from zos once needed instead of re-implement it, applied this for IperfTestResult and DMI - create utils method, generic methods that can be used in all indexers to avoid repeating the code - create a manager that register/control all the indexers - define an `Indexer` interface that defines the needed methods for each indexer - document the indexer package along with the registered indexers and their metadata - implement `Indexer` interface for each registered interface, this requires some refactoring for the indexer especially the GPU indexer - a better decision on old GPU node deletion by adding a timestamp expiration and check it is expired or not with each upsert, the allowed interval is the same as the check interval in the indexer an hour for now. this is better than checking the lastAdded twin id cause we have multiple caller/batcher that may conflict the results. - add new trigger for the dmi indexer, to start querying the newly added nodes and don’t wait for the new rotation --- grid-proxy/cmds/proxy_server/main.go | 131 ++++---- grid-proxy/internal/explorer/converters.go | 4 +- .../internal/explorer/db/indexer_calls.go | 62 ++++ grid-proxy/internal/explorer/db/postgres.go | 83 +---- grid-proxy/internal/explorer/db/setup.sql | 71 +++-- grid-proxy/internal/explorer/db/types.go | 55 ++-- grid-proxy/internal/indexer/README.md | 58 ++++ grid-proxy/internal/indexer/dmi.go | 128 +++++--- grid-proxy/internal/indexer/gpu.go | 290 +++++++----------- grid-proxy/internal/indexer/health.go | 109 ++++--- grid-proxy/internal/indexer/indexer.go | 54 ---- grid-proxy/internal/indexer/manager.go | 48 +++ grid-proxy/internal/indexer/network_speed.go | 144 --------- grid-proxy/internal/indexer/speed.go | 169 ++++++++++ grid-proxy/internal/indexer/types.go | 47 --- grid-proxy/internal/indexer/utils.go | 24 ++ grid-proxy/pkg/types/indexer.go | 138 +++------ grid-proxy/pkg/types/nodes.go | 24 +- .../tests/queries/mock_client/counters.go | 2 +- grid-proxy/tests/queries/mock_client/farms.go | 2 +- .../tests/queries/mock_client/loader.go | 36 +-- grid-proxy/tests/queries/mock_client/nodes.go | 22 +- grid-proxy/tests/queries/mock_client/types.go | 26 +- grid-proxy/tools/db/crafter/generator.go | 70 +++-- grid-proxy/tools/db/crafter/types.go | 7 +- grid-proxy/tools/db/generate.go | 4 + grid-proxy/tools/db/schema.sql | 23 +- 27 files changed, 895 insertions(+), 936 deletions(-) create mode 100644 grid-proxy/internal/explorer/db/indexer_calls.go create mode 100644 grid-proxy/internal/indexer/README.md delete mode 100644 grid-proxy/internal/indexer/indexer.go create mode 100644 grid-proxy/internal/indexer/manager.go delete mode 100644 grid-proxy/internal/indexer/network_speed.go create mode 100644 grid-proxy/internal/indexer/speed.go delete mode 100644 grid-proxy/internal/indexer/types.go diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index b4dc71f9f..b07ae9e1b 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -36,35 +36,35 @@ const ( var GitCommit string type flags struct { - debug string - postgresHost string - postgresPort int - postgresDB string - postgresUser string - postgresPassword string - sqlLogLevel int - address string - version bool - nocert bool - domain string - TLSEmail string - CA string - certCacheDir string - tfChainURL string - relayURL string - mnemonics string - gpuIndexerCheckIntervalMins uint - gpuIndexerBatchSize uint - gpuIndexerResultWorkers uint - 
gpuIndexerBatchWorkers uint - maxPoolOpenConnections int - healthIndexerWorkers uint - healthIndexerInterval uint - dmiWatcherWorkers uint - dmiWatcherInterval uint - speedWatcherWorkers uint - speedWatcherInterval uint - noIndexer bool // true to stop the indexer, useful on running for testing + debug string + postgresHost string + postgresPort int + postgresDB string + postgresUser string + postgresPassword string + sqlLogLevel int + address string + version bool + nocert bool + domain string + TLSEmail string + CA string + certCacheDir string + tfChainURL string + relayURL string + mnemonics string + maxPoolOpenConnections int + + noIndexer bool // true to stop the indexer, useful on running for testing + indexerUpserterBatchSize uint + gpuIndexerIntervalMins uint + gpuIndexerNumWorkers uint + healthIndexerNumWorkers uint + healthIndexerIntervalMins uint + dmiIndexerNumWorkers uint + dmiIndexerIntervalMins uint + speedIndexerNumWorkers uint + speedIndexerIntervalMins uint } func main() { @@ -86,19 +86,18 @@ func main() { flag.StringVar(&f.tfChainURL, "tfchain-url", DefaultTFChainURL, "TF chain url") flag.StringVar(&f.relayURL, "relay-url", DefaultRelayURL, "RMB relay url") flag.StringVar(&f.mnemonics, "mnemonics", "", "Dummy user mnemonics for relay calls") - flag.UintVar(&f.gpuIndexerCheckIntervalMins, "indexer-interval-min", 60, "the interval that the GPU indexer will run") - flag.UintVar(&f.gpuIndexerBatchSize, "indexer-batch-size", 20, "batch size for the GPU indexer worker batch") - flag.UintVar(&f.gpuIndexerResultWorkers, "indexer-results-workers", 2, "number of workers to process indexer GPU info") - flag.UintVar(&f.gpuIndexerBatchWorkers, "indexer-batch-workers", 2, "number of workers to process batch GPU info") flag.IntVar(&f.maxPoolOpenConnections, "max-open-conns", 80, "max number of db connection pool open connections") - flag.UintVar(&f.healthIndexerWorkers, "health-indexer-workers", 100, "number of workers checking on node health") - flag.UintVar(&f.healthIndexerInterval, "health-indexer-interval", 5, "node health check interval in min") - flag.UintVar(&f.dmiWatcherWorkers, "dmi-watcher-workers", 1, "number of workers checking on node dmi") - flag.UintVar(&f.dmiWatcherInterval, "dmi-watcher-interval", 60*24, "node dmi check interval in min") - flag.UintVar(&f.speedWatcherWorkers, "speed-watcher-workers", 100, "number of workers checking on node speed") - flag.UintVar(&f.speedWatcherInterval, "speed-watcher-interval", 5, "node speed check interval in min") flag.BoolVar(&f.noIndexer, "no-indexer", false, "do not start the indexer") + flag.UintVar(&f.indexerUpserterBatchSize, "indexer-upserter-batch-size", 20, "results batch size which collected before upserting") + flag.UintVar(&f.gpuIndexerIntervalMins, "gpu-indexer-interval", 60, "the interval that the GPU indexer will run") + flag.UintVar(&f.gpuIndexerNumWorkers, "gpu-indexer-workers", 100, "number of workers to process indexer GPU info") + flag.UintVar(&f.healthIndexerIntervalMins, "health-indexer-interval", 5, "node health check interval in min") + flag.UintVar(&f.healthIndexerNumWorkers, "health-indexer-workers", 100, "number of workers checking on node health") + flag.UintVar(&f.dmiIndexerIntervalMins, "dmi-indexer-interval", 60*24, "node dmi check interval in min") + flag.UintVar(&f.dmiIndexerNumWorkers, "dmi-indexer-workers", 1, "number of workers checking on node dmi") + flag.UintVar(&f.speedIndexerIntervalMins, "speed-indexer-interval", 5, "node speed check interval in min") + 
flag.UintVar(&f.speedIndexerNumWorkers, "speed-indexer-workers", 100, "number of workers checking on node speed") flag.Parse() // shows version and exit @@ -137,47 +136,49 @@ func main() { if err != nil { log.Fatal().Err(err).Msg("failed to create relay client") } - idxr := indexer.NewIndexer(ctx, f.noIndexer, rpcRmbClient) + manager := indexer.NewManager(ctx) - gpuWatcher := indexer.NewNodeGPUIndexer( - ctx, + gpuIndexer := indexer.NewGPUIndexer( rpcRmbClient, &db, - f.gpuIndexerCheckIntervalMins, - f.gpuIndexerBatchSize, - f.gpuIndexerResultWorkers, - f.gpuIndexerBatchWorkers, + f.indexerUpserterBatchSize, + f.gpuIndexerIntervalMins, + f.gpuIndexerNumWorkers, ) - idxr.RegisterWatcher("GPU", gpuWatcher) + manager.Register("GPU", gpuIndexer) - healthWatcher := indexer.NewNodeHealthIndexer( - ctx, + healthIndexer := indexer.NewNodeHealthIndexer( rpcRmbClient, &db, - f.healthIndexerWorkers, - f.healthIndexerInterval, + f.indexerUpserterBatchSize, + f.healthIndexerNumWorkers, + f.healthIndexerIntervalMins, ) - idxr.RegisterWatcher("Health", healthWatcher) + manager.Register("Health", healthIndexer) - dmiWatcher := indexer.NewDmiWatcher( - ctx, - &db, + dmiIndexer := indexer.NewDmiIndexer( rpcRmbClient, - f.dmiWatcherInterval, - f.dmiWatcherWorkers, + &db, + f.indexerUpserterBatchSize, + f.dmiIndexerIntervalMins, + f.dmiIndexerNumWorkers, ) - idxr.RegisterWatcher("DMI", dmiWatcher) + manager.Register("DMI", dmiIndexer) - speedWatcher := indexer.NewSpeedWatcher( - ctx, - &db, + speedIndexer := indexer.NewSpeedIndexer( rpcRmbClient, - f.speedWatcherInterval, - f.speedWatcherWorkers, + &db, + f.indexerUpserterBatchSize, + f.speedIndexerIntervalMins, + f.speedIndexerNumWorkers, ) - idxr.RegisterWatcher("Speed", speedWatcher) + manager.Register("Speed", speedIndexer) - idxr.Start() + manager.Start() + if !f.noIndexer { + } else { + log.Info().Msg("Indexers Manager did not start") + } s, err := createServer(f, dbClient, GitCommit, rpcRmbClient) if err != nil { diff --git a/grid-proxy/internal/explorer/converters.go b/grid-proxy/internal/explorer/converters.go index d011ac378..ee0db9b0d 100644 --- a/grid-proxy/internal/explorer/converters.go +++ b/grid-proxy/internal/explorer/converters.go @@ -59,7 +59,7 @@ func nodeFromDBNode(info db.Node) types.Node { NumGPU: info.NumGPU, ExtraFee: info.ExtraFee, Healthy: info.Healthy, - Dmi: types.DmiInfo{ + Dmi: types.Dmi{ Processor: info.Processor, Memory: info.Memory, BIOS: info.Bios, @@ -143,7 +143,7 @@ func nodeWithNestedCapacityFromDBNode(info db.Node) types.NodeWithNestedCapacity NumGPU: info.NumGPU, ExtraFee: info.ExtraFee, Healthy: info.Healthy, - Dmi: types.DmiInfo{ + Dmi: types.Dmi{ Processor: info.Processor, Memory: info.Memory, BIOS: info.Bios, diff --git a/grid-proxy/internal/explorer/db/indexer_calls.go b/grid-proxy/internal/explorer/db/indexer_calls.go new file mode 100644 index 000000000..805ac1798 --- /dev/null +++ b/grid-proxy/internal/explorer/db/indexer_calls.go @@ -0,0 +1,62 @@ +package db + +import ( + "context" + + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" + "gorm.io/gorm/clause" +) + +func (p *PostgresDatabase) DeleteOldGpus(ctx context.Context, nodeTwinIds []uint32, expiration int64) error { + return p.gormDB.WithContext(ctx).Table("node_gpu").Where("node_twin_id IN (?) 
AND updated_at < ?", nodeTwinIds, expiration).Delete(types.NodeGPU{}).Error +} + +func (p *PostgresDatabase) GetLastNodeTwinID(ctx context.Context) (uint32, error) { + var node Node + err := p.gormDB.WithContext(ctx).Table("node").Order("twin_id DESC").Limit(1).Scan(&node).Error + return uint32(node.TwinID), err +} + +func (p *PostgresDatabase) GetNodeTwinIDsAfter(ctx context.Context, twinID uint32) ([]uint32, error) { + nodeTwinIDs := make([]uint32, 0) + err := p.gormDB.WithContext(ctx).Table("node").Select("twin_id").Where("twin_id > ?", twinID).Order("twin_id DESC").Scan(&nodeTwinIDs).Error + return nodeTwinIDs, err +} + +func (p *PostgresDatabase) GetHealthyNodeTwinIds(ctx context.Context) ([]uint32, error) { + nodeTwinIDs := make([]uint32, 0) + err := p.gormDB.WithContext(ctx).Table("health_report").Select("node_twin_id").Where("healthy = true").Scan(&nodeTwinIDs).Error + return nodeTwinIDs, err +} + +func (p *PostgresDatabase) UpsertNodesGPU(ctx context.Context, gpus []types.NodeGPU) error { + conflictClause := clause.OnConflict{ + Columns: []clause.Column{{Name: "id"}, {Name: "node_twin_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"vendor", "device", "contract", "updated_at"}), + } + return p.gormDB.WithContext(ctx).Table("node_gpu").Clauses(conflictClause).Create(&gpus).Error +} + +func (p *PostgresDatabase) UpsertNodeHealth(ctx context.Context, healthReports []types.HealthReport) error { + conflictClause := clause.OnConflict{ + Columns: []clause.Column{{Name: "node_twin_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"healthy"}), + } + return p.gormDB.WithContext(ctx).Table("health_report").Clauses(conflictClause).Create(&healthReports).Error +} + +func (p *PostgresDatabase) UpsertNodeDmi(ctx context.Context, dmis []types.Dmi) error { + conflictClause := clause.OnConflict{ + Columns: []clause.Column{{Name: "node_twin_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"bios", "baseboard", "processor", "memory"}), + } + return p.gormDB.WithContext(ctx).Table("dmi").Clauses(conflictClause).Create(&dmis).Error +} + +func (p *PostgresDatabase) UpsertNetworkSpeed(ctx context.Context, speeds []types.Speed) error { + conflictClause := clause.OnConflict{ + Columns: []clause.Column{{Name: "node_twin_id"}}, + DoUpdates: clause.AssignmentColumns([]string{"download", "upload"}), + } + return p.gormDB.WithContext(ctx).Table("speed").Clauses(conflictClause).Create(&speeds).Error +} diff --git a/grid-proxy/internal/explorer/db/postgres.go b/grid-proxy/internal/explorer/db/postgres.go index fa20c956b..81f51e6c5 100644 --- a/grid-proxy/internal/explorer/db/postgres.go +++ b/grid-proxy/internal/explorer/db/postgres.go @@ -2,7 +2,6 @@ package db import ( "context" - "encoding/json" "fmt" "strings" @@ -13,7 +12,6 @@ import ( "github.com/threefoldtech/zos/pkg/gridtypes" "gorm.io/driver/postgres" "gorm.io/gorm" - "gorm.io/gorm/clause" "gorm.io/gorm/logger" _ "embed" @@ -79,7 +77,12 @@ func (d *PostgresDatabase) Close() error { } func (d *PostgresDatabase) Initialize() error { - err := d.gormDB.AutoMigrate(&NodeGPU{}, &HealthReport{}, &types.DmiInfo{}, &types.NetworkTestResult{}) + err := d.gormDB.AutoMigrate( + &types.NodeGPU{}, + &types.HealthReport{}, + &types.Dmi{}, + &types.Speed{}, + ) if err != nil { return errors.Wrap(err, "failed to migrate indexer tables") } @@ -193,17 +196,6 @@ func (d *PostgresDatabase) GetStats(ctx context.Context, filter types.StatsFilte return stats, nil } -// Scan is a custom decoder for jsonb filed. executed while scanning the node. 
-func (np *NodePower) Scan(value interface{}) error { - if value == nil { - return nil - } - if data, ok := value.([]byte); ok { - return json.Unmarshal(data, np) - } - return fmt.Errorf("failed to unmarshal NodePower") -} - // GetNode returns node info func (d *PostgresDatabase) GetNode(ctx context.Context, nodeID uint32) (Node, error) { q := d.nodeTableQuery(ctx, types.NodeFilter{}, &gorm.DB{}) @@ -877,66 +869,3 @@ func (d *PostgresDatabase) GetContractBills(ctx context.Context, contractID uint return bills, uint(count), nil } - -func (p *PostgresDatabase) UpsertNodesGPU(ctx context.Context, nodesGPU []types.NodeGPU) error { - // For upsert operation - conflictClause := clause.OnConflict{ - Columns: []clause.Column{{Name: "id"}, {Name: "node_twin_id"}}, - DoUpdates: clause.AssignmentColumns([]string{"vendor", "device", "contract"}), - } - err := p.gormDB.WithContext(ctx).Table("node_gpu").Clauses(conflictClause).Create(&nodesGPU).Error - if err != nil { - return fmt.Errorf("failed to upsert nodes GPU details: %w", err) - } - return nil -} - -func (p *PostgresDatabase) DeleteOldGpus(ctx context.Context, nodeTwinIds []uint32) error { - err := p.gormDB.WithContext(ctx).Table("node_gpu").Where("node_twin_id IN (?)", nodeTwinIds).Delete(types.NodeGPU{}).Error - if err != nil { - return fmt.Errorf("failed to delete old gpus: %w", err) - } - return nil -} - -func (p *PostgresDatabase) GetLastNodeTwinID(ctx context.Context) (int64, error) { - var node Node - err := p.gormDB.Table("node").Order("twin_id DESC").Limit(1).Scan(&node).Error - return node.TwinID, err -} - -func (p *PostgresDatabase) GetNodeTwinIDsAfter(ctx context.Context, twinID int64) ([]int64, error) { - nodeTwinIDs := make([]int64, 0) - err := p.gormDB.Table("node").Select("twin_id").Where("twin_id > ?", twinID).Order("twin_id DESC").Scan(&nodeTwinIDs).Error - return nodeTwinIDs, err -} - -func (p *PostgresDatabase) UpsertNodeHealth(ctx context.Context, healthReport types.HealthReport) error { - conflictClause := clause.OnConflict{ - Columns: []clause.Column{{Name: "node_twin_id"}}, - DoUpdates: clause.AssignmentColumns([]string{"healthy"}), - } - return p.gormDB.WithContext(ctx).Table("health_report").Clauses(conflictClause).Create(&healthReport).Error -} - -func (p *PostgresDatabase) GetHealthyNodeTwinIds(ctx context.Context) ([]int64, error) { - nodeTwinIDs := make([]int64, 0) - err := p.gormDB.Table("health_report").Select("node_twin_id").Where("healthy = true").Scan(&nodeTwinIDs).Error - return nodeTwinIDs, err -} - -func (p *PostgresDatabase) UpsertNodeDmi(ctx context.Context, dmi []types.DmiInfo) error { - conflictClause := clause.OnConflict{ - Columns: []clause.Column{{Name: "node_twin_id"}}, - DoUpdates: clause.AssignmentColumns([]string{"bios", "baseboard", "processor", "memory"}), - } - return p.gormDB.WithContext(ctx).Table("dmi_infos").Clauses(conflictClause).Create(&dmi).Error -} - -func (p *PostgresDatabase) UpsertNetworkSpeed(ctx context.Context, report []types.NetworkTestResult) error { - conflictClause := clause.OnConflict{ - Columns: []clause.Column{{Name: "node_twin_id"}}, - DoUpdates: clause.AssignmentColumns([]string{"download_speed", "upload_speed"}), - } - return p.gormDB.WithContext(ctx).Table("network_test_results").Clauses(conflictClause).Create(&report).Error -} diff --git a/grid-proxy/internal/explorer/db/setup.sql b/grid-proxy/internal/explorer/db/setup.sql index 31fb0e96e..ec7fa0190 100644 --- a/grid-proxy/internal/explorer/db/setup.sql +++ b/grid-proxy/internal/explorer/db/setup.sql @@ -52,12 +52,12 
@@ SELECT COALESCE(node_gpu.node_gpu_count, 0) as node_gpu_count, node.country as country, country.region as region, - COALESCE(dmi_infos.bios, '{}') as bios, - COALESCE(dmi_infos.baseboard, '{}') as baseboard, - COALESCE(dmi_infos.processor, '[]') as processor, - COALESCE(dmi_infos.memory, '[]') as memory, - COALESCE(network_test_results.upload_speed, 0) as upload_speed, - COALESCE(network_test_results.download_speed, 0) as download_speed + COALESCE(dmi.bios, '{}') as bios, + COALESCE(dmi.baseboard, '{}') as baseboard, + COALESCE(dmi.processor, '[]') as processor, + COALESCE(dmi.memory, '[]') as memory, + COALESCE(speed.upload, 0) as upload_speed, + COALESCE(speed.download, 0) as download_speed FROM node LEFT JOIN node_contract ON node.node_id = node_contract.node_id AND node_contract.state IN ('Created', 'GracePeriod') LEFT JOIN contract_resources ON node_contract.resources_used_id = contract_resources.id @@ -72,8 +72,8 @@ FROM node node_twin_id ) AS node_gpu ON node.twin_id = node_gpu.node_twin_id LEFT JOIN country ON LOWER(node.country) = LOWER(country.name) - LEFT JOIN network_test_results ON node.twin_id = network_test_results.node_twin_id - LEFT JOIN dmi_infos ON node.twin_id = dmi_infos.node_twin_id + LEFT JOIN speed ON node.twin_id = speed.node_twin_id + LEFT JOIN dmi ON node.twin_id = dmi.node_twin_id GROUP BY node.node_id, node_resources_total.mru, @@ -86,12 +86,12 @@ GROUP BY COALESCE(node_gpu.node_gpu_count, 0), node.country, country.region, - COALESCE(dmi_infos.bios, '{}'), - COALESCE(dmi_infos.baseboard, '{}'), - COALESCE(dmi_infos.processor, '[]'), - COALESCE(dmi_infos.memory, '[]'), - COALESCE(network_test_results.upload_speed, 0), - COALESCE(network_test_results.download_speed, 0); + COALESCE(dmi.bios, '{}'), + COALESCE(dmi.baseboard, '{}'), + COALESCE(dmi.processor, '[]'), + COALESCE(dmi.memory, '[]'), + COALESCE(speed.upload, 0), + COALESCE(speed.download, 0); DROP TABLE IF EXISTS resources_cache; CREATE TABLE IF NOT EXISTS resources_cache( @@ -396,6 +396,40 @@ CREATE OR REPLACE TRIGGER tg_rent_contract AFTER INSERT OR UPDATE OF state ON rent_contract FOR EACH ROW EXECUTE PROCEDURE reflect_rent_contract_changes(); +/* + Gpu trigger + - Insert new node_gpu > increase the gpu_num in resources cache + - Delete node_gpu > decrease the gpu_num in resources cache +*/ +CREATE OR REPLACE FUNCTION reflect_node_gpu_count_change() RETURNS TRIGGER AS +$$ +BEGIN + BEGIN + UPDATE resources_cache + SET node_gpu_count = node_gpu_count + ( + CASE + WHEN TG_OP = 'INSERT' + THEN 1 + WHEN TG_OP = 'DELETE' + THEN -1 + ELSE 0 + END + ) + WHERE resources_cache.node_id = ( + SELECT node_id from node where node.twin_id = COALESCE(NEW.node_twin_id, OLD.node_twin_id) + ); + EXCEPTION + WHEN OTHERS THEN + RAISE NOTICE 'Error updating resources_cache gpu fields %', SQLERRM; + END; +RETURN NULL; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE TRIGGER tg_node_gpu_count + AFTER INSERT OR DELETE ON node_gpu FOR EACH ROW + EXECUTE PROCEDURE reflect_node_gpu_count_change(); + /* Dmi trigger - Insert new record/Update > update resources_cache @@ -421,11 +455,10 @@ END; $$ LANGUAGE plpgsql; CREATE OR REPLACE TRIGGER tg_dmi - AFTER INSERT OR UPDATE ON dmi_infos FOR EACH ROW + AFTER INSERT OR UPDATE ON dmi FOR EACH ROW EXECUTE PROCEDURE reflect_dmi_changes(); - /* speed trigger - Insert new record/Update > update resources_cache @@ -435,8 +468,8 @@ $$ BEGIN BEGIN UPDATE resources_cache - SET upload_speed = NEW.upload_speed, - download_speed = NEW.download_speed + SET upload_speed = NEW.upload, + download_speed = NEW.download WHERE 
resources_cache.node_id = ( SELECT node_id from node where node.twin_id = NEW.node_twin_id ); @@ -449,7 +482,7 @@ END; $$ LANGUAGE plpgsql; CREATE OR REPLACE TRIGGER tg_speed - AFTER INSERT OR UPDATE ON network_test_results FOR EACH ROW + AFTER INSERT OR UPDATE ON speed FOR EACH ROW EXECUTE PROCEDURE reflect_speed_changes(); diff --git a/grid-proxy/internal/explorer/db/types.go b/grid-proxy/internal/explorer/db/types.go index 7288b30d5..2d16ca3a0 100644 --- a/grid-proxy/internal/explorer/db/types.go +++ b/grid-proxy/internal/explorer/db/types.go @@ -8,7 +8,9 @@ import ( // Database interface for storing and fetching grid info type Database interface { - // TODO: separate the setter/getter + GetConnectionString() string + + // server getters GetStats(ctx context.Context, filter types.StatsFilter) (types.Stats, error) GetNode(ctx context.Context, nodeID uint32) (Node, error) GetFarm(ctx context.Context, farmID uint32) (Farm, error) @@ -18,16 +20,18 @@ type Database interface { GetContracts(ctx context.Context, filter types.ContractFilter, limit types.Limit) ([]DBContract, uint, error) GetContract(ctx context.Context, contractID uint32) (DBContract, error) GetContractBills(ctx context.Context, contractID uint32, limit types.Limit) ([]ContractBilling, uint, error) - UpsertNodesGPU(ctx context.Context, nodesGPU []types.NodeGPU) error - GetLastNodeTwinID(ctx context.Context) (int64, error) - GetNodeTwinIDsAfter(ctx context.Context, twinID int64) ([]int64, error) - DeleteOldGpus(ctx context.Context, nodeTwinIds []uint32) error - UpsertNodeHealth(ctx context.Context, healthReport types.HealthReport) error - GetHealthyNodeTwinIds(ctx context.Context) ([]int64, error) - GetConnectionString() string - UpsertNodeDmi(ctx context.Context, dmi []types.DmiInfo) error - UpsertNetworkSpeed(ctx context.Context, speed []types.NetworkTestResult) error + // indexer utils + DeleteOldGpus(ctx context.Context, nodeTwinIds []uint32, expiration int64) error + GetLastNodeTwinID(ctx context.Context) (uint32, error) + GetNodeTwinIDsAfter(ctx context.Context, twinID uint32) ([]uint32, error) + GetHealthyNodeTwinIds(ctx context.Context) ([]uint32, error) + + // indexer upserters + UpsertNodesGPU(ctx context.Context, gpus []types.NodeGPU) error + UpsertNodeHealth(ctx context.Context, healthReports []types.HealthReport) error + UpsertNodeDmi(ctx context.Context, dmis []types.Dmi) error + UpsertNetworkSpeed(ctx context.Context, speeds []types.Speed) error } type ContractBilling types.ContractBilling @@ -80,15 +84,15 @@ type Node struct { SerialNumber string Longitude *float64 Latitude *float64 - Power NodePower `gorm:"type:jsonb"` + Power NodePower `gorm:"type:jsonb;serializer:json"` NumGPU int `gorm:"num_gpu"` ExtraFee uint64 NodeContractsCount uint64 `gorm:"node_contracts_count"` Healthy bool - Bios types.BIOS `gorm:"type:jsonb"` - Baseboard types.Baseboard `gorm:"type:jsonb"` - Memory types.Memories `gorm:"type:jsonb"` - Processor types.Processors `gorm:"type:jsonb"` + Bios types.BIOS `gorm:"type:jsonb;serializer:json"` + Baseboard types.Baseboard `gorm:"type:jsonb;serializer:json"` + Memory []types.Memory `gorm:"type:jsonb;serializer:json"` + Processor []types.Processor `gorm:"type:jsonb;serializer:json"` UploadSpeed float64 DownloadSpeed float64 } @@ -116,24 +120,3 @@ type NodesDistribution struct { Country string `json:"country"` Nodes int64 `json:"nodes"` } - -type NodeGPU struct { - NodeTwinID int `gorm:"primaryKey;autoIncrement:false"` - ID string `gorm:"primaryKey"` - Vendor string - Device string - Contract int 
-} - -func (NodeGPU) TableName() string { - return "node_gpu" -} - -type HealthReport struct { - NodeTwinId int `gorm:"unique;not null"` - Healthy bool -} - -func (HealthReport) TableName() string { - return "health_report" -} diff --git a/grid-proxy/internal/indexer/README.md b/grid-proxy/internal/indexer/README.md new file mode 100644 index 000000000..403c62a67 --- /dev/null +++ b/grid-proxy/internal/indexer/README.md @@ -0,0 +1,58 @@ +# Node Indexers Manager + +Each node periodically reports its data to the chain: capacity, uptime, location, etc. The chain events are then processed by the `graphql-processor`, which dumps this data, along with the farms/contracts/twins data, into a postgres database used to serve both the `graphql-api` and the `proxy-api`. +This works fine, but for bigger data like gpu/dmi, the chain is not the best place to store it. +That is what the `Node-Indexers` solve: they periodically call the nodes, on a configurable interval, fetch the data, and store it in the same postgres database so it can be served by the APIs (only the `proxy-api` for now). + +## The manager + +The manager is a service started from `cmds/main.go`. It holds multiple indexers, each collecting one kind of data from the nodes, and it is configured by command line flags. + +## The indexer structure + +Each indexer has +two clients: + +- `Database`: a client to the postgres db. +- `RmbClient`: an rmb client used to make the node calls. + +three channels: + +- `NodeTwinIdsChan`: collects the twin ids of the nodes the indexer will call. +- `ResultChan`: collects the results returned by the rmb calls to the nodes. +- `BatchChan`: transfers batches of results that are ready to be upserted directly. + +four types of workers: + +- `Finder`: queries the database for the relevant nodes and pushes their twin ids to the `NodeTwinIdsChan` +- `Caller`: pops twin ids from the `NodeTwinIdsChan`, calls the node through the `RmbClient` to get the data, then pushes the result to the `ResultChan` +- `Batcher`: collects results from the `ResultChan` into batches and sends them to the `BatchChan` +- `Upserter`: takes batches from the `BatchChan` and updates/inserts them into the `Database` + +Each indexer may have some extra features based on its use case, but these are the essential parts. + +## Registered Indexers + +1. Gpu indexer: + - Function: query the gpu list on the node. + - Interval: `60 min` + - Other triggers: new node is added (check every 5m). + - Default caller worker number: 5 + - Dump table: `node_gpu` +2. Health indexer: + - Function: decide the node health based on its internal state. + - Interval: `5 min` + - Default caller worker number: 100 + - Dump table: `health_report` +3. Dmi indexer: + - Function: collect some hardware data from the node. + - Interval: `1 day` + - Other triggers: new node is added (check every 5m). + - Default caller worker number: 1 + - Dump table: `dmi` +4. Speed indexer: + + - Function: measure the node's network upload/download speed against an `iperf` server. 
+ - Interval: `5 min` + - Default caller worker number: 100 + - Dump table: `speed` diff --git a/grid-proxy/internal/indexer/dmi.go b/grid-proxy/internal/indexer/dmi.go index cf1cc348c..f67501a73 100644 --- a/grid-proxy/internal/indexer/dmi.go +++ b/grid-proxy/internal/indexer/dmi.go @@ -8,54 +8,59 @@ import ( "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" + zosDmiTypes "github.com/threefoldtech/zos/pkg/capacity/dmi" ) const ( - DmiCallCmd = "zos.system.dmi" - flushingInterval = 60 * time.Second - dmiBatchSize = 20 + DmiCallCmd = "zos.system.dmi" ) -type DmiWatcher struct { +type DmiIndexer struct { database db.Database rmbClient *peer.RpcClient - nodeTwinIdsChan chan uint32 - resultChan chan types.DmiInfo interval time.Duration workers uint batchSize uint + nodeTwinIdsChan chan uint32 + resultChan chan types.Dmi + batchChan chan []types.Dmi } -func NewDmiWatcher( - ctx context.Context, - database db.Database, +func NewDmiIndexer( rmbClient *peer.RpcClient, + database db.Database, + batchSize uint, interval uint, workers uint, -) *DmiWatcher { - return &DmiWatcher{ +) *DmiIndexer { + return &DmiIndexer{ database: database, rmbClient: rmbClient, - nodeTwinIdsChan: make(chan uint32), - resultChan: make(chan types.DmiInfo), interval: time.Duration(interval) * time.Minute, workers: workers, - batchSize: dmiBatchSize, + batchSize: batchSize, + nodeTwinIdsChan: make(chan uint32), + resultChan: make(chan types.Dmi), + batchChan: make(chan []types.Dmi), } } -func (w *DmiWatcher) Start(ctx context.Context) { - go w.startNodeQuerier(ctx) +func (w *DmiIndexer) Start(ctx context.Context) { + go w.startNodeTableWatcher(ctx) + go w.StartNodeFinder(ctx) for i := uint(0); i < w.workers; i++ { - go w.startNodeCaller(ctx) + go w.StartNodeCaller(ctx) } - go w.startUpserter(ctx, w.database) + for i := uint(0); i < w.workers; i++ { + go w.StartResultBatcher(ctx) + } + + go w.StartBatchUpserter(ctx) } -// TODO: not only on interval but also on any node goes from down>up or newly added nodes -func (w *DmiWatcher) startNodeQuerier(ctx context.Context) { +func (w *DmiIndexer) StartNodeFinder(ctx context.Context) { ticker := time.NewTicker(w.interval) queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) for { @@ -68,58 +73,67 @@ func (w *DmiWatcher) startNodeQuerier(ctx context.Context) { } } -func (w *DmiWatcher) startNodeCaller(ctx context.Context) { +func (n *DmiIndexer) startNodeTableWatcher(ctx context.Context) { + ticker := time.NewTicker(newNodesCheckInterval) + latestCheckedID, err := n.database.GetLastNodeTwinID(ctx) + if err != nil { + log.Error().Err(err).Msg("failed to get last node twin id") + } + for { select { - case twinId := <-w.nodeTwinIdsChan: - response, err := w.callNode(ctx, twinId) + case <-ticker.C: + newIDs, err := n.database.GetNodeTwinIDsAfter(ctx, latestCheckedID) if err != nil { + log.Error().Err(err).Msgf("failed to get node twin ids after %d", latestCheckedID) + continue + } + if len(newIDs) == 0 { continue } - parsedDmi := parseDmiResponse(response) - parsedDmi.NodeTwinId = twinId - w.resultChan <- parsedDmi + + latestCheckedID = newIDs[0] + for _, id := range newIDs { + n.nodeTwinIdsChan <- id + } case <-ctx.Done(): return } } } -// TODO: make it generic and then assert the result in each watcher -func (w *DmiWatcher) callNode(ctx context.Context, twinId uint32) (DMI, error) { - var result DMI - subCtx, cancel := context.WithTimeout(ctx, 
indexerCallTimeout) - defer cancel() +func (w *DmiIndexer) StartNodeCaller(ctx context.Context) { + for { + select { + case twinId := <-w.nodeTwinIdsChan: + var dmi zosDmiTypes.DMI + err := callNode(ctx, w.rmbClient, DmiCallCmd, nil, twinId, &dmi) + if err != nil { + continue + } - err := w.rmbClient.Call(subCtx, twinId, DmiCallCmd, nil, &result) - if err != nil { - log.Error().Err(err).Uint32("twinId", twinId).Msg("failed to call node") + w.resultChan <- parseDmiResponse(dmi, twinId) + case <-ctx.Done(): + return + } } - - return result, err } -func (w *DmiWatcher) startUpserter(ctx context.Context, database db.Database) { - buffer := make([]types.DmiInfo, 0, w.batchSize) +func (w *DmiIndexer) StartResultBatcher(ctx context.Context) { + buffer := make([]types.Dmi, 0, w.batchSize) - ticker := time.NewTicker(flushingInterval) + ticker := time.NewTicker(flushingBufferInterval) for { select { case dmiData := <-w.resultChan: buffer = append(buffer, dmiData) if len(buffer) >= int(w.batchSize) { - err := w.database.UpsertNodeDmi(ctx, buffer) - if err != nil { - log.Error().Err(err).Msgf("failed") - } + w.batchChan <- buffer buffer = nil } case <-ticker.C: if len(buffer) != 0 { - err := w.database.UpsertNodeDmi(ctx, buffer) - if err != nil { - log.Error().Err(err).Msgf("failed") - } + w.batchChan <- buffer buffer = nil } case <-ctx.Done(): @@ -128,8 +142,23 @@ func (w *DmiWatcher) startUpserter(ctx context.Context, database db.Database) { } } -func parseDmiResponse(dmiResponse DMI) types.DmiInfo { - var info types.DmiInfo +func (w *DmiIndexer) StartBatchUpserter(ctx context.Context) { + for { + + select { + case batch := <-w.batchChan: + err := w.database.UpsertNodeDmi(ctx, batch) + if err != nil { + log.Error().Err(err).Msg("failed to upsert node dmi") + } + case <-ctx.Done(): + return + } + } +} + +func parseDmiResponse(dmiResponse zosDmiTypes.DMI, twinId uint32) types.Dmi { + var info types.Dmi for _, sec := range dmiResponse.Sections { if sec.TypeStr == "Processor" { for _, subSec := range sec.SubSections { @@ -172,5 +201,6 @@ func parseDmiResponse(dmiResponse DMI) types.DmiInfo { } } + info.NodeTwinId = twinId return info } diff --git a/grid-proxy/internal/indexer/gpu.go b/grid-proxy/internal/indexer/gpu.go index 859e45c8e..3dfdc4f25 100644 --- a/grid-proxy/internal/indexer/gpu.go +++ b/grid-proxy/internal/indexer/gpu.go @@ -2,7 +2,6 @@ package indexer import ( "context" - "fmt" "time" "github.com/rs/zerolog/log" @@ -12,246 +11,169 @@ import ( ) const ( - resultsBatcherCleanupInterval = 10 * time.Second - minListenerReconnectInterval = 10 * time.Second - lingerBatch = 10 * time.Second - newNodesCheckInterval = 5 * time.Minute + gpuListCmd = "zos.gpu.list" ) type NodeGPUIndexer struct { - db db.Database - rpcClient *peer.RpcClient - checkInterval time.Duration - batchSize uint - nodesGPUResultsChan chan []types.NodeGPU - nodesGPUBatchesChan chan []types.NodeGPU - newNodeTwinIDChan chan []uint32 - nodesGPUResultsWorkers uint - nodesGPUBufferWorkers uint + database db.Database + rmbClient *peer.RpcClient + interval time.Duration + workers uint + batchSize uint + nodeTwinIdsChan chan uint32 + resultChan chan types.NodeGPU + batchChan chan []types.NodeGPU } -func NewNodeGPUIndexer( - ctx context.Context, - rpcClient *peer.RpcClient, - db db.Database, - indexerCheckIntervalMins, - batchSize, - nodesGPUResultsWorkers, - nodesGPUBufferWorkers uint) *NodeGPUIndexer { +func NewGPUIndexer( + rmbClient *peer.RpcClient, + database db.Database, + batchSize uint, + interval uint, + workers uint, +) 
*NodeGPUIndexer { return &NodeGPUIndexer{ - db: db, - rpcClient: rpcClient, - nodesGPUResultsChan: make(chan []types.NodeGPU), - nodesGPUBatchesChan: make(chan []types.NodeGPU), - newNodeTwinIDChan: make(chan []uint32), - checkInterval: time.Duration(indexerCheckIntervalMins) * time.Minute, - batchSize: batchSize, - nodesGPUResultsWorkers: nodesGPUResultsWorkers, - nodesGPUBufferWorkers: nodesGPUBufferWorkers, + database: database, + rmbClient: rmbClient, + batchSize: batchSize, + workers: workers, + interval: time.Duration(interval) * time.Minute, + nodeTwinIdsChan: make(chan uint32), + resultChan: make(chan types.NodeGPU), + batchChan: make(chan []types.NodeGPU), } } -func (n *NodeGPUIndexer) queryGridNodes(ctx context.Context) { - ticker := time.NewTicker(n.checkInterval) - n.runQueryGridNodes(ctx) - for { - select { - case <-ticker.C: - n.runQueryGridNodes(ctx) - case twinIDs := <-n.newNodeTwinIDChan: - n.queryNewNodes(ctx, twinIDs) - case <-ctx.Done(): - return - } - } -} +func (n *NodeGPUIndexer) Start(ctx context.Context) { + go n.StartNodeFinder(ctx) + go n.startNodeTableWatcher(ctx) -func (n *NodeGPUIndexer) queryNewNodes(ctx context.Context, twinIDs []uint32) { - for _, twinID := range twinIDs { - err := n.getNodeGPUInfo(ctx, twinID) - log.Error().Err(err).Msgf("failed to send get GPU info request from relay in GPU indexer for node %d", twinID) + for i := uint(0); i < n.workers; i++ { + go n.StartNodeCaller(ctx) } -} -// TODO: use the node in utils -func (n *NodeGPUIndexer) runQueryGridNodes(ctx context.Context) { - status := "up" - filter := types.NodeFilter{ - Status: &status, + for i := uint(0); i < n.workers; i++ { + go n.StartResultBatcher(ctx) } - limit := types.Limit{ - Size: 100, - RetCount: true, - Page: 1, - } + go n.StartBatchUpserter(ctx) +} - hasNext := true - for hasNext { - nodes, err := n.getNodes(ctx, filter, limit) - if err != nil { - log.Error().Err(err).Msg("unable to query nodes in GPU indexer") +func (n *NodeGPUIndexer) StartNodeFinder(ctx context.Context) { + ticker := time.NewTicker(n.interval) + queryUpNodes(ctx, n.database, n.nodeTwinIdsChan) + for { + select { + case <-ticker.C: + queryUpNodes(ctx, n.database, n.nodeTwinIdsChan) + case <-ctx.Done(): return } - - if len(nodes) < int(limit.Size) { - hasNext = false - } - - for _, node := range nodes { - if err := n.getNodeGPUInfo(ctx, uint32(node.TwinID)); err != nil { - log.Error().Err(err).Msgf("failed to send get GPU info request from relay in GPU indexer for node %d", node.NodeID) - } - } - - limit.Page++ } } -func (n *NodeGPUIndexer) getNodeGPUInfo(ctx context.Context, nodeTwinID uint32) error { - subCtx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - - var nodesGPU []types.NodeGPU - err := n.rpcClient.Call(subCtx, nodeTwinID, "zos.gpu.list", nil, &nodesGPU) - if err != nil { - return err - } - log.Debug().Msgf("gpu indexer: %+v", nodesGPU) - - for i := range nodesGPU { - nodesGPU[i].NodeTwinID = nodeTwinID - } - - if len(nodesGPU) != 0 { - n.nodesGPUResultsChan <- nodesGPU - } - - return nil -} - -func (n *NodeGPUIndexer) getNodes(ctx context.Context, filter types.NodeFilter, limit types.Limit) ([]db.Node, error) { - ctx, cancel := context.WithTimeout(ctx, time.Second*30) - defer cancel() - - nodes, _, err := n.db.GetNodes(ctx, filter, limit) +func (n *NodeGPUIndexer) startNodeTableWatcher(ctx context.Context) { + ticker := time.NewTicker(newNodesCheckInterval) + latestCheckedID, err := n.database.GetLastNodeTwinID(ctx) if err != nil { - return nil, err + 
log.Error().Err(err).Msg("failed to get last node twin id") } - return nodes, nil -} - -func (n *NodeGPUIndexer) discardOldGpus(ctx context.Context, lastAdded uint32, gpuBatch []types.NodeGPU) (uint32, error) { - // invalidate the old indexed GPUs for the same node, - // but check the batch first to ensure it does not contain related GPUs to node twin it from the last batch. + for { + select { + case <-ticker.C: + newIDs, err := n.database.GetNodeTwinIDsAfter(ctx, latestCheckedID) + if err != nil { + log.Error().Err(err).Msgf("failed to get node twin ids after %d", latestCheckedID) + continue + } + if len(newIDs) == 0 { + continue + } - nodeTwinIds := []uint32{} - for _, gpu := range gpuBatch { - if gpu.NodeTwinID == lastAdded { - continue + latestCheckedID = newIDs[0] + for _, id := range newIDs { + n.nodeTwinIdsChan <- id + } + case <-ctx.Done(): + return } - nodeTwinIds = append(nodeTwinIds, gpu.NodeTwinID) - } - - err := n.db.DeleteOldGpus(ctx, nodeTwinIds) - if err != nil { - return 0, fmt.Errorf("failed to delete GPU information in GPU indexer") } - - return gpuBatch[len(gpuBatch)-1].NodeTwinID, nil } -func (n *NodeGPUIndexer) gpuBatchesDBUpserter(ctx context.Context) { - lastAddedGpuNodeTwinId := 0 +func (n *NodeGPUIndexer) StartNodeCaller(ctx context.Context) { for { select { - case gpuBatch := <-n.nodesGPUBatchesChan: - lastAdded, err := n.discardOldGpus(ctx, uint32(lastAddedGpuNodeTwinId), gpuBatch) + case twinId := <-n.nodeTwinIdsChan: + var gpus []types.NodeGPU + err := callNode(ctx, n.rmbClient, gpuListCmd, nil, twinId, &gpus) if err != nil { - log.Error().Err(err).Msg("failed to update GPU info in GPU indexer") continue } - lastAddedGpuNodeTwinId = int(lastAdded) - err = n.db.UpsertNodesGPU(ctx, gpuBatch) - if err != nil { - log.Error().Err(err).Msg("failed to update GPU info in GPU indexer") - continue + + for i := 0; i < len(gpus); i++ { + gpus[i].NodeTwinID = twinId + gpus[i].UpdatedAt = time.Now().Unix() + log.Info().Msgf("%+v", gpus[i]) + n.resultChan <- gpus[i] } case <-ctx.Done(): - log.Error().Err(ctx.Err()).Msg("Nodes GPU DB Upserter exited") return } } } -func (n *NodeGPUIndexer) gpuNodeResultsBatcher(ctx context.Context) { - nodesGPUBuffer := make([]types.NodeGPU, 0, n.batchSize) - ticker := time.NewTicker(lingerBatch) +func (n *NodeGPUIndexer) StartResultBatcher(ctx context.Context) { + buffer := make([]types.NodeGPU, 0, n.batchSize) + + ticker := time.NewTicker(flushingBufferInterval) for { select { - case nodesGPU := <-n.nodesGPUResultsChan: - nodesGPUBuffer = append(nodesGPUBuffer, nodesGPU...) 
- if len(nodesGPUBuffer) >= int(n.batchSize) { - log.Debug().Msg("flushing gpu indexer buffer") - n.nodesGPUBatchesChan <- nodesGPUBuffer - nodesGPUBuffer = nil + case gpus := <-n.resultChan: + buffer = append(buffer, gpus) + if len(buffer) >= int(n.batchSize) { + n.batchChan <- buffer + buffer = nil } - // This case covers flushing data when the limit for the batch wasn't met case <-ticker.C: - if len(nodesGPUBuffer) != 0 { - log.Debug().Msg("cleaning up gpu indexer buffer") - n.nodesGPUBatchesChan <- nodesGPUBuffer - nodesGPUBuffer = nil + if len(buffer) != 0 { + n.batchChan <- buffer + buffer = nil } case <-ctx.Done(): - log.Error().Err(ctx.Err()).Msg("Node GPU results batcher exited") return } } } - -func (n *NodeGPUIndexer) Start(ctx context.Context) { - for i := uint(0); i < n.nodesGPUResultsWorkers; i++ { - go n.gpuNodeResultsBatcher(ctx) - } - - for i := uint(0); i < n.nodesGPUBufferWorkers; i++ { - go n.gpuBatchesDBUpserter(ctx) - } - - go n.queryGridNodes(ctx) - - go n.watchNodeTable(ctx) - -} - -func (n *NodeGPUIndexer) watchNodeTable(ctx context.Context) { - ticker := time.NewTicker(newNodesCheckInterval) - latestCheckedID, err := n.db.GetLastNodeTwinID(ctx) - if err != nil { - log.Error().Err(err).Msg("failed to get last node twin id") - } +func (n *NodeGPUIndexer) StartBatchUpserter(ctx context.Context) { for { select { - case <-ticker.C: - newIDs, err := n.db.GetNodeTwinIDsAfter(ctx, latestCheckedID) + case batch := <-n.batchChan: + log.Info().Msgf("%+v", batch) + err := discardOldGpus(ctx, n.database, n.interval, batch) if err != nil { - log.Error().Err(err).Msgf("failed to get node twin ids after %d", latestCheckedID) - continue - } - if len(newIDs) == 0 { - continue - } - nodeTwinIDs := make([]uint32, 0) - for _, id := range newIDs { - nodeTwinIDs = append(nodeTwinIDs, uint32(id)) + log.Error().Err(err).Msg("failed to remove old GPUs") } - n.newNodeTwinIDChan <- nodeTwinIDs - latestCheckedID = int64(nodeTwinIDs[0]) + err = n.database.UpsertNodesGPU(ctx, batch) + if err != nil { + log.Error().Err(err).Msg("failed to upsert new GPUs") + } case <-ctx.Done(): return } } } + +func discardOldGpus(ctx context.Context, database db.Database, interval time.Duration, gpuBatch []types.NodeGPU) error { + // invalidate the old indexed GPUs for the same node, + // but check the batch first to ensure it does not contain related GPUs to node twin it from the last batch. 
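+	// expiration marks the oldest acceptable refresh time (now minus one indexing interval); it is handed to DeleteOldGpus together with the batch's twin ids so GPU rows that were not refreshed in this run can be cleared before the fresh batch is upserted.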
+ // TODO: if timestamp > 1 + nodeTwinIds := []uint32{} + for _, gpu := range gpuBatch { + nodeTwinIds = append(nodeTwinIds, gpu.NodeTwinID) + } + + expiration := time.Now().Unix() - int64(interval.Seconds()) + return database.DeleteOldGpus(ctx, nodeTwinIds, expiration) +} diff --git a/grid-proxy/internal/indexer/health.go b/grid-proxy/internal/indexer/health.go index dacebc2a3..72e30d792 100644 --- a/grid-proxy/internal/indexer/health.go +++ b/grid-proxy/internal/indexer/health.go @@ -15,85 +15,108 @@ const ( ) type NodeHealthIndexer struct { - db db.Database - relayClient *peer.RpcClient + database db.Database + rmbClient *peer.RpcClient nodeTwinIdsChan chan uint32 + resultChan chan types.HealthReport + batchChan chan []types.HealthReport indexerInterval time.Duration indexerWorkers uint + batchSize uint } func NewNodeHealthIndexer( - ctx context.Context, rpcClient *peer.RpcClient, - db db.Database, + database db.Database, + batchSize uint, indexerWorkers uint, indexerInterval uint, ) *NodeHealthIndexer { return &NodeHealthIndexer{ - db: db, - relayClient: rpcClient, + database: database, + rmbClient: rpcClient, nodeTwinIdsChan: make(chan uint32), + resultChan: make(chan types.HealthReport), + batchChan: make(chan []types.HealthReport), + batchSize: batchSize, indexerWorkers: indexerWorkers, indexerInterval: time.Duration(indexerInterval) * time.Minute, } } func (c *NodeHealthIndexer) Start(ctx context.Context) { + go c.StartNodeFinder(ctx) - // start the node querier, push twin-ids into chan - go c.startNodeQuerier(ctx) + for i := uint(0); i < c.indexerWorkers; i++ { + go c.StartNodeCaller(ctx) + } - // start the health indexer workers, pop from twin-ids chan and update the db for i := uint(0); i < c.indexerWorkers; i++ { - go c.checkNodeHealth(ctx) + go c.StartResultBatcher(ctx) } + go c.StartBatchUpserter(ctx) } -func (c *NodeHealthIndexer) startNodeQuerier(ctx context.Context) { +func (c *NodeHealthIndexer) StartNodeFinder(ctx context.Context) { ticker := time.NewTicker(c.indexerInterval) - c.queryHealthyNodes(ctx) - queryUpNodes(ctx, c.db, c.nodeTwinIdsChan) + + queryHealthyNodes(ctx, c.database, c.nodeTwinIdsChan) // to revalidate the reports if node went down + queryUpNodes(ctx, c.database, c.nodeTwinIdsChan) for { select { case <-ticker.C: - c.queryHealthyNodes(ctx) - queryUpNodes(ctx, c.db, c.nodeTwinIdsChan) + queryHealthyNodes(ctx, c.database, c.nodeTwinIdsChan) + queryUpNodes(ctx, c.database, c.nodeTwinIdsChan) case <-ctx.Done(): return } } } -// to revalidate the reports -func (c *NodeHealthIndexer) queryHealthyNodes(ctx context.Context) { - ids, err := c.db.GetHealthyNodeTwinIds(ctx) - if err != nil { - log.Error().Err(err).Msg("failed to query healthy nodes") +func (c *NodeHealthIndexer) StartNodeCaller(ctx context.Context) { + for { + select { + case twinId := <-c.nodeTwinIdsChan: + var response types.HealthReport + err := callNode(ctx, c.rmbClient, healthCallCmd, nil, twinId, &response) + c.resultChan <- getHealthReport(response, err, twinId) + case <-ctx.Done(): + return + } } +} - for _, id := range ids { - c.nodeTwinIdsChan <- uint32(id) +func (c *NodeHealthIndexer) StartResultBatcher(ctx context.Context) { + buffer := make([]types.HealthReport, 0, c.batchSize) + + ticker := time.NewTicker(flushingBufferInterval) + for { + select { + case report := <-c.resultChan: + buffer = append(buffer, report) + if len(buffer) >= int(c.batchSize) { + c.batchChan <- buffer + buffer = nil + } + case <-ticker.C: + if len(buffer) != 0 { + c.batchChan <- buffer + buffer = nil + } + 
case <-ctx.Done(): + return + } } } -func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) { - var result interface{} +func (c *NodeHealthIndexer) StartBatchUpserter(ctx context.Context) { for { select { - case twinId := <-c.nodeTwinIdsChan: - subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) - err := c.relayClient.Call(subCtx, twinId, healthCallCmd, nil, &result) - cancel() - - healthReport := types.HealthReport{ - NodeTwinId: twinId, - Healthy: isHealthy(err), - } - // TODO: separate this on a different channel - err = c.db.UpsertNodeHealth(ctx, healthReport) + case batch := <-c.batchChan: + err := c.database.UpsertNodeHealth(ctx, batch) if err != nil { - log.Error().Err(err).Msgf("failed to update health report for node with twin id %d", twinId) + log.Error().Err(err).Msg("failed to upsert node health") } case <-ctx.Done(): return @@ -101,6 +124,16 @@ func (c *NodeHealthIndexer) checkNodeHealth(ctx context.Context) { } } -func isHealthy(err error) bool { - return err == nil +func getHealthReport(response interface{}, err error, twinId uint32) types.HealthReport { + report := types.HealthReport{ + NodeTwinId: twinId, + Healthy: false, + } + + if err != nil { + return report + } + + report.Healthy = true + return report } diff --git a/grid-proxy/internal/indexer/indexer.go b/grid-proxy/internal/indexer/indexer.go deleted file mode 100644 index 8535f2657..000000000 --- a/grid-proxy/internal/indexer/indexer.go +++ /dev/null @@ -1,54 +0,0 @@ -package indexer - -import ( - "context" - "time" - - "github.com/rs/zerolog/log" - "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" -) - -const ( - indexerCallTimeout = 10 * time.Second -) - -type Watcher interface { - Start(ctx context.Context) -} - -type Indexer struct { - Watchers map[string]Watcher - Paused bool - Context context.Context - RmbClient *peer.RpcClient -} - -func NewIndexer( - ctx context.Context, - paused bool, - rmbClient *peer.RpcClient, -) *Indexer { - return &Indexer{ - Watchers: make(map[string]Watcher), - Paused: paused, - Context: ctx, - RmbClient: rmbClient, - } -} - -func (i *Indexer) RegisterWatcher(name string, watcher Watcher) { - i.Watchers[name] = watcher -} - -func (i *Indexer) Start() { - if i.Paused { - log.Info().Msg("Indexer paused") - return - } - - log.Info().Msg("Starting indexer...") - for name, watcher := range i.Watchers { - watcher.Start(i.Context) - log.Info().Msgf("%s watcher started", name) - } -} diff --git a/grid-proxy/internal/indexer/manager.go b/grid-proxy/internal/indexer/manager.go new file mode 100644 index 000000000..65e3b3405 --- /dev/null +++ b/grid-proxy/internal/indexer/manager.go @@ -0,0 +1,48 @@ +package indexer + +import ( + "context" + "time" + + "github.com/rs/zerolog/log" +) + +const ( + indexerCallTimeout = 30 * time.Second // rmb calls timeout + flushingBufferInterval = 60 * time.Second // upsert buffer in db if it didn't reach the batch size + newNodesCheckInterval = 5 * time.Minute +) + +type Indexer interface { + Start(ctx context.Context) + StartNodeFinder(ctx context.Context) + StartNodeCaller(ctx context.Context) + StartResultBatcher(ctx context.Context) + StartBatchUpserter(ctx context.Context) +} + +type Manager struct { + Indexers map[string]Indexer + Context context.Context +} + +func NewManager( + ctx context.Context, +) *Manager { + return &Manager{ + Indexers: make(map[string]Indexer), + Context: ctx, + } +} + +func (m *Manager) Register(name string, indexer Indexer) { + m.Indexers[name] = indexer +} + +func (m *Manager) Start() { + 
log.Info().Msg("Starting indexers manager...") + for name, watcher := range m.Indexers { + watcher.Start(m.Context) + log.Info().Msgf("%s indexer started", name) + } +} diff --git a/grid-proxy/internal/indexer/network_speed.go b/grid-proxy/internal/indexer/network_speed.go deleted file mode 100644 index 57989c271..000000000 --- a/grid-proxy/internal/indexer/network_speed.go +++ /dev/null @@ -1,144 +0,0 @@ -package indexer - -import ( - "context" - "time" - - "github.com/rs/zerolog/log" - "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" - "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" - "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" -) - -const ( - perfTestCallCmd = "zos.perf.get" - testName = "iperf" - speedBatchSize = 20 -) - -type SpeedWatcher struct { - database db.Database - rmbClient *peer.RpcClient - nodeTwinIdsChan chan uint32 - resultChan chan types.NetworkTestResult - interval time.Duration - workers uint - batchSize uint -} - -func NewSpeedWatcher( - ctx context.Context, - database db.Database, - rmbClient *peer.RpcClient, - interval uint, - workers uint, -) *SpeedWatcher { - return &SpeedWatcher{ - database: database, - rmbClient: rmbClient, - nodeTwinIdsChan: make(chan uint32), - resultChan: make(chan types.NetworkTestResult), - interval: time.Duration(interval) * time.Minute, - workers: workers, - batchSize: speedBatchSize, - } -} - -func (w *SpeedWatcher) Start(ctx context.Context) { - go w.startNodeQuerier(ctx) - - for i := uint(0); i < w.workers; i++ { - go w.startNodeCaller(ctx) - } - - go w.startUpserter(ctx, w.database) -} - -func (w *SpeedWatcher) startNodeQuerier(ctx context.Context) { - ticker := time.NewTicker(w.interval) - queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) - for { - select { - case <-ticker.C: - queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) - case <-ctx.Done(): - return - } - } -} - -func (w *SpeedWatcher) startNodeCaller(ctx context.Context) { - for { - select { - case twinId := <-w.nodeTwinIdsChan: - response, err := w.callNode(ctx, twinId) - if err != nil { - continue - } - parsed := parse(response, twinId) - w.resultChan <- parsed - case <-ctx.Done(): - return - } - } -} - -func (w *SpeedWatcher) callNode(ctx context.Context, twinId uint32) (types.PerfResult, error) { - var result types.PerfResult - subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) - defer cancel() - - payload := struct { - Name string - }{ - Name: testName, - } - err := w.rmbClient.Call(subCtx, twinId, perfTestCallCmd, payload, &result) - if err != nil { - log.Error().Err(err).Uint32("twinId", twinId).Msg("failed to call node") - } - - return result, err -} - -func (w *SpeedWatcher) startUpserter(ctx context.Context, database db.Database) { - buffer := make([]types.NetworkTestResult, 0, w.batchSize) - - ticker := time.NewTicker(flushingInterval) - for { - select { - case report := <-w.resultChan: - buffer = append(buffer, report) - if len(buffer) >= int(w.batchSize) { - err := w.database.UpsertNetworkSpeed(ctx, buffer) - if err != nil { - log.Error().Err(err) - } - buffer = nil - } - case <-ticker.C: - if len(buffer) != 0 { - err := w.database.UpsertNetworkSpeed(ctx, buffer) - if err != nil { - log.Error().Err(err) - } - buffer = nil - } - case <-ctx.Done(): - return - } - } -} - -func parse(res types.PerfResult, twinId uint32) types.NetworkTestResult { - // TODO: better parsing - // we have four speeds tcp/udp for ipv4/ipv6. 
- // now, we just pick the first non-zero - for _, report := range res.Result { - if report.DownloadSpeed != 0 { - report.NodeTwinId = twinId - return report - } - } - return types.NetworkTestResult{} -} diff --git a/grid-proxy/internal/indexer/speed.go b/grid-proxy/internal/indexer/speed.go new file mode 100644 index 000000000..105333b21 --- /dev/null +++ b/grid-proxy/internal/indexer/speed.go @@ -0,0 +1,169 @@ +package indexer + +import ( + "context" + "encoding/json" + "time" + + "github.com/rs/zerolog/log" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" + "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" + + zosPerfPkg "github.com/threefoldtech/zos/pkg/perf" + zosIPerfPkg "github.com/threefoldtech/zos/pkg/perf/iperf" +) + +const ( + perfTestCallCmd = "zos.perf.get" + testName = "iperf" +) + +type SpeedIndexer struct { + database db.Database + rmbClient *peer.RpcClient + interval time.Duration + workers uint + batchSize uint + nodeTwinIdsChan chan uint32 + resultChan chan types.Speed + batchChan chan []types.Speed +} + +func NewSpeedIndexer( + rmbClient *peer.RpcClient, + database db.Database, + batchSize uint, + interval uint, + workers uint, +) *SpeedIndexer { + return &SpeedIndexer{ + database: database, + rmbClient: rmbClient, + batchSize: batchSize, + interval: time.Duration(interval) * time.Minute, + workers: workers, + nodeTwinIdsChan: make(chan uint32), + resultChan: make(chan types.Speed), + batchChan: make(chan []types.Speed), + } +} + +func (w *SpeedIndexer) Start(ctx context.Context) { + go w.StartNodeFinder(ctx) + + for i := uint(0); i < w.workers; i++ { + go w.StartNodeCaller(ctx) + } + + for i := uint(0); i < w.workers; i++ { + go w.StartResultBatcher(ctx) + } + + go w.StartBatchUpserter(ctx) +} + +func (w *SpeedIndexer) StartNodeFinder(ctx context.Context) { + ticker := time.NewTicker(w.interval) + queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) + for { + select { + case <-ticker.C: + queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) + case <-ctx.Done(): + return + } + } +} + +func (w *SpeedIndexer) StartNodeCaller(ctx context.Context) { + for { + select { + case twinId := <-w.nodeTwinIdsChan: + payload := struct { + Name string + }{ + Name: testName, + } + var response zosPerfPkg.TaskResult + if err := callNode(ctx, w.rmbClient, perfTestCallCmd, payload, twinId, &response); err != nil { + continue + } + + speedReport, err := parseSpeed(response, twinId) + if err != nil { + continue + } + + w.resultChan <- speedReport + case <-ctx.Done(): + return + } + } +} + +func (w *SpeedIndexer) StartResultBatcher(ctx context.Context) { + buffer := make([]types.Speed, 0, w.batchSize) + + ticker := time.NewTicker(flushingBufferInterval) + for { + select { + case report := <-w.resultChan: + buffer = append(buffer, report) + if len(buffer) >= int(w.batchSize) { + w.batchChan <- buffer + buffer = nil + } + case <-ticker.C: + if len(buffer) != 0 { + w.batchChan <- buffer + buffer = nil + } + case <-ctx.Done(): + return + } + } +} + +func (w *SpeedIndexer) StartBatchUpserter(ctx context.Context) { + for { + select { + case batch := <-w.batchChan: + err := w.database.UpsertNetworkSpeed(ctx, batch) + if err != nil { + log.Error().Err(err).Msg("failed to upsert network speed") + } + case <-ctx.Done(): + return + } + } +} + +func parseSpeed(res zosPerfPkg.TaskResult, twinId uint32) (types.Speed, error) { + speed := types.Speed{ + NodeTwinId: twinId, + } + + iperfResultBytes, err := 
json.Marshal(res.Result) + if err != nil { + return speed, err + } + + var iperfResults []zosIPerfPkg.IperfResult + if err := json.Unmarshal(iperfResultBytes, &iperfResults); err != nil { + return speed, err + } + + // TODO: better parsing + // we have four speeds tcp/udp for ipv4/ipv6. + // now, we just pick the first non-zero + for _, report := range iperfResults { + if report.DownloadSpeed != 0 { + speed.Download = report.DownloadSpeed + speed.Upload = report.UploadSpeed + return speed, nil + } + } + + return speed, nil +} diff --git a/grid-proxy/internal/indexer/types.go b/grid-proxy/internal/indexer/types.go deleted file mode 100644 index f861c4684..000000000 --- a/grid-proxy/internal/indexer/types.go +++ /dev/null @@ -1,47 +0,0 @@ -package indexer - -// DMI represents a map of SectionTypeStr to Section parsed from dmidecode output, -// as well as information about the tool used to get these sections -// Property in section is in the form of key value pairs where values are optional -// and may include a list of items as well. -// k: [v] -// -// [ -// item1 -// item2 -// ... -// ] -type DMI struct { - Tooling Tooling `json:"tooling"` - Sections []Section `json:"sections"` -} - -// Tooling holds the information and version about the tool used to -// read DMI information -type Tooling struct { - Aggregator string `json:"aggregator"` - Decoder string `json:"decoder"` -} - -// Section represents a complete section like BIOS or Baseboard -type Section struct { - HandleLine string `json:"handleline"` - TypeStr string `json:"typestr,omitempty"` - Type Type `json:"typenum"` - SubSections []SubSection `json:"subsections"` -} - -// Type (allowed types 0 -> 42) -type Type int - -// SubSection represents part of a section, identified by a title -type SubSection struct { - Title string `json:"title"` - Properties map[string]PropertyData `json:"properties,omitempty"` -} - -// PropertyData represents a key value pair with optional list of items -type PropertyData struct { - Val string `json:"value"` - Items []string `json:"items,omitempty"` -} diff --git a/grid-proxy/internal/indexer/utils.go b/grid-proxy/internal/indexer/utils.go index 5071a1c61..d4c156e0c 100644 --- a/grid-proxy/internal/indexer/utils.go +++ b/grid-proxy/internal/indexer/utils.go @@ -6,6 +6,7 @@ import ( "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" + "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" ) func queryUpNodes(ctx context.Context, database db.Database, nodeTwinIdChan chan uint32) { @@ -32,3 +33,26 @@ func queryUpNodes(ctx context.Context, database db.Database, nodeTwinIdChan chan limit.Page++ } } + +func queryHealthyNodes(ctx context.Context, database db.Database, nodeTwinIdChan chan uint32) { + ids, err := database.GetHealthyNodeTwinIds(ctx) + if err != nil { + log.Error().Err(err).Msg("failed to query healthy nodes") + } + + for _, id := range ids { + nodeTwinIdChan <- id + } +} + +func callNode(ctx context.Context, rmbClient *peer.RpcClient, cmd string, payload interface{}, twinId uint32, result interface{}) error { + subCtx, cancel := context.WithTimeout(ctx, indexerCallTimeout) + defer cancel() + + err := rmbClient.Call(subCtx, twinId, cmd, payload, result) + if err != nil { + log.Error().Err(err).Uint32("twinId", twinId).Msg("failed to call node") + } + + return err +} diff --git a/grid-proxy/pkg/types/indexer.go b/grid-proxy/pkg/types/indexer.go index a9e661002..26bbf3cc3 100644 --- 
a/grid-proxy/pkg/types/indexer.go +++ b/grid-proxy/pkg/types/indexer.go @@ -1,32 +1,55 @@ package types -import ( - "database/sql/driver" - "encoding/json" - "errors" -) +// NodeGpu holds info for a node gpu +// used as both gorm model and server json response +type NodeGPU struct { + NodeTwinID uint32 `gorm:"uniqueIndex:uni_gpu_node_twin_id" json:"node_twin_id,omitempty" ` + ID string `gorm:"uniqueIndex:uni_gpu_node_twin_id" json:"id"` + Vendor string `json:"vendor"` + Device string `json:"device"` + Contract int `json:"contract"` + UpdatedAt int64 `json:"updated_at"` +} + +func (NodeGPU) TableName() string { + return "node_gpu" +} -// TODO: create new db_types pkg different than the server ones in pkg/types -// it will be easier to have a single type that implement Scanner/Valuer when needed -// will be used in: internal/explorer/db/types.go and tools/db/crafter/ and tests/mock_client +// HealthReport holds the state of node healthiness +// used as gorm model +type HealthReport struct { + NodeTwinId uint32 `gorm:"unique;not null"` + Healthy bool +} -type NetworkTestResult struct { - NodeTwinId uint32 `json:"node_twin_id" gorm:"unique;not null"` - UploadSpeed float64 `json:"upload_speed"` // in bit/sec - DownloadSpeed float64 `json:"download_speed"` // in bit/sec +func (HealthReport) TableName() string { + return "health_report" } -type PerfResult struct { - NodeTwinId uint32 `json:"node_twin_id"` - Result []NetworkTestResult `json:"result"` +// Speed holds upload/download speeds in `bit/sec` for a node +// used as both gorm model and server json response +type Speed struct { + NodeTwinId uint32 `json:"node_twin_id,omitempty" gorm:"unique;not null"` + Upload float64 `json:"upload"` // in bit/sec + Download float64 `json:"download"` // in bit/sec } -type DmiInfo struct { - NodeTwinId uint32 `json:"node_twin_id,omitempty" gorm:"unique;not null"` - BIOS BIOS `json:"bios" gorm:"type:jsonb"` - Baseboard Baseboard `json:"baseboard" gorm:"type:jsonb"` - Processor Processors `json:"processor" gorm:"type:jsonb"` - Memory Memories `json:"memory" gorm:"type:jsonb"` +func (Speed) TableName() string { + return "speed" +} + +// Dmi holds hardware dmi info for a node +// used as both gorm model and server json response +type Dmi struct { + NodeTwinId uint32 `json:"node_twin_id,omitempty" gorm:"unique;not null"` + BIOS BIOS `json:"bios" gorm:"type:jsonb;serializer:json"` + Baseboard Baseboard `json:"baseboard" gorm:"type:jsonb;serializer:json"` + Processor []Processor `json:"processor" gorm:"type:jsonb;serializer:json"` + Memory []Memory `json:"memory" gorm:"type:jsonb;serializer:json"` +} + +func (Dmi) TableName() string { + return "dmi" } type BIOS struct { @@ -43,81 +66,8 @@ type Processor struct { Version string `json:"version"` ThreadCount string `json:"thread_count"` } -type Processors []Processor type Memory struct { Manufacturer string `json:"manufacturer"` Type string `json:"type"` } -type Memories []Memory - -/* -GORM directly maps the structs to tables. These structs can contain fields with basic Go types, -pointers or aliases of these types, or even custom types, as long as they implement the Scanner -and Valuer interfaces from the database/sql package. - -Notes: - - For simple types like the BIOS struct, we can directly implement the Scan/Value methods. - However, for types like []Processor, we need to create an alias, Processors, - so we have method receivers. 
-*/ - -func (c Processors) Value() (driver.Value, error) { - return json.Marshal(c) -} - -func (c *Processors) Scan(value interface{}) error { - if value == nil { - return nil - } - bytes, ok := value.([]byte) - if !ok { - return errors.New("Invalid data type for Processor") - } - return json.Unmarshal(bytes, &c) -} - -func (c Memories) Value() (driver.Value, error) { - return json.Marshal(c) -} - -func (c *Memories) Scan(value interface{}) error { - if value == nil { - return nil - } - bytes, ok := value.([]byte) - if !ok { - return errors.New("Invalid data type for Processor") - } - return json.Unmarshal(bytes, &c) -} - -func (c *BIOS) Value() (driver.Value, error) { - return json.Marshal(c) -} - -func (c *BIOS) Scan(value interface{}) error { - if value == nil { - return nil - } - bytes, ok := value.([]byte) - if !ok { - return errors.New("Invalid data type for Processor") - } - return json.Unmarshal(bytes, &c) -} - -func (c *Baseboard) Value() (driver.Value, error) { - return json.Marshal(c) -} - -func (c *Baseboard) Scan(value interface{}) error { - if value == nil { - return nil - } - bytes, ok := value.([]byte) - if !ok { - return errors.New("Invalid data type for Processor") - } - return json.Unmarshal(bytes, &c) -} diff --git a/grid-proxy/pkg/types/nodes.go b/grid-proxy/pkg/types/nodes.go index 528f422df..6b568f0ea 100644 --- a/grid-proxy/pkg/types/nodes.go +++ b/grid-proxy/pkg/types/nodes.go @@ -45,7 +45,7 @@ type Node struct { NumGPU int `json:"num_gpu" sort:"num_gpu"` ExtraFee uint64 `json:"extraFee" sort:"extra_fee"` Healthy bool `json:"healthy"` - Dmi DmiInfo `json:"dmi"` + Dmi Dmi `json:"dmi"` Speed Speed `json:"speed"` } @@ -55,11 +55,6 @@ type CapacityResult struct { Used Capacity `json:"used_resources"` } -type Speed struct { - Upload float64 `json:"upload"` - Download float64 `json:"download"` -} - // Node to be compatible with old view type NodeWithNestedCapacity struct { ID string `json:"id"` @@ -88,7 +83,7 @@ type NodeWithNestedCapacity struct { NumGPU int `json:"num_gpu"` ExtraFee uint64 `json:"extraFee"` Healthy bool `json:"healthy"` - Dmi DmiInfo `json:"dmi"` + Dmi Dmi `json:"dmi"` Speed Speed `json:"speed"` } @@ -150,18 +145,3 @@ type NodeFilter struct { Healthy *bool `schema:"healthy,omitempty"` Excluded []uint64 `schema:"excluded,omitempty"` } - -// NodeGPU holds the info about gpu card -type NodeGPU struct { - NodeTwinID uint32 `json:"node_twin_id"` - ID string `json:"id"` - Vendor string `json:"vendor"` - Device string `json:"device"` - Contract int `json:"contract"` -} - -// HeathReport holds the info of node health -type HealthReport struct { - NodeTwinId uint32 - Healthy bool -} diff --git a/grid-proxy/tests/queries/mock_client/counters.go b/grid-proxy/tests/queries/mock_client/counters.go index 1870757bc..1fae7e1ce 100644 --- a/grid-proxy/tests/queries/mock_client/counters.go +++ b/grid-proxy/tests/queries/mock_client/counters.go @@ -35,7 +35,7 @@ func (g *GridProxyMockClient) Stats(ctx context.Context, filter types.StatsFilte res.Gateways++ } } - if _, ok := g.data.GPUs[node.TwinID]; ok { + if _, ok := g.data.GPUs[uint32(node.TwinID)]; ok { gpus++ } if isDedicatedNode(g.data, node) { diff --git a/grid-proxy/tests/queries/mock_client/farms.go b/grid-proxy/tests/queries/mock_client/farms.go index 5409c5a2c..96b625c87 100644 --- a/grid-proxy/tests/queries/mock_client/farms.go +++ b/grid-proxy/tests/queries/mock_client/farms.go @@ -153,7 +153,7 @@ func (f *Farm) satisfyFarmNodesFilter(data *DBData, filter types.FarmFilter) boo continue } - _, ok := 
data.GPUs[node.TwinID] + _, ok := data.GPUs[uint32(node.TwinID)] if filter.NodeHasGPU != nil && ok != *filter.NodeHasGPU { continue } diff --git a/grid-proxy/tests/queries/mock_client/loader.go b/grid-proxy/tests/queries/mock_client/loader.go index a5d6d0ebe..6a287a711 100644 --- a/grid-proxy/tests/queries/mock_client/loader.go +++ b/grid-proxy/tests/queries/mock_client/loader.go @@ -34,12 +34,12 @@ type DBData struct { BillReports uint32 ContractResources map[string]ContractResources NonDeletedContracts map[uint64][]uint64 - GPUs map[uint64][]NodeGPU + GPUs map[uint32][]types.NodeGPU Regions map[string]string Locations map[string]Location - HealthReports map[uint64]bool - DMIs map[uint32]types.DmiInfo - Speeds map[uint32]types.NetworkTestResult + HealthReports map[uint32]bool + DMIs map[uint32]types.Dmi + Speeds map[uint32]types.Speed DB *sql.DB } @@ -571,7 +571,7 @@ func loadNodeGPUs(db *sql.DB, data *DBData) error { return err } for rows.Next() { - var gpu NodeGPU + var gpu types.NodeGPU if err := rows.Scan( &gpu.ID, &gpu.Contract, @@ -597,7 +597,7 @@ func loadHealthReports(db *sql.DB, data *DBData) error { return err } for rows.Next() { - var health HealthReport + var health types.HealthReport if err := rows.Scan( &health.NodeTwinId, &health.Healthy, @@ -619,12 +619,12 @@ func loadDMIs(db *sql.DB, data *DBData) error { processor, memory FROM - dmi_infos;`) + dmi;`) if err != nil { return err } for rows.Next() { - var dmi types.DmiInfo + var dmi types.Dmi if err := rows.Scan( &dmi.NodeTwinId, &dmi.BIOS, @@ -646,19 +646,19 @@ func loadSpeeds(db *sql.DB, data *DBData) error { rows, err := db.Query(` SELECT node_twin_id, - upload_speed, - download_speed + upload, + download FROM - network_test_results;`) + speed;`) if err != nil { return err } for rows.Next() { - var speed types.NetworkTestResult + var speed types.Speed if err := rows.Scan( &speed.NodeTwinId, - &speed.UploadSpeed, - &speed.DownloadSpeed, + &speed.Upload, + &speed.Download, ); err != nil { return err } @@ -690,13 +690,13 @@ func Load(db *sql.DB) (DBData, error) { NodeTotalResources: make(map[uint64]NodeResourcesTotal), NodeUsedResources: make(map[uint64]NodeResourcesTotal), NonDeletedContracts: make(map[uint64][]uint64), - GPUs: make(map[uint64][]NodeGPU), + GPUs: make(map[uint32][]types.NodeGPU), FarmHasRentedNode: make(map[uint64]map[uint64]bool), Regions: make(map[string]string), Locations: make(map[string]Location), - HealthReports: make(map[uint64]bool), - DMIs: make(map[uint32]types.DmiInfo), - Speeds: make(map[uint32]types.NetworkTestResult), + HealthReports: make(map[uint32]bool), + DMIs: make(map[uint32]types.Dmi), + Speeds: make(map[uint32]types.Speed), DB: db, } if err := loadNodes(db, &data); err != nil { diff --git a/grid-proxy/tests/queries/mock_client/nodes.go b/grid-proxy/tests/queries/mock_client/nodes.go index 7821ba59e..5335ce4f3 100644 --- a/grid-proxy/tests/queries/mock_client/nodes.go +++ b/grid-proxy/tests/queries/mock_client/nodes.go @@ -29,7 +29,7 @@ func (g *GridProxyMockClient) Nodes(ctx context.Context, filter types.NodeFilter } for _, node := range g.data.Nodes { if node.satisfies(filter, &g.data) { - numGPU := len(g.data.GPUs[node.TwinID]) + numGPU := len(g.data.GPUs[uint32(node.TwinID)]) nodePower := types.NodePower{ State: node.Power.State, @@ -87,11 +87,11 @@ func (g *GridProxyMockClient) Nodes(ctx context.Context, filter types.NodeFilter }, NumGPU: numGPU, ExtraFee: node.ExtraFee, - Healthy: g.data.HealthReports[node.TwinID], + Healthy: g.data.HealthReports[uint32(node.TwinID)], Dmi: 
g.data.DMIs[uint32(node.TwinID)], Speed: types.Speed{ - Upload: g.data.Speeds[uint32(node.TwinID)].UploadSpeed, - Download: g.data.Speeds[uint32(node.TwinID)].DownloadSpeed, + Upload: g.data.Speeds[uint32(node.TwinID)].Upload, + Download: g.data.Speeds[uint32(node.TwinID)].Download, }, }) } @@ -119,7 +119,7 @@ func (g *GridProxyMockClient) Node(ctx context.Context, nodeID uint32) (res type return res, fmt.Errorf("node not found") } - numGPU := len(g.data.GPUs[node.TwinID]) + numGPU := len(g.data.GPUs[uint32(node.TwinID)]) nodePower := types.NodePower{ State: node.Power.State, @@ -179,11 +179,11 @@ func (g *GridProxyMockClient) Node(ctx context.Context, nodeID uint32) (res type }, NumGPU: numGPU, ExtraFee: node.ExtraFee, - Healthy: g.data.HealthReports[node.TwinID], + Healthy: g.data.HealthReports[uint32(node.TwinID)], Dmi: g.data.DMIs[uint32(node.TwinID)], Speed: types.Speed{ - Upload: g.data.Speeds[uint32(node.TwinID)].UploadSpeed, - Download: g.data.Speeds[uint32(node.TwinID)].DownloadSpeed, + Upload: g.data.Speeds[uint32(node.TwinID)].Upload, + Download: g.data.Speeds[uint32(node.TwinID)].Download, }, } return @@ -225,7 +225,7 @@ func (n *Node) satisfies(f types.NodeFilter, data *DBData) bool { return false } - if f.Healthy != nil && *f.Healthy != data.HealthReports[n.TwinID] { + if f.Healthy != nil && *f.Healthy != data.HealthReports[uint32(n.TwinID)] { return false } @@ -348,7 +348,7 @@ func (n *Node) satisfies(f types.NodeFilter, data *DBData) bool { } foundGpuFilter := f.HasGPU != nil || f.GpuDeviceName != nil || f.GpuVendorName != nil || f.GpuVendorID != nil || f.GpuDeviceID != nil || f.GpuAvailable != nil - gpus, foundGpuCards := data.GPUs[n.TwinID] + gpus, foundGpuCards := data.GPUs[uint32(n.TwinID)] if !foundGpuCards && foundGpuFilter { return false @@ -372,7 +372,7 @@ func (n *Node) satisfies(f types.NodeFilter, data *DBData) bool { return true } -func gpuSatisfied(gpu NodeGPU, f types.NodeFilter) bool { +func gpuSatisfied(gpu types.NodeGPU, f types.NodeFilter) bool { if f.GpuDeviceName != nil && !contains(gpu.Device, *f.GpuDeviceName) { return false } diff --git a/grid-proxy/tests/queries/mock_client/types.go b/grid-proxy/tests/queries/mock_client/types.go index ffbe0aa9d..ef9764ca8 100644 --- a/grid-proxy/tests/queries/mock_client/types.go +++ b/grid-proxy/tests/queries/mock_client/types.go @@ -1,11 +1,6 @@ // nolint package mock -import ( - "encoding/json" - "fmt" -) - // TODO: the one in tools/db/types.go is unexported but it's the same file type ContractResources struct { @@ -46,7 +41,7 @@ type Node struct { CreatedAt uint64 UpdatedAt uint64 LocationID string - Power NodePower `gorm:"type:jsonb"` + Power NodePower `gorm:"type:jsonb;serializer:json"` HasGPU bool ExtraFee uint64 Dedicated bool @@ -57,17 +52,6 @@ type NodePower struct { Target string `json:"target"` } -// Scan is a custom decoder for jsonb filed. executed while scanning the node. 
-func (np *NodePower) Scan(value interface{}) error { - if value == nil { - return nil - } - if data, ok := value.([]byte); ok { - return json.Unmarshal(data, np) - } - return fmt.Errorf("failed to unmarshal NodePower") -} - type Twin struct { ID string GridVersion uint64 @@ -141,14 +125,6 @@ type NameContract struct { CreatedAt uint64 } -type NodeGPU struct { - NodeTwinID uint64 - ID string - Vendor string - Device string - Contract int -} - type HealthReport struct { NodeTwinId uint64 Healthy bool diff --git a/grid-proxy/tools/db/crafter/generator.go b/grid-proxy/tools/db/crafter/generator.go index a4bd20594..63e552729 100644 --- a/grid-proxy/tools/db/crafter/generator.go +++ b/grid-proxy/tools/db/crafter/generator.go @@ -90,7 +90,6 @@ func (c *Crafter) GenerateNodes() error { powerState := []string{"Up", "Down"} var locations []string - var healthReports []string var nodes []string var totalResources []string var publicConfigs []string @@ -138,7 +137,7 @@ func (c *Crafter) GenerateNodes() error { c.nodesHRU[i] = hru c.nodeUP[i] = up - // location latitude and longitue needs to be castable to decimal + // location latitude and longitude needs to be castable to decimal // if not, the convert_to_decimal function will raise a notice // reporting the incident, which downgrades performance locationId := fmt.Sprintf("location-%d", uint64(start)+i) @@ -189,15 +188,6 @@ func (c *Crafter) GenerateNodes() error { node_id: fmt.Sprintf("node-%d", i), } - health := true - if flip(.5) { - health = false - } - healthReport := health_report{ - node_twin_id: node.twin_id, - healthy: health, - } - if _, ok := c.dedicatedFarms[node.farm_id]; ok { c.availableRentNodes[i] = struct{}{} c.availableRentNodesList = append(c.availableRentNodesList, i) @@ -209,12 +199,6 @@ func (c *Crafter) GenerateNodes() error { } locations = append(locations, locationTuple) - healthTuple, err := objectToTupleString(healthReport) - if err != nil { - return fmt.Errorf("failed to convert health report to tuple string: %w", err) - } - healthReports = append(healthReports, healthTuple) - nodeTuple, err := objectToTupleString(node) if err != nil { return fmt.Errorf("failed to convert node object to tuple string: %w", err) @@ -250,10 +234,6 @@ func (c *Crafter) GenerateNodes() error { return fmt.Errorf("failed to insert locations: %w", err) } - if err := c.insertTuples(health_report{}, healthReports); err != nil { - return fmt.Errorf("failed to insert health reports: %w", err) - } - if err := c.insertTuples(node{}, nodes); err != nil { return fmt.Errorf("failed to insert nodes: %w", err) } @@ -792,7 +772,7 @@ func (c *Crafter) updateNodeContractPublicIPs(nodeContracts []uint64) error { } func (c *Crafter) GenerateNodeGPUs() error { - var GPUs []string + var gpus []string vendors := []string{"NVIDIA Corporation", "AMD", "Intel Corporation"} devices := []string{"GeForce RTX 3080", "Radeon RX 6800 XT", "Intel Iris Xe MAX"} @@ -803,7 +783,6 @@ func (c *Crafter) GenerateNodeGPUs() error { gpuNum := len(vendors) - 1 for j := 0; j <= gpuNum; j++ { g := node_gpu{ - // WATCH node_twin_id: uint64(nodeTwinsStart + uint(i)), vendor: vendors[j], device: devices[j], @@ -814,11 +793,11 @@ func (c *Crafter) GenerateNodeGPUs() error { if err != nil { return fmt.Errorf("failed to convert gpu object to tuple string: %w", err) } - GPUs = append(GPUs, gpuTuple) + gpus = append(gpus, gpuTuple) } } - if err := c.insertTuples(node_gpu{}, GPUs); err != nil { + if err := c.insertTuples(node_gpu{}, gpus); err != nil { return fmt.Errorf("failed to insert node 
gpu: %w", err) } @@ -867,12 +846,12 @@ func (c *Crafter) GenerateSpeedReports() error { end := c.NodeStart + c.NodeCount nodeTwinsStart := c.TwinStart + (c.FarmStart + c.FarmCount) - var speedReports []types.NetworkTestResult + var speedReports []types.Speed for i := start; i < end; i += 2 { - speedReport := types.NetworkTestResult{ - NodeTwinId: uint32(nodeTwinsStart + i), - UploadSpeed: rand.Float64() * float64(rand.Intn(9999999)), - DownloadSpeed: rand.Float64() * float64(rand.Intn(9999999)), + speedReport := types.Speed{ + NodeTwinId: uint32(nodeTwinsStart + i), + Upload: rand.Float64() * float64(rand.Intn(9999999)), + Download: rand.Float64() * float64(rand.Intn(9999999)), } speedReports = append(speedReports, speedReport) } @@ -890,9 +869,9 @@ func (c *Crafter) GenerateDmi() error { end := c.NodeStart + c.NodeCount nodeTwinsStart := c.TwinStart + (c.FarmStart + c.FarmCount) - var dmis []types.DmiInfo + var dmis []types.Dmi for i := start; i < end; i++ { - dmi := types.DmiInfo{ + dmi := types.Dmi{ NodeTwinId: uint32(nodeTwinsStart + i), BIOS: bios[rand.Intn(len(bios))], Baseboard: baseboard[rand.Intn(len(baseboard))], @@ -909,3 +888,30 @@ func (c *Crafter) GenerateDmi() error { return nil } + +func (c *Crafter) GenerateHealthReports() error { + start := c.NodeStart + end := c.NodeStart + c.NodeCount + nodeTwinsStart := c.TwinStart + (c.FarmStart + c.FarmCount) + + var healthReports []types.HealthReport + for i := start; i < end; i++ { + health := true + if flip(.5) { + health = false + } + + healthReport := types.HealthReport{ + NodeTwinId: uint32(nodeTwinsStart + i), + Healthy: health, + } + healthReports = append(healthReports, healthReport) + } + + if err := c.gormDB.Create(healthReports).Error; err != nil { + return fmt.Errorf("failed to insert health reports: %w", err) + } + fmt.Println("health reports generated") + + return nil +} diff --git a/grid-proxy/tools/db/crafter/types.go b/grid-proxy/tools/db/crafter/types.go index 006a72ab0..9a086d647 100644 --- a/grid-proxy/tools/db/crafter/types.go +++ b/grid-proxy/tools/db/crafter/types.go @@ -142,7 +142,7 @@ type node struct { created_at uint64 updated_at uint64 location_id string - power *nodePower `gorm:"type:jsonb"` + power *nodePower `gorm:"type:jsonb;serializer:json"` extra_fee uint64 dedicated bool } @@ -248,8 +248,3 @@ type country struct { lat string long string } - -type health_report struct { - node_twin_id uint64 - healthy bool -} diff --git a/grid-proxy/tools/db/generate.go b/grid-proxy/tools/db/generate.go index 69b0ffc65..d61e6bb34 100644 --- a/grid-proxy/tools/db/generate.go +++ b/grid-proxy/tools/db/generate.go @@ -125,5 +125,9 @@ func generateData(db *sql.DB, gormDB *gorm.DB, seed int) error { return fmt.Errorf("failed to generate dmi reports: %w", err) } + if err := generator.GenerateHealthReports(); err != nil { + return fmt.Errorf("failed to generate dmi reports: %w", err) + } + return nil } diff --git a/grid-proxy/tools/db/schema.sql b/grid-proxy/tools/db/schema.sql index 978a3fdfa..6fd9a774f 100644 --- a/grid-proxy/tools/db/schema.sql +++ b/grid-proxy/tools/db/schema.sql @@ -1029,7 +1029,8 @@ CREATE TABLE IF NOT EXISTS public.node_gpu ( node_twin_id bigint NOT NULL, vendor text, device text, - contract bigint + contract bigint, + updated_at timestamp with time zone ); ALTER TABLE public.node_gpu @@ -1053,29 +1054,29 @@ ALTER TABLE public.health_report -- --- Name: dmi_infos; Type: TABLE; Schema: public; Owner: postgres +-- Name: dmi; Type: TABLE; Schema: public; Owner: postgres -- -CREATE TABLE 
public.dmi_infos( - node_twin_id bigint PRIMARY KEY, +CREATE TABLE public.dmi( + node_twin_id bigint NOT NULL, bios jsonb, baseboard jsonb, processor jsonb, memory jsonb ); -ALTER TABLE public.dmi_infos +ALTER TABLE public.dmi OWNER TO postgres; -- --- Name: network_test_results; Type: TABLE; Schema: public; Owner: postgres +-- Name: speed; Type: TABLE; Schema: public; Owner: postgres -- -CREATE TABLE public.network_test_results( - node_twin_id bigint PRIMARY KEY, - upload_speed numeric, - download_speed numeric +CREATE TABLE public.speed( + node_twin_id bigint NOT NULL, + upload numeric, + download numeric ); -ALTER TABLE public.network_test_results +ALTER TABLE public.speed OWNER TO postgres; From ee9a31b9e59ef06c36d36c09ebb0e4c41d64d67e Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Mon, 19 Feb 2024 12:57:14 +0200 Subject: [PATCH 15/19] clean & complete some missing parts: - correctly handle the indexer manager start is !noIndexer after debugging - change the updated_at field on gpu table in schema to be a bigint to align with the struct model - use gorm instead of plain sql for scanning in loaders in both load nodes/dmi to benefit from the registered json serializer instead of implementing the scanning logic for the jsonb fields --- grid-proxy/cmds/proxy_server/main.go | 2 +- grid-proxy/internal/indexer/gpu.go | 2 - grid-proxy/tests/queries/main_test.go | 4 +- .../tests/queries/mock_client/loader.go | 85 +++---------------- grid-proxy/tools/db/schema.sql | 2 +- 5 files changed, 16 insertions(+), 79 deletions(-) diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index b07ae9e1b..0a7c8e3f0 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -174,8 +174,8 @@ func main() { ) manager.Register("Speed", speedIndexer) - manager.Start() if !f.noIndexer { + manager.Start() } else { log.Info().Msg("Indexers Manager did not start") } diff --git a/grid-proxy/internal/indexer/gpu.go b/grid-proxy/internal/indexer/gpu.go index 3dfdc4f25..d3baa4501 100644 --- a/grid-proxy/internal/indexer/gpu.go +++ b/grid-proxy/internal/indexer/gpu.go @@ -114,7 +114,6 @@ func (n *NodeGPUIndexer) StartNodeCaller(ctx context.Context) { for i := 0; i < len(gpus); i++ { gpus[i].NodeTwinID = twinId gpus[i].UpdatedAt = time.Now().Unix() - log.Info().Msgf("%+v", gpus[i]) n.resultChan <- gpus[i] } case <-ctx.Done(): @@ -149,7 +148,6 @@ func (n *NodeGPUIndexer) StartBatchUpserter(ctx context.Context) { for { select { case batch := <-n.batchChan: - log.Info().Msgf("%+v", batch) err := discardOldGpus(ctx, n.database, n.interval, batch) if err != nil { log.Error().Err(err).Msg("failed to remove old GPUs") diff --git a/grid-proxy/tests/queries/main_test.go b/grid-proxy/tests/queries/main_test.go index 28e8b6ae9..8c560b87c 100644 --- a/grid-proxy/tests/queries/main_test.go +++ b/grid-proxy/tests/queries/main_test.go @@ -93,7 +93,7 @@ func TestMain(m *testing.M) { DBClient = &dbClient // load mock client - data, err = mock.Load(db) + data, err = mock.Load(db, gormDB) if err != nil { panic(err) } @@ -103,7 +103,7 @@ func TestMain(m *testing.M) { if err != nil { panic(err) } - data, err = mock.Load(db) + data, err = mock.Load(db, gormDB) if err != nil { panic(err) } diff --git a/grid-proxy/tests/queries/mock_client/loader.go b/grid-proxy/tests/queries/mock_client/loader.go index 6a287a711..bfc175edd 100644 --- a/grid-proxy/tests/queries/mock_client/loader.go +++ b/grid-proxy/tests/queries/mock_client/loader.go @@ -7,6 +7,7 @@ import ( 
"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/zos/pkg/gridtypes" + "gorm.io/gorm" ) const deleted = "Deleted" @@ -44,58 +45,13 @@ type DBData struct { DB *sql.DB } -func loadNodes(db *sql.DB, data *DBData) error { - rows, err := db.Query(` - SELECT - COALESCE(id, ''), - COALESCE(grid_version, 0), - COALESCE(node_id, 0), - COALESCE(farm_id, 0), - COALESCE(twin_id, 0), - COALESCE(country, ''), - COALESCE(city, ''), - COALESCE(uptime, 0), - COALESCE(created, 0), - COALESCE(farming_policy_id, 0), - COALESCE(certification, ''), - COALESCE(secure, false), - COALESCE(virtualized, false), - COALESCE(serial_number, ''), - COALESCE(created_at, 0), - COALESCE(updated_at, 0), - COALESCE(location_id, ''), - COALESCE(extra_fee, 0), - power - FROM - node;`) +func loadNodes(db *sql.DB, gormDB *gorm.DB, data *DBData) error { + var nodes []Node + err := gormDB.Table("node").Scan(&nodes).Error if err != nil { return err } - for rows.Next() { - var node Node - if err := rows.Scan( - &node.ID, - &node.GridVersion, - &node.NodeID, - &node.FarmID, - &node.TwinID, - &node.Country, - &node.City, - &node.Uptime, - &node.Created, - &node.FarmingPolicyID, - &node.Certification, - &node.Secure, - &node.Virtualized, - &node.SerialNumber, - &node.CreatedAt, - &node.UpdatedAt, - &node.LocationID, - &node.ExtraFee, - &node.Power, - ); err != nil { - return err - } + for _, node := range nodes { data.Nodes[node.NodeID] = node data.NodeIDMap[node.ID] = node.NodeID } @@ -610,30 +566,13 @@ func loadHealthReports(db *sql.DB, data *DBData) error { return nil } -func loadDMIs(db *sql.DB, data *DBData) error { - rows, err := db.Query(` - SELECT - node_twin_id, - bios, - baseboard, - processor, - memory - FROM - dmi;`) +func loadDMIs(db *sql.DB, gormDB *gorm.DB, data *DBData) error { + var dmis []types.Dmi + err := gormDB.Table("dmi").Scan(&dmis).Error if err != nil { return err } - for rows.Next() { - var dmi types.Dmi - if err := rows.Scan( - &dmi.NodeTwinId, - &dmi.BIOS, - &dmi.Baseboard, - &dmi.Processor, - &dmi.Memory, - ); err != nil { - return err - } + for _, dmi := range dmis { twinId := dmi.NodeTwinId dmi.NodeTwinId = 0 // to omit it as empty, cleaner response data.DMIs[twinId] = dmi @@ -668,7 +607,7 @@ func loadSpeeds(db *sql.DB, data *DBData) error { return nil } -func Load(db *sql.DB) (DBData, error) { +func Load(db *sql.DB, gormDB *gorm.DB) (DBData, error) { data := DBData{ NodeIDMap: make(map[string]uint64), FarmIDMap: make(map[string]uint64), @@ -699,7 +638,7 @@ func Load(db *sql.DB) (DBData, error) { Speeds: make(map[uint32]types.Speed), DB: db, } - if err := loadNodes(db, &data); err != nil { + if err := loadNodes(db, gormDB, &data); err != nil { return data, err } if err := loadFarms(db, &data); err != nil { @@ -744,7 +683,7 @@ func Load(db *sql.DB) (DBData, error) { if err := loadHealthReports(db, &data); err != nil { return data, err } - if err := loadDMIs(db, &data); err != nil { + if err := loadDMIs(db, gormDB, &data); err != nil { return data, err } if err := loadSpeeds(db, &data); err != nil { diff --git a/grid-proxy/tools/db/schema.sql b/grid-proxy/tools/db/schema.sql index 6fd9a774f..2878c25e0 100644 --- a/grid-proxy/tools/db/schema.sql +++ b/grid-proxy/tools/db/schema.sql @@ -1030,7 +1030,7 @@ CREATE TABLE IF NOT EXISTS public.node_gpu ( vendor text, device text, contract bigint, - updated_at timestamp with time zone + updated_at bigint ); ALTER TABLE public.node_gpu From 708df2e877c092aea379ce3e27497a672c298c33 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz 
Date: Mon, 19 Feb 2024 13:45:10 +0200 Subject: [PATCH 16/19] update mod files --- grid-proxy/go.mod | 21 +++++++++ grid-proxy/go.sum | 108 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/grid-proxy/go.mod b/grid-proxy/go.mod index 61e48b321..efc9af5cf 100644 --- a/grid-proxy/go.mod +++ b/grid-proxy/go.mod @@ -27,6 +27,7 @@ require ( require ( github.com/ChainSafe/go-schnorrkel v1.1.0 // indirect github.com/KyleBanks/depth v1.2.1 // indirect + github.com/blang/semver v3.5.1+incompatible // indirect github.com/cenkalti/backoff v2.2.1+incompatible // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/centrifuge/go-substrate-rpc-client/v4 v4.0.12 // indirect @@ -37,17 +38,23 @@ require ( github.com/decred/dcrd/crypto/blake256 v1.0.1 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/ethereum/go-ethereum v1.11.6 // indirect + github.com/garyburd/redigo v1.6.2 // indirect + github.com/go-co-op/gocron v1.33.1 // indirect github.com/go-jose/go-jose/v3 v3.0.1 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/spec v0.20.8 // indirect github.com/go-openapi/swag v0.22.3 // indirect github.com/go-stack/stack v1.8.1 // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/gomodule/redigo v2.0.0+incompatible // indirect + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/gtank/merlin v0.1.1 // indirect github.com/gtank/ristretto255 v0.1.2 // indirect + github.com/hasura/go-graphql-client v0.10.0 // indirect github.com/holiman/uint256 v1.2.3 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect @@ -56,6 +63,7 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/josharian/intern v1.0.0 // indirect + github.com/klauspost/compress v1.16.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -63,18 +71,31 @@ require ( github.com/mimoo/StrobeGo v0.0.0-20220103164710-9a04d6ca976b // indirect github.com/pierrec/xxHash v0.1.5 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/robfig/cron/v3 v3.0.1 // indirect github.com/rs/cors v1.10.1 // indirect + github.com/shirou/gopsutil v3.21.11+incompatible // indirect github.com/swaggo/files v1.0.1 // indirect + github.com/threefoldtech/zbus v1.0.1 // indirect + github.com/tklauser/go-sysconf v0.3.11 // indirect + github.com/tklauser/numcpus v0.6.0 // indirect github.com/vedhavyas/go-subkey v1.0.3 // indirect + github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 // indirect + github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f // indirect + github.com/vmihailenco/msgpack v4.0.4+incompatible // indirect + github.com/yusufpapurcu/wmi v1.2.2 // indirect + go.uber.org/atomic v1.9.0 // indirect golang.org/x/crypto v0.18.0 // indirect golang.org/x/mod v0.14.0 // indirect golang.org/x/net v0.20.0 // indirect golang.org/x/sys v0.16.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.17.0 // indirect + google.golang.org/appengine v1.6.7 // indirect 
google.golang.org/protobuf v1.31.0 // indirect gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + nhooyr.io/websocket v1.8.7 // indirect ) replace github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go => ../rmb-sdk-go diff --git a/grid-proxy/go.sum b/grid-proxy/go.sum index 83edbe78d..80c49c619 100644 --- a/grid-proxy/go.sum +++ b/grid-proxy/go.sum @@ -2,6 +2,8 @@ github.com/ChainSafe/go-schnorrkel v1.1.0 h1:rZ6EU+CZFCjB4sHUE1jIu8VDoB/wRKZxoe1 github.com/ChainSafe/go-schnorrkel v1.1.0/go.mod h1:ABkENxiP+cvjFiByMIZ9LYbRoNNLeBLiakC1XeTFxfE= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= +github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= +github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/btcsuite/btcd v0.22.0-beta h1:LTDpDKUM5EeOFBPM8IXpinEcmZ6FWfNZbE3lfrfdnWo= github.com/btcsuite/btcd/btcec/v2 v2.2.0 h1:fzn1qaOt32TuLjFlkzYSsBC35Q3KUjT1SwPxiMSCF5k= github.com/btcsuite/btcd/btcec/v2 v2.2.0/go.mod h1:U7MHm051Al6XmscBQ0BoNydpOTsFAn707034b5nY8zU= @@ -15,10 +17,12 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/centrifuge/go-substrate-rpc-client/v4 v4.0.12 h1:DCYWIBOalB0mKKfUg2HhtGgIkBbMA1fnlnkZp7fHB18= github.com/centrifuge/go-substrate-rpc-client/v4 v4.0.12/go.mod h1:5g1oM4Zu3BOaLpsKQ+O8PAv2kNuq+kPcA1VzFbsSqxE= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cosmos/go-bip39 v1.0.0 h1:pcomnQdrdH22njcAatO0yWojsUnCO3y2tNoV1cb6hHY= github.com/cosmos/go-bip39 v1.0.0/go.mod h1:RNJv0H/pOIVgxw6KS7QeX2a0Uo0aKUlfhZ4xuwvCdJw= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/dave/jennifer v1.3.0/go.mod h1:fIb+770HOpJ2fmN9EPPKOqm1vMGhB+TwXKMZhrIygKg= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -35,8 +39,16 @@ github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etly github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= github.com/ethereum/go-ethereum v1.11.6 h1:2VF8Mf7XiSUfmoNOy3D+ocfl9Qu8baQBrCNbo2CXQ8E= github.com/ethereum/go-ethereum v1.11.6/go.mod h1:+a8pUj1tOyJ2RinsNQD4326YS+leSoKGiG/uVVb0x6Y= +github.com/garyburd/redigo v1.6.2 h1:yE/pwKCrbLpLpQICzYTeZ7JsTA/C53wFTJHaEtRqniM= +github.com/garyburd/redigo v1.6.2/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= +github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.6.3 h1:ahKqKTFpO5KTPHxWZjEdPScmYaGtLo8Y4DMHoEsnp14= +github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= github.com/go-acme/lego/v4 v4.15.0 h1:A7MHEU3b+TDFqhC/HmzMJnzPbyeaYvMZQBbqgvbThhU= github.com/go-acme/lego/v4 
v4.15.0/go.mod h1:eeGhjW4zWT7Ccqa3sY7ayEqFLCAICx+mXgkMHKIkLxg= +github.com/go-co-op/gocron v1.33.1 h1:wjX+Dg6Ae29a/f9BSQjY1Rl+jflTpW9aDyMqseCj78c= +github.com/go-co-op/gocron v1.33.1/go.mod h1:NLi+bkm4rRSy1F8U7iacZOz0xPseMoIOnvabGoSe/no= github.com/go-jose/go-jose/v3 v3.0.1 h1:pWmKFVtt+Jl0vBZTIpz/eAKwsm6LkIxDVVbFHKkchhA= github.com/go-jose/go-jose/v3 v3.0.1/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= @@ -54,34 +66,67 @@ github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= +github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= +github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= +github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= +github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY= +github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw= github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/gomodule/redigo v1.8.9/go.mod h1:7ArFNvsTjH8GMMzB4uy1snslv2BwmginuMs06a1uzZE= github.com/gomodule/redigo v2.0.0+incompatible h1:K/R+8tc58AaqLkqG2Ol3Qk+DR/TlNuhuh457pBFPtt0= 
github.com/gomodule/redigo v2.0.0+incompatible/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.1-0.20200604201612-c04b05f3adfa h1:Q75Upo5UN4JbPFURXZ8nLKYUvF85dyFRop/vQ0Rv+64= github.com/google/gofuzz v1.1.1-0.20200604201612-c04b05f3adfa/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/gorilla/schema v1.2.1 h1:tjDxcmdb+siIqkTNoV+qRH2mjYdr2hHe5MKXbp61ziM= github.com/gorilla/schema v1.2.1/go.mod h1:Dg5SSm5PV60mhF2NFaTV1xuYYj8tV8NOPRo4FggUMnM= +github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= +github.com/graph-gophers/graphql-go v1.5.0 h1:fDqblo50TEpD0LY7RXk/LFVYEVqo3+tXMNMPSVXA1yc= +github.com/graph-gophers/graphql-go v1.5.0/go.mod h1:YtmJZDLbF1YYNrlNAuiO5zAStUWc3XZT07iGsVqe1Os= +github.com/graph-gophers/graphql-transport-ws v0.0.2 h1:DbmSkbIGzj8SvHei6n8Mh9eLQin8PtA8xY9eCzjRpvo= +github.com/graph-gophers/graphql-transport-ws v0.0.2/go.mod h1:5BVKvFzOd2BalVIBFfnfmHjpJi/MZ5rOj8G55mXvZ8g= github.com/gtank/merlin v0.1.1 h1:eQ90iG7K9pOhtereWsmyRJ6RAwcP4tHTDBHXNg+u5is= github.com/gtank/merlin v0.1.1/go.mod h1:T86dnYJhcGOh5BjZFCJWTDeTK7XW8uE+E21Cy/bIQ+s= github.com/gtank/ristretto255 v0.1.2 h1:JEqUCPA1NvLq5DwYtuzigd7ss8fwbYay9fi4/5uMzcc= github.com/gtank/ristretto255 v0.1.2/go.mod h1:Ph5OpO6c7xKUGROZfWVLiJf9icMDwUeIvY4OmlYW69o= +github.com/hasura/go-graphql-client v0.10.0 h1:eQm/ap/rqxMG6yAGe6J+FkXu1VqJ9p21E63vz0A7zLQ= +github.com/hasura/go-graphql-client v0.10.0/go.mod h1:z9UPkMmCBMuJjvBEtdE6F+oTR2r15AcjirVNq/8P+Ig= github.com/holiman/uint256 v1.2.3 h1:K8UWO1HUJpRMXBxbmaY1Y8IAMZC/RsKB+ArEnnK4l5o= github.com/holiman/uint256 v1.2.3/go.mod h1:SC8Ryt4n+UBbPbIBKaG9zbbDlp4jOru9xFZmPzLUTxw= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -98,14 +143,23 @@ github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 
+github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= +github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= @@ -115,6 +169,7 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= @@ -124,19 +179,33 @@ github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPk github.com/mimoo/StrobeGo v0.0.0-20181016162300-f8f6d4d2b643/go.mod h1:43+3pMjjKimDBf5Kr4ZFNGbLql1zKkbImw+fZbw3geM= github.com/mimoo/StrobeGo v0.0.0-20220103164710-9a04d6ca976b h1:QrHweqAtyJ9EwCaGHBu1fghwxIPiopAHV06JlXrMHjk= github.com/mimoo/StrobeGo v0.0.0-20220103164710-9a04d6ca976b/go.mod h1:xxLb2ip6sSUts3g1irPVHyk/DGslwQsNOo9I7smJfNU= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/pierrec/xxHash v0.1.5 h1:n/jBpwTHiER4xYvK3/CdPVnLDPchj8eTJFFLUb4QHBo= github.com/pierrec/xxHash v0.1.5/go.mod h1:w2waW5Zoa/Wc4Yqe0wgrIYAGKqRMf7czn2HNKXmuL+I= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= +github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/rs/cors v1.10.1 h1:L0uuZVXIKlI1SShY2nhFfo44TYvDPQ1w4oFkUJNfhyo= github.com/rs/cors v1.10.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= +github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/rs/zerolog v1.14.3/go.mod h1:3WXPzbXEEliJ+a6UFE4vhIxV8qR1EML6ngzP9ug4eYg= github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0= github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= @@ -145,11 +214,13 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE= @@ -160,17 +231,34 @@ github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag 
v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= github.com/threefoldtech/tfchain/clients/tfchain-client-go v0.0.0-20240116163757-68c63d80a9e0 h1:0ZMm/xYPgYv3vICNRw3vXTiJ/jVg8LkBLe8TPBEO0MQ= github.com/threefoldtech/tfchain/clients/tfchain-client-go v0.0.0-20240116163757-68c63d80a9e0/go.mod h1:dtDKAPiUDxAwIkfHV7xcAFZcOm+xwNIuOI1MLFS+MeQ= +github.com/threefoldtech/zbus v1.0.1 h1:3KaEpyOiDYAw+lrAyoQUGIvY9BcjVRXlQ1beBRqhRNk= +github.com/threefoldtech/zbus v1.0.1/go.mod h1:E/v/xEvG/l6z/Oj0aDkuSUXFm/1RVJkhKBwDTAIdsHo= github.com/threefoldtech/zos v0.5.6-0.20240201092442-d2ba5be539d2 h1:ChxlTKzZ9g0+S5OkUNzbugkrnuiB7PecOAniOi1JO0Y= github.com/threefoldtech/zos v0.5.6-0.20240201092442-d2ba5be539d2/go.mod h1:Ad9Vej4azEWK/fqGBSbnE/wwLd+EJURJCPaEQRDFP48= github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= +github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/vedhavyas/go-subkey v1.0.3 h1:iKR33BB/akKmcR2PMlXPBeeODjWLM90EL98OrOGs8CA= github.com/vedhavyas/go-subkey v1.0.3/go.mod h1:CloUaFQSSTdWnINfBRFjVMkWXZANW+nd8+TI5jYcl6Y= +github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 h1:cPXZWzzG0NllBLdjWoD1nDfaqu98YMv+OneaKc8sPOA= +github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f h1:p4VB7kIXpOQvVn1ZaTIVp+3vuYAXFe3OJEvjbUYJLaA= +github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= +github.com/vmihailenco/msgpack v4.0.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk= +github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI= +github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -182,7 +270,11 @@ golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQz golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod 
h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190514140710-3ec191127204/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -195,11 +287,16 @@ golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -209,19 +306,26 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod 
h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -229,9 +333,11 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce h1:+JknDZhAj8YMt7GC73Ei8pv4MzjDUNPHgQWJdtMAaDU= gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce/go.mod h1:5AcXVHNjg+BDxry382+8OKon8SEWiKktQR07RKPsv1c= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -242,3 +348,5 @@ gorm.io/driver/postgres v1.5.6 h1:ydr9xEd5YAM0vxVDY0X139dyzNz10spDiDlC7+ibLeU= gorm.io/driver/postgres v1.5.6/go.mod h1:3e019WlBaYI5o5LIdNV+LyxCMNtLOQETBXL2h4chKpA= gorm.io/gorm v1.25.7 h1:VsD6acwRjz2zFxGO50gPO6AkNs7KKnvfzUjHQhZDz/A= gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8= +nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= +nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= From ad4d979c99e120b5acede50ed1d5aeabf513b3ab Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Mon, 19 Feb 2024 14:32:15 +0200 Subject: [PATCH 17/19] silent the gorm logs in dump data generation --- grid-proxy/tools/db/db.go | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/grid-proxy/tools/db/db.go b/grid-proxy/tools/db/db.go index af0606dfc..23a401b17 100644 --- a/grid-proxy/tools/db/db.go +++ b/grid-proxy/tools/db/db.go @@ -47,7 +47,7 @@ func main() { panic(errors.Wrap(err, "failed to open db")) } gormDB, err := gorm.Open(postgres.Open(psqlInfo), &gorm.Config{ - Logger: logger.Default.LogMode(4), + Logger: logger.Default.LogMode(1), }) if err != nil { panic(fmt.Errorf("failed to generate gorm db: %w", err)) From 2db2a2178ed85f96d326cf90c14ad1c18ab75c53 Mon Sep 17 00:00:00 2001 From: Omar Abdulaziz Date: Sun, 25 Feb 2024 14:20:36 +0200 Subject: [PATCH 18/19] abstract the indexer pkg code using generics Description: - redefine the indexer structs as a single generic indexer without a manager; each indexer performs the generic find/get/batch/upsert functions, and each has a different job based on the type defined when creating the indexer. - each indexer job should only handle its specific concerns: - defining its finders or triggers - preparing the rmb call to the node, parsing the response, and returning a ready db model that can be stored - upserting its results by updating, discarding, or inserting the passed arguments - the generic indexer code is responsible for the shared logic: - executing the finder methods for each indexer as defined in its job, based on the interval - starting the getter worker that invokes the get method on the work - starting a batcher for each indexer, which has two channels as its ends - starting the upserter worker that invokes the upsert method on the work Changes: - remove the indexer manager in favor of easily starting a separate generic indexer per resource type. - remove the redundant `NewIndexer` `Start` `Find` `Batch` methods for each indexer in favor of generic methods on the `Indexer[T any]` struct - update the return of the get method on work to be `[]T` instead of `T` to cover all indexer response types, like the gpu call which returns a slice of gpus - create the finders map for easy reusability of the finder methods on new/up/healthy nodes - check the uniqueness of the nodes in the batch before inserting to prevent duplicated data coming from two different triggers like healthy/up or up/new - modify the default sql-log-level in the makefile - add more debug logging statements - update docs --- grid-proxy/Makefile | 1 + grid-proxy/cmds/proxy_server/main.go | 76 ++++++------ grid-proxy/internal/indexer/README.md | 17 +-- grid-proxy/internal/indexer/dmi.go | 146 +++------------------- grid-proxy/internal/indexer/finders.go | 76 ++++++++++++ grid-proxy/internal/indexer/gpu.go | 161 +++++-------------------- grid-proxy/internal/indexer/health.go | 117 +++--------------- grid-proxy/internal/indexer/indexer.go | 141 ++++++++++++++++++++++ grid-proxy/internal/indexer/manager.go | 48 -------- grid-proxy/internal/indexer/speed.go | 130 ++++---------------- 10 files changed, 355 insertions(+), 558 deletions(-) create mode 100644 grid-proxy/internal/indexer/finders.go create mode 100644 grid-proxy/internal/indexer/indexer.go delete mode 100644 grid-proxy/internal/indexer/manager.go diff --git a/grid-proxy/Makefile b/grid-proxy/Makefile index 2a93f500f..b61d06bff 100644 --- a/grid-proxy/Makefile +++ b/grid-proxy/Makefile @@ -49,6 +49,7 @@ server-start: ## Start the proxy server (Args: `m=`) -no-indexer \ --address :8080 \ --log-level debug \ + --sql-log-level 4 \ --postgres-host $(PQ_HOST) \ --postgres-db tfgrid-graphql \
--postgres-password postgres \ diff --git a/grid-proxy/cmds/proxy_server/main.go b/grid-proxy/cmds/proxy_server/main.go index 0a7c8e3f0..d14cb3a2d 100644 --- a/grid-proxy/cmds/proxy_server/main.go +++ b/grid-proxy/cmds/proxy_server/main.go @@ -20,6 +20,7 @@ import ( "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/indexer" logging "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" rmb "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" "gorm.io/gorm/logger" @@ -136,59 +137,60 @@ func main() { if err != nil { log.Fatal().Err(err).Msg("failed to create relay client") } - manager := indexer.NewManager(ctx) - gpuIndexer := indexer.NewGPUIndexer( + if !f.noIndexer { + startIndexers(ctx, f, &db, rpcRmbClient) + } else { + log.Info().Msg("Indexers did not start") + } + + s, err := createServer(f, dbClient, GitCommit, rpcRmbClient) + if err != nil { + log.Fatal().Err(err).Msg("failed to create mux server") + } + + if err := app(s, f); err != nil { + log.Fatal().Msg(err.Error()) + } + +} + +func startIndexers(ctx context.Context, f flags, db db.Database, rpcRmbClient *peer.RpcClient) { + gpuIdx := indexer.NewIndexer[types.NodeGPU]( + indexer.NewGPUWork(f.gpuIndexerIntervalMins), + "GPU", + db, rpcRmbClient, - &db, - f.indexerUpserterBatchSize, - f.gpuIndexerIntervalMins, f.gpuIndexerNumWorkers, ) - manager.Register("GPU", gpuIndexer) + gpuIdx.Start(ctx) - healthIndexer := indexer.NewNodeHealthIndexer( + healthIdx := indexer.NewIndexer[types.HealthReport]( + indexer.NewHealthWork(f.healthIndexerIntervalMins), + "Health", + db, rpcRmbClient, - &db, - f.indexerUpserterBatchSize, f.healthIndexerNumWorkers, - f.healthIndexerIntervalMins, ) - manager.Register("Health", healthIndexer) + healthIdx.Start(ctx) - dmiIndexer := indexer.NewDmiIndexer( + dmiIdx := indexer.NewIndexer[types.Dmi]( + indexer.NewDMIWork(f.dmiIndexerIntervalMins), + "DMI", + db, rpcRmbClient, - &db, - f.indexerUpserterBatchSize, - f.dmiIndexerIntervalMins, f.dmiIndexerNumWorkers, ) - manager.Register("DMI", dmiIndexer) + dmiIdx.Start(ctx) - speedIndexer := indexer.NewSpeedIndexer( + speedIdx := indexer.NewIndexer[types.Speed]( + indexer.NewSpeedWork(f.speedIndexerIntervalMins), + "Speed", + db, rpcRmbClient, - &db, - f.indexerUpserterBatchSize, - f.speedIndexerIntervalMins, f.speedIndexerNumWorkers, ) - manager.Register("Speed", speedIndexer) - - if !f.noIndexer { - manager.Start() - } else { - log.Info().Msg("Indexers Manager did not start") - } - - s, err := createServer(f, dbClient, GitCommit, rpcRmbClient) - if err != nil { - log.Fatal().Err(err).Msg("failed to create mux server") - } - - if err := app(s, f); err != nil { - log.Fatal().Msg(err.Error()) - } - + speedIdx.Start(ctx) } func app(s *http.Server, f flags) error { diff --git a/grid-proxy/internal/indexer/README.md b/grid-proxy/internal/indexer/README.md index 403c62a67..d8483f208 100644 --- a/grid-proxy/internal/indexer/README.md +++ b/grid-proxy/internal/indexer/README.md @@ -4,10 +4,6 @@ Initially the node periodically reports its data to the chain, data like capacit Things looks fine, but when it comes to a bigger data like gpu/dmi it is not the best solution to store these data on the chain. 
And that what the `Node-Indexers` solves by periodically calling the nodes based on a configurable interval to get the data and store it on the same postgres database and then it can be served to apis. only `proxy-api` for now. -## The manager - -The manager is a service started from the `cmds/main.go` and it has multiple indexer each looking for a kind of data on the nodes and it is configured by command line flags. - ## The indexer structure Each indexer has @@ -18,18 +14,23 @@ two clients: three channels: -- `NodeTwinIdsChan`: it collects the twin ids for the nodes the indexer will call. +- `IdChan`: it collects the twin ids for the nodes the indexer will call. - `ResultChan`: it collects the results returned by the rmb call to the node. - `BatchChan`: transfer batches of results ready to directly upserted. four types of workers: -- `Finder`: this worker calls the database to filter nodes and push its data to the `NodeTwinIdsChan` -- `Caller`: this worker pop the twins from `NodeTwinIdsChan` and call the node with the `RmbClient` to get data and then push the result to `ResultChan` +- `Finder`: this worker calls the database to filter nodes and pushes their twin ids to the `IdChan` +- `Getter`: this worker pops twin ids from the `IdChan`, calls the node with the `RmbClient` to get the data, and then pushes the result to the `ResultChan` - `Batcher`: this worker collect results from `ResultChan` in batches and send it to the `BatchChan` - `Upserter`: this worker get data from `BatchChan` then update/insert to the `Database` -Each indexer could have some extra feature based on the use case, but these are essential. +The indexer struct is generic; each indexer's functionality differs from the others based on its Work. +Each indexer's Work is a struct that implements the `Work` interface, which has three methods: + +- `Finders`: a map of finder name to interval that decides which finders this indexer uses and how often. +- `Get`: a method that prepares the payload for the rmb call and parses the response into ready db model data. +- `Upsert`: calls the equivalent db upserting method, with the ability to remove old expired data.
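To make the split between the generic pipeline and a per-resource `Work` concrete, here is a minimal, self-contained sketch of the pattern in plain Go. It is an illustration only, not the grid-proxy code: the real `Work` methods also take the RMB client and the `db.Database` handle, and `demoWork`, its fake results, and the channel/batch sizes below are invented for the example.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// Work is the per-resource job; the generic Indexer below drives it.
// (The real Work also receives an RMB client in Get and a db.Database in Upsert.)
type Work[T any] interface {
	Finders() map[string]time.Duration                   // which node finders to run, and how often
	Get(ctx context.Context, twinID uint32) ([]T, error) // call one node and map the reply to db models
	Upsert(ctx context.Context, batch []T) error         // store one ready batch
}

// Indexer owns the channels and workers; only Work differs per resource.
type Indexer[T any] struct {
	work Work[T]
	ids  chan uint32
}

// run is a single getter+batcher worker: ids in, batched upserts out.
// Finder goroutines (which would feed i.ids) are elided for brevity.
func (i *Indexer[T]) run(ctx context.Context, batchSize int) {
	buffer := make([]T, 0, batchSize)
	for {
		select {
		case id := <-i.ids:
			res, err := i.work.Get(ctx, id)
			if err != nil {
				continue // unreachable node: skip, like the real indexer
			}
			buffer = append(buffer, res...)
			if len(buffer) >= batchSize {
				if err := i.work.Upsert(ctx, buffer); err != nil {
					fmt.Println("upsert failed:", err)
				}
				buffer = buffer[:0]
			}
		case <-ctx.Done():
			return
		}
	}
}

// demoWork is a stand-in resource type used only for this sketch.
type demoWork struct{}

func (demoWork) Finders() map[string]time.Duration {
	return map[string]time.Duration{"up": time.Minute}
}

func (demoWork) Get(_ context.Context, twinID uint32) ([]string, error) {
	return []string{fmt.Sprintf("twin-%d", twinID)}, nil
}

func (demoWork) Upsert(_ context.Context, batch []string) error {
	fmt.Println("upserting", batch)
	return nil
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	idx := &Indexer[string]{work: demoWork{}, ids: make(chan uint32, 4)}
	go idx.run(ctx, 2)

	for id := uint32(1); id <= 4; id++ {
		idx.ids <- id
	}
	<-ctx.Done()
}
```

In the patch itself, `GPUWork`, `HealthWork`, `DMIWork`, and `SpeedWork` play the role of `demoWork`, and the finder goroutines in `finders.go` are what feed the id channel.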
## Registered Indexers diff --git a/grid-proxy/internal/indexer/dmi.go b/grid-proxy/internal/indexer/dmi.go index f67501a73..b7e998b0d 100644 --- a/grid-proxy/internal/indexer/dmi.go +++ b/grid-proxy/internal/indexer/dmi.go @@ -4,7 +4,6 @@ import ( "context" "time" - "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" @@ -15,146 +14,37 @@ const ( DmiCallCmd = "zos.system.dmi" ) -type DmiIndexer struct { - database db.Database - rmbClient *peer.RpcClient - interval time.Duration - workers uint - batchSize uint - nodeTwinIdsChan chan uint32 - resultChan chan types.Dmi - batchChan chan []types.Dmi +type DMIWork struct { + findersInterval map[string]time.Duration } -func NewDmiIndexer( - rmbClient *peer.RpcClient, - database db.Database, - batchSize uint, - interval uint, - workers uint, -) *DmiIndexer { - return &DmiIndexer{ - database: database, - rmbClient: rmbClient, - interval: time.Duration(interval) * time.Minute, - workers: workers, - batchSize: batchSize, - nodeTwinIdsChan: make(chan uint32), - resultChan: make(chan types.Dmi), - batchChan: make(chan []types.Dmi), +func NewDMIWork(interval uint) *DMIWork { + return &DMIWork{ + findersInterval: map[string]time.Duration{ + "up": time.Duration(interval) * time.Minute, + "new": newNodesCheckInterval, + }, } } -func (w *DmiIndexer) Start(ctx context.Context) { - go w.startNodeTableWatcher(ctx) - go w.StartNodeFinder(ctx) - - for i := uint(0); i < w.workers; i++ { - go w.StartNodeCaller(ctx) - } - - for i := uint(0); i < w.workers; i++ { - go w.StartResultBatcher(ctx) - } - - go w.StartBatchUpserter(ctx) -} - -func (w *DmiIndexer) StartNodeFinder(ctx context.Context) { - ticker := time.NewTicker(w.interval) - queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) - for { - select { - case <-ticker.C: - queryUpNodes(ctx, w.database, w.nodeTwinIdsChan) - case <-ctx.Done(): - return - } - } +func (w *DMIWork) Finders() map[string]time.Duration { + return w.findersInterval } -func (n *DmiIndexer) startNodeTableWatcher(ctx context.Context) { - ticker := time.NewTicker(newNodesCheckInterval) - latestCheckedID, err := n.database.GetLastNodeTwinID(ctx) +func (w *DMIWork) Get(ctx context.Context, rmb *peer.RpcClient, twinId uint32) ([]types.Dmi, error) { + var dmi zosDmiTypes.DMI + err := callNode(ctx, rmb, DmiCallCmd, nil, twinId, &dmi) if err != nil { - log.Error().Err(err).Msg("failed to get last node twin id") + return []types.Dmi{}, err } - for { - select { - case <-ticker.C: - newIDs, err := n.database.GetNodeTwinIDsAfter(ctx, latestCheckedID) - if err != nil { - log.Error().Err(err).Msgf("failed to get node twin ids after %d", latestCheckedID) - continue - } - if len(newIDs) == 0 { - continue - } + res := parseDmiResponse(dmi, twinId) + return []types.Dmi{res}, nil - latestCheckedID = newIDs[0] - for _, id := range newIDs { - n.nodeTwinIdsChan <- id - } - case <-ctx.Done(): - return - } - } } -func (w *DmiIndexer) StartNodeCaller(ctx context.Context) { - for { - select { - case twinId := <-w.nodeTwinIdsChan: - var dmi zosDmiTypes.DMI - err := callNode(ctx, w.rmbClient, DmiCallCmd, nil, twinId, &dmi) - if err != nil { - continue - } - - w.resultChan <- parseDmiResponse(dmi, twinId) - case <-ctx.Done(): - return - } - } -} - -func (w *DmiIndexer) StartResultBatcher(ctx context.Context) { - buffer := make([]types.Dmi, 0, w.batchSize) - - ticker := time.NewTicker(flushingBufferInterval) 
- for { - select { - case dmiData := <-w.resultChan: - buffer = append(buffer, dmiData) - if len(buffer) >= int(w.batchSize) { - w.batchChan <- buffer - buffer = nil - } - case <-ticker.C: - if len(buffer) != 0 { - w.batchChan <- buffer - buffer = nil - } - case <-ctx.Done(): - return - } - } -} - -func (w *DmiIndexer) StartBatchUpserter(ctx context.Context) { - for { - - select { - case batch := <-w.batchChan: - err := w.database.UpsertNodeDmi(ctx, batch) - if err != nil { - log.Error().Err(err).Msg("failed to upsert node dmi") - } - case <-ctx.Done(): - return - } - } +func (w *DMIWork) Upsert(ctx context.Context, db db.Database, batch []types.Dmi) error { + return db.UpsertNodeDmi(ctx, batch) } func parseDmiResponse(dmiResponse zosDmiTypes.DMI, twinId uint32) types.Dmi { diff --git a/grid-proxy/internal/indexer/finders.go b/grid-proxy/internal/indexer/finders.go new file mode 100644 index 000000000..9c6bc451e --- /dev/null +++ b/grid-proxy/internal/indexer/finders.go @@ -0,0 +1,76 @@ +package indexer + +import ( + "context" + "time" + + "github.com/rs/zerolog/log" + "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" +) + +var ( + finders = map[string]Finder{ + "up": upNodesFinder, + "healthy": healthyNodesFinder, + "new": newNodesFinder, + } +) + +type Finder func(context.Context, time.Duration, db.Database, chan uint32) + +func upNodesFinder(ctx context.Context, interval time.Duration, db db.Database, idsChan chan uint32) { + ticker := time.NewTicker(interval) + + queryUpNodes(ctx, db, idsChan) + for { + select { + case <-ticker.C: + queryUpNodes(ctx, db, idsChan) + case <-ctx.Done(): + return + } + } +} + +func healthyNodesFinder(ctx context.Context, interval time.Duration, db db.Database, idsChan chan uint32) { + ticker := time.NewTicker(interval) + + queryHealthyNodes(ctx, db, idsChan) + for { + select { + case <-ticker.C: + queryHealthyNodes(ctx, db, idsChan) + case <-ctx.Done(): + return + } + } +} + +func newNodesFinder(ctx context.Context, interval time.Duration, db db.Database, idsChan chan uint32) { + ticker := time.NewTicker(interval) + latestCheckedID, err := db.GetLastNodeTwinID(ctx) + if err != nil { + log.Error().Err(err).Msg("failed to get last node twin id") + } + + for { + select { + case <-ticker.C: + newIDs, err := db.GetNodeTwinIDsAfter(ctx, latestCheckedID) + if err != nil { + log.Error().Err(err).Msgf("failed to get node twin ids after %d", latestCheckedID) + continue + } + if len(newIDs) == 0 { + continue + } + + latestCheckedID = newIDs[0] + for _, id := range newIDs { + idsChan <- id + } + case <-ctx.Done(): + return + } + } +} diff --git a/grid-proxy/internal/indexer/gpu.go b/grid-proxy/internal/indexer/gpu.go index d3baa4501..9c1873a29 100644 --- a/grid-proxy/internal/indexer/gpu.go +++ b/grid-proxy/internal/indexer/gpu.go @@ -2,9 +2,9 @@ package indexer import ( "context" + "fmt" "time" - "github.com/rs/zerolog/log" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db" "github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types" "github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer" @@ -14,159 +14,56 @@ const ( gpuListCmd = "zos.gpu.list" ) -type NodeGPUIndexer struct { - database db.Database - rmbClient *peer.RpcClient - interval time.Duration - workers uint - batchSize uint - nodeTwinIdsChan chan uint32 - resultChan chan types.NodeGPU - batchChan chan []types.NodeGPU +type GPUWork struct { + findersInterval map[string]time.Duration } -func NewGPUIndexer( - rmbClient *peer.RpcClient, - database db.Database, - 
batchSize uint,
-	interval uint,
-	workers uint,
-) *NodeGPUIndexer {
-	return &NodeGPUIndexer{
-		database: database,
-		rmbClient: rmbClient,
-		batchSize: batchSize,
-		workers: workers,
-		interval: time.Duration(interval) * time.Minute,
-		nodeTwinIdsChan: make(chan uint32),
-		resultChan: make(chan types.NodeGPU),
-		batchChan: make(chan []types.NodeGPU),
+func NewGPUWork(interval uint) *GPUWork {
+	return &GPUWork{
+		findersInterval: map[string]time.Duration{
+			"up": time.Duration(interval) * time.Minute,
+			"new": newNodesCheckInterval,
+		},
 	}
 }
 
-func (n *NodeGPUIndexer) Start(ctx context.Context) {
-	go n.StartNodeFinder(ctx)
-	go n.startNodeTableWatcher(ctx)
-
-	for i := uint(0); i < n.workers; i++ {
-		go n.StartNodeCaller(ctx)
-	}
-
-	for i := uint(0); i < n.workers; i++ {
-		go n.StartResultBatcher(ctx)
-	}
-
-	go n.StartBatchUpserter(ctx)
+func (w *GPUWork) Finders() map[string]time.Duration {
+	return w.findersInterval
 }
 
-func (n *NodeGPUIndexer) StartNodeFinder(ctx context.Context) {
-	ticker := time.NewTicker(n.interval)
-	queryUpNodes(ctx, n.database, n.nodeTwinIdsChan)
-	for {
-		select {
-		case <-ticker.C:
-			queryUpNodes(ctx, n.database, n.nodeTwinIdsChan)
-		case <-ctx.Done():
-			return
-		}
-	}
-}
-
-func (n *NodeGPUIndexer) startNodeTableWatcher(ctx context.Context) {
-	ticker := time.NewTicker(newNodesCheckInterval)
-	latestCheckedID, err := n.database.GetLastNodeTwinID(ctx)
+func (w *GPUWork) Get(ctx context.Context, rmb *peer.RpcClient, twinId uint32) ([]types.NodeGPU, error) {
+	var gpus []types.NodeGPU
+	err := callNode(ctx, rmb, gpuListCmd, nil, twinId, &gpus)
 	if err != nil {
-		log.Error().Err(err).Msg("failed to get last node twin id")
+		return gpus, err
 	}
-	for {
-		select {
-		case <-ticker.C:
-			newIDs, err := n.database.GetNodeTwinIDsAfter(ctx, latestCheckedID)
-			if err != nil {
-				log.Error().Err(err).Msgf("failed to get node twin ids after %d", latestCheckedID)
-				continue
-			}
-			if len(newIDs) == 0 {
-				continue
-			}
-
-			latestCheckedID = newIDs[0]
-			for _, id := range newIDs {
-				n.nodeTwinIdsChan <- id
-			}
-		case <-ctx.Done():
-			return
-		}
+	for i := 0; i < len(gpus); i++ {
+		gpus[i].NodeTwinID = twinId
+		gpus[i].UpdatedAt = time.Now().Unix()
 	}
-}
-
-func (n *NodeGPUIndexer) StartNodeCaller(ctx context.Context) {
-	for {
-		select {
-		case twinId := <-n.nodeTwinIdsChan:
-			var gpus []types.NodeGPU
-			err := callNode(ctx, n.rmbClient, gpuListCmd, nil, twinId, &gpus)
-			if err != nil {
-				continue
-			}
-			for i := 0; i < len(gpus); i++ {
-				gpus[i].NodeTwinID = twinId
-				gpus[i].UpdatedAt = time.Now().Unix()
-				n.resultChan <- gpus[i]
-			}
-		case <-ctx.Done():
-			return
-		}
-	}
+	return gpus, nil
 }
 
-func (n *NodeGPUIndexer) StartResultBatcher(ctx context.Context) {
-	buffer := make([]types.NodeGPU, 0, n.batchSize)
-
-	ticker := time.NewTicker(flushingBufferInterval)
-	for {
-		select {
-		case gpus := <-n.resultChan:
-			buffer = append(buffer, gpus)
-			if len(buffer) >= int(n.batchSize) {
-				n.batchChan <- buffer
-				buffer = nil
-			}
-		case <-ticker.C:
-			if len(buffer) != 0 {
-				n.batchChan <- buffer
-				buffer = nil
-			}
-		case <-ctx.Done():
-			return
-		}
+func (w *GPUWork) Upsert(ctx context.Context, db db.Database, batch []types.NodeGPU) error {
+	expirationInterval := w.findersInterval["up"]
+	err := discardOldGpus(ctx, db, expirationInterval, batch)
+	if err != nil {
+		return fmt.Errorf("failed to remove old GPUs: %w", err)
 	}
-}
-
-func (n *NodeGPUIndexer) StartBatchUpserter(ctx context.Context) {
-	for {
-		select {
-		case batch := <-n.batchChan:
-			err := discardOldGpus(ctx, n.database, n.interval, batch)
-			if err != nil {
-				log.Error().Err(err).Msg("failed to remove old GPUs")
-			}
-			err = n.database.UpsertNodesGPU(ctx, batch)
-			if err != nil {
-				log.Error().Err(err).Msg("failed to upsert new GPUs")
-			}
-		case <-ctx.Done():
-			return
-		}
+
+	err = db.UpsertNodesGPU(ctx, batch)
+	if err != nil {
+		return fmt.Errorf("failed to upsert new GPUs: %w", err)
 	}
+
+	return nil
 }
 
 func discardOldGpus(ctx context.Context, database db.Database, interval time.Duration, gpuBatch []types.NodeGPU) error {
 	// invalidate the old indexed GPUs for the same node,
 	// but check the batch first to ensure it does not contain related GPUs to node twin it from the last batch.
-	// TODO: if timestamp > 1
 	nodeTwinIds := []uint32{}
 	for _, gpu := range gpuBatch {
 		nodeTwinIds = append(nodeTwinIds, gpu.NodeTwinID)
 	}
diff --git a/grid-proxy/internal/indexer/health.go b/grid-proxy/internal/indexer/health.go
index 72e30d792..d6b833eca 100644
--- a/grid-proxy/internal/indexer/health.go
+++ b/grid-proxy/internal/indexer/health.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"time"
 
-	"github.com/rs/zerolog/log"
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db"
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types"
 	"github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer"
@@ -14,116 +13,36 @@ const (
 	healthCallCmd = "zos.system.version"
 )
 
-type NodeHealthIndexer struct {
-	database db.Database
-	rmbClient *peer.RpcClient
-	nodeTwinIdsChan chan uint32
-	resultChan chan types.HealthReport
-	batchChan chan []types.HealthReport
-	indexerInterval time.Duration
-	indexerWorkers uint
-	batchSize uint
+type HealthWork struct {
+	findersInterval map[string]time.Duration
 }
 
-func NewNodeHealthIndexer(
-	rpcClient *peer.RpcClient,
-	database db.Database,
-	batchSize uint,
-	indexerWorkers uint,
-	indexerInterval uint,
-) *NodeHealthIndexer {
-	return &NodeHealthIndexer{
-		database: database,
-		rmbClient: rpcClient,
-		nodeTwinIdsChan: make(chan uint32),
-		resultChan: make(chan types.HealthReport),
-		batchChan: make(chan []types.HealthReport),
-		batchSize: batchSize,
-		indexerWorkers: indexerWorkers,
-		indexerInterval: time.Duration(indexerInterval) * time.Minute,
+func NewHealthWork(interval uint) *HealthWork {
+	return &HealthWork{
+		findersInterval: map[string]time.Duration{
+			"up": time.Duration(interval) * time.Minute,
+			"healthy": time.Duration(interval) * time.Minute,
+		},
 	}
 }
 
-func (c *NodeHealthIndexer) Start(ctx context.Context) {
-	go c.StartNodeFinder(ctx)
-
-	for i := uint(0); i < c.indexerWorkers; i++ {
-		go c.StartNodeCaller(ctx)
-	}
-
-	for i := uint(0); i < c.indexerWorkers; i++ {
-		go c.StartResultBatcher(ctx)
-	}
-
-	go c.StartBatchUpserter(ctx)
-}
-
-func (c *NodeHealthIndexer) StartNodeFinder(ctx context.Context) {
-	ticker := time.NewTicker(c.indexerInterval)
-
-	queryHealthyNodes(ctx, c.database, c.nodeTwinIdsChan) // to revalidate the reports if node went down
-	queryUpNodes(ctx, c.database, c.nodeTwinIdsChan)
-	for {
-		select {
-		case <-ticker.C:
-			queryHealthyNodes(ctx, c.database, c.nodeTwinIdsChan)
-			queryUpNodes(ctx, c.database, c.nodeTwinIdsChan)
-		case <-ctx.Done():
-			return
-		}
-	}
+func (w *HealthWork) Finders() map[string]time.Duration {
+	return w.findersInterval
 }
 
-func (c *NodeHealthIndexer) StartNodeCaller(ctx context.Context) {
-	for {
-		select {
-		case twinId := <-c.nodeTwinIdsChan:
-			var response types.HealthReport
-			err := callNode(ctx, c.rmbClient, healthCallCmd, nil, twinId, &response)
-			c.resultChan <- getHealthReport(response, err, twinId)
-		case <-ctx.Done():
-			return
-		}
-	}
-}
+func (w *HealthWork) Get(ctx context.Context, rmb *peer.RpcClient, twinId uint32) ([]types.HealthReport, error) {
+	var response types.HealthReport
+	err := callNode(ctx, rmb, healthCallCmd, nil, twinId, &response)
 
-func (c *NodeHealthIndexer) StartResultBatcher(ctx context.Context) {
-	buffer := make([]types.HealthReport, 0, c.batchSize)
-
-	ticker := time.NewTicker(flushingBufferInterval)
-	for {
-		select {
-		case report := <-c.resultChan:
-			buffer = append(buffer, report)
-			if len(buffer) >= int(c.batchSize) {
-				c.batchChan <- buffer
-				buffer = nil
-			}
-		case <-ticker.C:
-			if len(buffer) != 0 {
-				c.batchChan <- buffer
-				buffer = nil
-			}
-		case <-ctx.Done():
-			return
-		}
-	}
+	res := getHealthReport(response, err, twinId)
+	return []types.HealthReport{res}, nil
 }
 
-func (c *NodeHealthIndexer) StartBatchUpserter(ctx context.Context) {
-	for {
-		select {
-		case batch := <-c.batchChan:
-			err := c.database.UpsertNodeHealth(ctx, batch)
-			if err != nil {
-				log.Error().Err(err).Msg("failed to upsert node health")
-			}
-		case <-ctx.Done():
-			return
-		}
-	}
+func (w *HealthWork) Upsert(ctx context.Context, db db.Database, batch []types.HealthReport) error {
+	return db.UpsertNodeHealth(ctx, batch)
 }
 
+// TODO: use diagnostics call instead
 func getHealthReport(response interface{}, err error, twinId uint32) types.HealthReport {
 	report := types.HealthReport{
 		NodeTwinId: twinId,
diff --git a/grid-proxy/internal/indexer/indexer.go b/grid-proxy/internal/indexer/indexer.go
new file mode 100644
index 000000000..2260377d8
--- /dev/null
+++ b/grid-proxy/internal/indexer/indexer.go
@@ -0,0 +1,141 @@
+package indexer
+
+import (
+	"context"
+	"reflect"
+	"time"
+
+	"github.com/rs/zerolog/log"
+	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db"
+	"github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer"
+)
+
+const (
+	indexerCallTimeout = 30 * time.Second // rmb calls timeout
+	flushingBufferInterval = 60 * time.Second // upsert buffer in db if it didn't reach the batch size
+	newNodesCheckInterval = 5 * time.Minute
+	batchSize = 20
+)
+
+type Work[T any] interface {
+	Finders() map[string]time.Duration
+	Get(ctx context.Context, rmb *peer.RpcClient, id uint32) ([]T, error)
+	Upsert(ctx context.Context, db db.Database, batch []T) error
+}
+
+type Indexer[T any] struct {
+	name string
+	work Work[T]
+	dbClient db.Database
+	rmbClient *peer.RpcClient
+	idChan chan uint32
+	resultChan chan T
+	batchChan chan []T
+	workerNum uint
+}
+
+func NewIndexer[T any](
+	work Work[T],
+	name string,
+	db db.Database,
+	rmb *peer.RpcClient,
+	worker uint,
+) *Indexer[T] {
+	return &Indexer[T]{
+		work: work,
+		name: name,
+		dbClient: db,
+		rmbClient: rmb,
+		workerNum: worker,
+		idChan: make(chan uint32),
+		resultChan: make(chan T),
+		batchChan: make(chan []T),
+	}
+}
+
+func (i *Indexer[T]) Start(ctx context.Context) {
+	for name, interval := range i.work.Finders() {
+		go finders[name](ctx, interval, i.dbClient, i.idChan)
+	}
+
+	for j := uint(0); j < i.workerNum; j++ {
+		go i.get(ctx)
+	}
+
+	go i.batch(ctx)
+
+	go i.upsert(ctx)
+
+	log.Info().Msgf("%s Indexer started", i.name)
+}
+
+func (i *Indexer[T]) get(ctx context.Context) {
+	for {
+		select {
+		case id := <-i.idChan:
+			res, err := i.work.Get(ctx, i.rmbClient, id)
+			if err != nil {
+				log.Error().Err(err).Str("indexer", i.name).Uint32("twinId", id).Msg("failed to call")
+				continue
+			}
+
+			for _, item := range res {
+				log.Debug().Str("indexer", i.name).Uint32("twinId", id).Msgf("response: %+v", item)
+				i.resultChan <- item
+			}
+
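+		// stop this worker once the indexer context is cancelled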
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+func (i *Indexer[T]) batch(ctx context.Context) {
+	buffer := make([]T, 0, batchSize)
+
+	ticker := time.NewTicker(flushingBufferInterval)
+	for {
+		select {
+		case data := <-i.resultChan:
+			// to prevent having multiple data for the same twin from different finders
+			if i.isUnique(buffer, data) {
+				buffer = append(buffer, data)
+			}
+			if len(buffer) >= int(batchSize) {
+				log.Debug().Str("indexer", i.name).Int("size", len(buffer)).Msg("batching")
+				i.batchChan <- buffer
+				buffer = nil
+			}
+		case <-ticker.C:
+			if len(buffer) != 0 {
+				log.Debug().Str("indexer", i.name).Int("size", len(buffer)).Msg("batching")
+				i.batchChan <- buffer
+				buffer = nil
+			}
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+func (i *Indexer[T]) upsert(ctx context.Context) {
+	for {
+		select {
+		case batch := <-i.batchChan:
+			err := i.work.Upsert(ctx, i.dbClient, batch)
+			if err != nil {
+				log.Error().Err(err).Str("indexer", i.name).Msg("failed to upsert batch")
+			}
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+func (i *Indexer[T]) isUnique(buffer []T, data T) bool {
+	for _, item := range buffer {
+		if reflect.DeepEqual(item, data) {
+			return false
+		}
+	}
+	return true
+}
diff --git a/grid-proxy/internal/indexer/manager.go b/grid-proxy/internal/indexer/manager.go
deleted file mode 100644
index 65e3b3405..000000000
--- a/grid-proxy/internal/indexer/manager.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package indexer
-
-import (
-	"context"
-	"time"
-
-	"github.com/rs/zerolog/log"
-)
-
-const (
-	indexerCallTimeout = 30 * time.Second // rmb calls timeout
-	flushingBufferInterval = 60 * time.Second // upsert buffer in db if it didn't reach the batch size
-	newNodesCheckInterval = 5 * time.Minute
-)
-
-type Indexer interface {
-	Start(ctx context.Context)
-	StartNodeFinder(ctx context.Context)
-	StartNodeCaller(ctx context.Context)
-	StartResultBatcher(ctx context.Context)
-	StartBatchUpserter(ctx context.Context)
-}
-
-type Manager struct {
-	Indexers map[string]Indexer
-	Context context.Context
-}
-
-func NewManager(
-	ctx context.Context,
-) *Manager {
-	return &Manager{
-		Indexers: make(map[string]Indexer),
-		Context: ctx,
-	}
-}
-
-func (m *Manager) Register(name string, indexer Indexer) {
-	m.Indexers[name] = indexer
-}
-
-func (m *Manager) Start() {
-	log.Info().Msg("Starting indexers manager...")
-	for name, watcher := range m.Indexers {
-		watcher.Start(m.Context)
-		log.Info().Msgf("%s indexer started", name)
-	}
-}
diff --git a/grid-proxy/internal/indexer/speed.go b/grid-proxy/internal/indexer/speed.go
index 105333b21..a29fec64c 100644
--- a/grid-proxy/internal/indexer/speed.go
+++ b/grid-proxy/internal/indexer/speed.go
@@ -5,7 +5,6 @@ import (
 	"encoding/json"
 	"time"
 
-	"github.com/rs/zerolog/log"
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/internal/explorer/db"
 	"github.com/threefoldtech/tfgrid-sdk-go/grid-proxy/pkg/types"
 	"github.com/threefoldtech/tfgrid-sdk-go/rmb-sdk-go/peer"
@@ -19,124 +18,43 @@ const (
 	testName = "iperf"
 )
 
-type SpeedIndexer struct {
-	database db.Database
-	rmbClient *peer.RpcClient
-	interval time.Duration
-	workers uint
-	batchSize uint
-	nodeTwinIdsChan chan uint32
-	resultChan chan types.Speed
-	batchChan chan []types.Speed
+type SpeedWork struct {
+	findersInterval map[string]time.Duration
 }
 
-func NewSpeedIndexer(
-	rmbClient *peer.RpcClient,
-	database db.Database,
-	batchSize uint,
-	interval uint,
-	workers uint,
-) *SpeedIndexer {
-	return &SpeedIndexer{
-		database: database,
-		rmbClient: rmbClient,
-		batchSize: batchSize,
-		interval: time.Duration(interval) * time.Minute,
-		workers: workers,
-		nodeTwinIdsChan: make(chan uint32),
-		resultChan: make(chan types.Speed),
-		batchChan: make(chan []types.Speed),
+func NewSpeedWork(interval uint) *SpeedWork {
+	return &SpeedWork{
+		findersInterval: map[string]time.Duration{
+			"up": time.Duration(interval) * time.Minute,
+		},
 	}
 }
 
-func (w *SpeedIndexer) Start(ctx context.Context) {
-	go w.StartNodeFinder(ctx)
-
-	for i := uint(0); i < w.workers; i++ {
-		go w.StartNodeCaller(ctx)
-	}
-
-	for i := uint(0); i < w.workers; i++ {
-		go w.StartResultBatcher(ctx)
-	}
-
-	go w.StartBatchUpserter(ctx)
+func (w *SpeedWork) Finders() map[string]time.Duration {
+	return w.findersInterval
 }
 
-func (w *SpeedIndexer) StartNodeFinder(ctx context.Context) {
-	ticker := time.NewTicker(w.interval)
-	queryUpNodes(ctx, w.database, w.nodeTwinIdsChan)
-	for {
-		select {
-		case <-ticker.C:
-			queryUpNodes(ctx, w.database, w.nodeTwinIdsChan)
-		case <-ctx.Done():
-			return
-		}
+func (w *SpeedWork) Get(ctx context.Context, rmb *peer.RpcClient, twinId uint32) ([]types.Speed, error) {
+	payload := struct {
+		Name string
+	}{
+		Name: testName,
 	}
-}
-
-func (w *SpeedIndexer) StartNodeCaller(ctx context.Context) {
-	for {
-		select {
-		case twinId := <-w.nodeTwinIdsChan:
-			payload := struct {
-				Name string
-			}{
-				Name: testName,
-			}
-			var response zosPerfPkg.TaskResult
-			if err := callNode(ctx, w.rmbClient, perfTestCallCmd, payload, twinId, &response); err != nil {
-				continue
-			}
-
-			speedReport, err := parseSpeed(response, twinId)
-			if err != nil {
-				continue
-			}
-
-			w.resultChan <- speedReport
-		case <-ctx.Done():
-			return
-		}
+	var response zosPerfPkg.TaskResult
+	if err := callNode(ctx, rmb, perfTestCallCmd, payload, twinId, &response); err != nil {
+		return []types.Speed{}, err
 	}
-}
-
-func (w *SpeedIndexer) StartResultBatcher(ctx context.Context) {
-	buffer := make([]types.Speed, 0, w.batchSize)
-
-	ticker := time.NewTicker(flushingBufferInterval)
-	for {
-		select {
-		case report := <-w.resultChan:
-			buffer = append(buffer, report)
-			if len(buffer) >= int(w.batchSize) {
-				w.batchChan <- buffer
-				buffer = nil
-			}
-		case <-ticker.C:
-			if len(buffer) != 0 {
-				w.batchChan <- buffer
-				buffer = nil
-			}
-		case <-ctx.Done():
-			return
-		}
+
+	speedReport, err := parseSpeed(response, twinId)
+	if err != nil {
+		return []types.Speed{}, err
 	}
+
+	return []types.Speed{speedReport}, nil
 }
 
-func (w *SpeedIndexer) StartBatchUpserter(ctx context.Context) {
-	for {
-		select {
-		case batch := <-w.batchChan:
-			err := w.database.UpsertNetworkSpeed(ctx, batch)
-			if err != nil {
-				log.Error().Err(err).Msg("failed to upsert network speed")
-			}
-		case <-ctx.Done():
-			return
-		}
-	}
+func (w *SpeedWork) Upsert(ctx context.Context, db db.Database, batch []types.Speed) error {
+	return db.UpsertNetworkSpeed(ctx, batch)
 }
 
 func parseSpeed(res zosPerfPkg.TaskResult, twinId uint32) (types.Speed, error) {

From e068289528bdc22f38fa6378b8bb73a684241f2f Mon Sep 17 00:00:00 2001
From: Omar Abdulaziz
Date: Tue, 12 Mar 2024 10:30:58 +0200
Subject: [PATCH 19/19] remove already merged trigger on node_gpu

---
 grid-proxy/internal/explorer/db/setup.sql | 34 -----------------------
 1 file changed, 34 deletions(-)

diff --git a/grid-proxy/internal/explorer/db/setup.sql b/grid-proxy/internal/explorer/db/setup.sql
index 4fa554031..7e3b5a97c 100644
--- a/grid-proxy/internal/explorer/db/setup.sql
+++ b/grid-proxy/internal/explorer/db/setup.sql
@@ -521,40 +521,6 @@ CREATE OR REPLACE TRIGGER tg_rent_contract
     AFTER INSERT OR UPDATE OF state ON rent_contract FOR EACH ROW
     EXECUTE PROCEDURE reflect_rent_contract_changes();
 
-/*
-  Gpu trigger
-   - Insert new node_gpu > increase the gpu_num in resources cache
-   - Delete node_gpu > decrease the gpu_num in resources cache
-*/
-CREATE OR REPLACE FUNCTION reflect_node_gpu_count_change() RETURNS TRIGGER AS
-$$
-BEGIN
-    BEGIN
-        UPDATE resources_cache
-        SET node_gpu_count = node_gpu_count + (
-            CASE
-            WHEN TG_OP != 'DELETE'
-                THEN -1
-            WHEN TG_OP != 'INSERT'
-                THEN 1
-            ELSE 0
-            END
-        )
-        WHERE resources_cache.node_id = (
-            SELECT node_id from node where node.twin_id = NEW.node_twin_id
-        );
-    EXCEPTION
-        WHEN OTHERS THEN
-            RAISE NOTICE 'Error updating resources_cache gpu fields %', SQLERRM;
-    END;
-RETURN NULL;
-END;
-$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE TRIGGER tg_node_gpu_count
-    AFTER INSERT OR DELETE ON node_gpu FOR EACH ROW
-    EXECUTE PROCEDURE reflect_node_gpu_count_change();
-
 /*
   Dmi trigger
    - Insert new record/Update > update resources_cache