-
Notifications
You must be signed in to change notification settings - Fork 95
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(export): add support in onGCE + no conn to metadata case
Fixes b/344740239 (edge case with GKE Metadata Server and GKE sandbox). * Added debug logging. * Updated metadata deps to get googleapis/google-cloud-go#9733 & use timeout-ed context * Moved risky logic from FromFlags, see code comment why. * Added regession test. ### Alternatives Everything we do in FromFlags or constructor is within readines period. We could consider moving potentially "slow" things on slow network or metadata srv to exporter.Run. This could be questionable as for GMP to work we at end need export functionality to work, so delaying that information or making it surface in separation to readiness might not be helpful. Signed-off-by: bwplotka <[email protected]>
- Loading branch information
Showing
11 changed files
with
266 additions
and
320 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package setup | ||
|
||
import ( | ||
"context" | ||
"net/http" | ||
"net/http/httptest" | ||
"os" | ||
"sync" | ||
"testing" | ||
"time" | ||
|
||
"cloud.google.com/go/compute/metadata" | ||
"github.com/GoogleCloudPlatform/prometheus-engine/pkg/export" | ||
"github.com/alecthomas/kingpin/v2" | ||
"github.com/go-kit/log" | ||
"github.com/google/go-cmp/cmp" | ||
) | ||
|
||
func TestFromFlags_NotOnGCE(t *testing.T) { | ||
// Asserting there is actually no GCE underneath. | ||
if metadata.OnGCE() { | ||
t.Fatal("This test assumes we don't run on GCP") | ||
} | ||
|
||
fake := kingpin.New("test", "test") | ||
newExport := FromFlags(fake, "product") | ||
// Our readines is default (3 * 10s), so ensure FromFlags is never longer than 30s. | ||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) | ||
defer cancel() | ||
|
||
// Fake app invocation. | ||
if _, err := fake.Parse(nil); err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
// Make sure constructor does is not stuck forever. | ||
_, _ = newExport(ctx, log.NewLogfmtLogger(os.Stderr), nil) | ||
} | ||
|
||
// Regression test for b/344740239. We ensure that even stuck metadata servers | ||
// calls will timeout correctly (we propagate context properly). | ||
func TestTryPopulateUnspecifiedFromMetadata(t *testing.T) { | ||
// Asserting there is actually no GCE underneath. | ||
if metadata.OnGCE() { | ||
t.Fatal("This test assumes we don't run on GCP") | ||
} | ||
|
||
var wg sync.WaitGroup | ||
wg.Add(1) | ||
|
||
// Setup fake HTTP server taking forever. | ||
s := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { | ||
wg.Wait() | ||
})) | ||
t.Cleanup(func() { | ||
wg.Done() | ||
s.Close() | ||
}) | ||
|
||
// Main "readiness" like timeout that we have to be faster than. | ||
testCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) | ||
defer cancel() | ||
|
||
// Inject metadata URL for our server (does not matter if 404). | ||
t.Setenv("GCE_METADATA_HOST", s.Listener.Addr().String()) | ||
|
||
// We expect this to finish sooner. | ||
ctx, cancel2 := context.WithTimeout(context.Background(), 1*time.Second) | ||
defer cancel2() | ||
opts := export.ExporterOpts{} | ||
tryPopulateUnspecifiedFromMetadata(ctx, log.NewLogfmtLogger(os.Stderr), &opts) | ||
if diff := cmp.Diff(export.ExporterOpts{}, opts); diff != "" { | ||
t.Fatal("expected no options populated", diff) | ||
} | ||
// We expect to finish the test, before testCtx is cancelled. | ||
// TODO(bwplotka): Assert we are not exiting faster because metadata cannot access our HTTP test server. | ||
// I checked manually for inverse to fail (e.g. setting in ctx longer than textCtx). | ||
if testCtx.Err() != nil { | ||
t.Fatal("tryPopulateUnspecifiedFromMetadata took 30s to complete, it should timeout after 1s") | ||
} | ||
} |
Oops, something went wrong.