Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RUMM-1744 Add E2E tests for Kronos (NTP) #703

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions Datadog/Datadog.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -402,11 +402,9 @@
61D3E0D9277B23F1008BE766 /* KronosNTPProtocol.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0CF277B23F0008BE766 /* KronosNTPProtocol.swift */; };
61D3E0DA277B23F1008BE766 /* KronosTimeFreeze.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0D0277B23F1008BE766 /* KronosTimeFreeze.swift */; };
61D3E0DB277B23F1008BE766 /* KronosNSTimer+ClosureKit.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0D1277B23F1008BE766 /* KronosNSTimer+ClosureKit.swift */; };
61D3E0E3277B3D92008BE766 /* KronosNTPClientTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0DE277B3D92008BE766 /* KronosNTPClientTests.swift */; };
61D3E0E4277B3D92008BE766 /* KronosNTPPacketTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0DF277B3D92008BE766 /* KronosNTPPacketTests.swift */; };
61D3E0E5277B3D92008BE766 /* KronosClockTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0E0277B3D92008BE766 /* KronosClockTests.swift */; };
61D3E0E6277B3D92008BE766 /* KronosDNSResolverTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0E1277B3D92008BE766 /* KronosDNSResolverTests.swift */; };
61D3E0E7277B3D92008BE766 /* KronosTimeStorageTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0E2277B3D92008BE766 /* KronosTimeStorageTests.swift */; };
61D3E0EA277E0C58008BE766 /* KronosE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D3E0E9277E0C58008BE766 /* KronosE2ETests.swift */; };
61D447E224917F8F00649287 /* DateFormatting.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D447E124917F8F00649287 /* DateFormatting.swift */; };
61D50C3C2580EEF8006038A3 /* LoggingScenarios.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D50C3B2580EEF8006038A3 /* LoggingScenarios.swift */; };
61D50C462580EF19006038A3 /* TracingScenarios.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61D50C452580EF19006038A3 /* TracingScenarios.swift */; };
Expand Down Expand Up @@ -1088,11 +1086,9 @@
61D3E0CF277B23F0008BE766 /* KronosNTPProtocol.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosNTPProtocol.swift; sourceTree = "<group>"; };
61D3E0D0277B23F1008BE766 /* KronosTimeFreeze.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosTimeFreeze.swift; sourceTree = "<group>"; };
61D3E0D1277B23F1008BE766 /* KronosNSTimer+ClosureKit.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "KronosNSTimer+ClosureKit.swift"; sourceTree = "<group>"; };
61D3E0DE277B3D92008BE766 /* KronosNTPClientTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosNTPClientTests.swift; sourceTree = "<group>"; };
61D3E0DF277B3D92008BE766 /* KronosNTPPacketTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosNTPPacketTests.swift; sourceTree = "<group>"; };
61D3E0E0277B3D92008BE766 /* KronosClockTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosClockTests.swift; sourceTree = "<group>"; };
61D3E0E1277B3D92008BE766 /* KronosDNSResolverTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosDNSResolverTests.swift; sourceTree = "<group>"; };
61D3E0E2277B3D92008BE766 /* KronosTimeStorageTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = KronosTimeStorageTests.swift; sourceTree = "<group>"; };
61D3E0E9277E0C58008BE766 /* KronosE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KronosE2ETests.swift; sourceTree = "<group>"; };
61D447E124917F8F00649287 /* DateFormatting.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DateFormatting.swift; sourceTree = "<group>"; };
61D50C3B2580EEF8006038A3 /* LoggingScenarios.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoggingScenarios.swift; sourceTree = "<group>"; };
61D50C452580EF19006038A3 /* TracingScenarios.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TracingScenarios.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -2676,6 +2672,7 @@
61993666265BBEDC009D7EA8 /* E2ETests */ = {
isa = PBXGroup;
children = (
61D3E0E8277E0C16008BE766 /* NTP */,
9E64849B27031050007CCD7B /* RUM */,
61B3BD502661224800A9BEF0 /* Logging */,
6187A53726FCBDD00015D94A /* Tracing */,
Expand Down Expand Up @@ -2965,15 +2962,20 @@
61D3E0DD277B3D6E008BE766 /* Kronos */ = {
isa = PBXGroup;
children = (
61D3E0E0277B3D92008BE766 /* KronosClockTests.swift */,
61D3E0E1277B3D92008BE766 /* KronosDNSResolverTests.swift */,
61D3E0DE277B3D92008BE766 /* KronosNTPClientTests.swift */,
61D3E0DF277B3D92008BE766 /* KronosNTPPacketTests.swift */,
61D3E0E2277B3D92008BE766 /* KronosTimeStorageTests.swift */,
);
path = Kronos;
sourceTree = "<group>";
};
61D3E0E8277E0C16008BE766 /* NTP */ = {
isa = PBXGroup;
children = (
61D3E0E9277E0C58008BE766 /* KronosE2ETests.swift */,
);
path = NTP;
sourceTree = "<group>";
};
61D50C3A2580EED5006038A3 /* ManualInstrumentation */ = {
isa = PBXGroup;
children = (
Expand Down Expand Up @@ -4210,7 +4212,6 @@
61B0387D252724AB00518F3C /* URLSessionSwizzlerTests.swift in Sources */,
617CD0DD24CEDDD300B0B557 /* RUMUserActionScopeTests.swift in Sources */,
D29889C9273413ED00A4D1A9 /* RUMViewsHandlerTests.swift in Sources */,
61D3E0E3277B3D92008BE766 /* KronosNTPClientTests.swift in Sources */,
61C5A8A024509C1100DA608C /* Casting+Tracing.swift in Sources */,
61133C662423990D00786299 /* LogSanitizerTests.swift in Sources */,
6114FE23257671F00084E372 /* ConsentAwareDataWriterTests.swift in Sources */,
Expand Down Expand Up @@ -4342,10 +4343,8 @@
611F82032563C66100CB9BDB /* UIKitRUMViewsPredicateTests.swift in Sources */,
61133C652423990D00786299 /* LogBuilderTests.swift in Sources */,
61B5E42B26DFC433000B0A5F /* DDNSURLSessionDelegate+apiTests.m in Sources */,
61D3E0E5277B3D92008BE766 /* KronosClockTests.swift in Sources */,
61F8CC092469295500FE2908 /* DatadogConfigurationBuilderTests.swift in Sources */,
61F1A623249B811200075390 /* Encoding.swift in Sources */,
61D3E0E6277B3D92008BE766 /* KronosDNSResolverTests.swift in Sources */,
6114FE3B25768AA90084E372 /* ConsentProviderTests.swift in Sources */,
61133C642423990D00786299 /* LoggerTests.swift in Sources */,
617B953D24BF4D8F00E6F443 /* RUMMonitorTests.swift in Sources */,
Expand Down Expand Up @@ -4557,6 +4556,7 @@
6147E3B3270486920092BC9F /* TracingConfigurationE2ETests.swift in Sources */,
6185F4AE26FE1956001A7641 /* SpanE2ETests.swift in Sources */,
61216B802667C79B0089DCD1 /* LoggingTrackingConsentE2ETests.swift in Sources */,
61D3E0EA277E0C58008BE766 /* KronosE2ETests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
7 changes: 5 additions & 2 deletions Datadog/E2ETests/E2ETests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,17 @@ class E2ETests: XCTestCase {
// MARK: - Measuring Performance with APM

/// Measures time of execution for given `block` - sends it as a `"perf_measure"` `Span` with a given resource name.
func measure(resourceName: String, _ block: () -> Void) {
@discardableResult
func measure<T>(resourceName: String, _ block: () -> T) -> T {
let start = Date()
block()
let result = block()
let stop = Date()

let performanceSpan = Global.sharedTracer.startRootSpan(operationName: "perf_measure", startTime: start)
performanceSpan.setTag(key: DDTags.resource, value: resourceName)
performanceSpan.finish(at: stop)

return result
}

// MARK: - SDK Lifecycle
Expand Down
195 changes: 195 additions & 0 deletions Datadog/E2ETests/NTP/KronosE2ETests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
/*
* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2019-2020 Datadog, Inc.
*/

@testable import Datadog

class KronosE2ETests: E2ETests {
/// The logger sending additional telemetry on Kronos execution. These logs are available in Mobile Integrations org.
private var logger: Logger! // swiftlint:disable:this implicitly_unwrapped_optional

override func setUp() {
super.setUp()
logger = Logger
.builder
.set(loggerName: "kronos-e2e")
.build()
}

override func tearDown() {
logger = nil
super.tearDown()
}

/// TODO: RUMM-1859: Add E2E tests for monitoring Kronos in nightly tests
func test_kronos_clock_performs_sync_using_datadog_ntp_pool() { // E2E:wip
/// The result of `KronosClock.sync()`.
struct KronosSyncResult {
/// First received server date.
var firstReceivedDate: Date? = nil
/// First received server offset.
var firstReceivedOffset: TimeInterval? = nil
/// Last received server date.
var lastReceivedDate: Date? = nil
/// Last received server offset.
var lastReceivedOffset: TimeInterval? = nil
/// Device date measured at the moment of receiving any server date. Used for additional debugging and comparision.
var measuredDeviceDate = Date()
}

func performKronosSync(using pool: String) -> KronosSyncResult {
KronosClock.reset()
defer { KronosClock.reset() }

// Given
let numberOfSamplesForEachIP = 2 // exchange only 2 samples with each resolved IP - to run test quick

// Each IP (each server) is asked in parallel, but samples are obtained sequentially.
// Here we compute test timeout, to ensure that all (parallel) servers complete querying their (sequential) samples
// below `testTimeout` with assuming +50% margin. This should guarantee no flakiness on test timeout.
let testTimeout = kronosDefaultTimeout * Double(numberOfSamplesForEachIP) * 1.5

// When
let completionExpectation = expectation(description: "KronosClock.sync() calls completion closure")
var result = KronosSyncResult()

KronosClock.sync(
from: pool,
samples: numberOfSamplesForEachIP,
first: { date, offset in // this closure could not be called if all samples to all servers resulted with failure
result.firstReceivedDate = date
result.firstReceivedOffset = offset
result.measuredDeviceDate = Date()
},
completion: { date, offset in // this closure should always be called
result.lastReceivedDate = date
result.lastReceivedOffset = offset
result.measuredDeviceDate = Date()
completionExpectation.fulfill()
}
)

// Then

// We don't expect receiving timeout on `completionExpectation`. Number of samples and individual sample timeout
// is configured in a way that lets `KronosNTPClient` always fulfill the `completionExpectation`.
waitForExpectations(timeout: testTimeout)

return result
}

// Run test for each Datadog NTP pool:
DateCorrector.datadogNTPServers.forEach { ddNTPPool in
let result = measure(resourceName: DD.PerfSpanName.fromCurrentMethodName()) {
performKronosSync(using: ddNTPPool)
}

// Report result for this pool:
if let _ = result.firstReceivedDate, let _ = result.firstReceivedOffset, let serverDate = result.lastReceivedDate, let serverOffset = result.lastReceivedOffset {
// We consider `KronosClock.sync()` result to be consistent only if it has both `first` and `last` time values set.
// We log consistent result as INFO log that can be seen in Mobile Integration org.
logger.info("KronosClock.sync() completed with consistent result for \(ddNTPPool)", attributes: [
"serverOffset_measured": serverDate.timeIntervalSince(result.measuredDeviceDate),
"serverOffset_received": serverOffset,
"serverDate_received": iso8601DateFormatter.string(from: serverDate),
])
} else {
// Inconsistent result may correspond to flaky execution, e.g. if network was unreachable or if **all** NTP calls received timeout.
// We track inconsistent result as WARN log that will be watched by E2E monitor.
logger.warn("KronosClock.sync() completed with inconsistent result for \(ddNTPPool)", attributes: [
"serverDate_firstReceived": result.firstReceivedDate.flatMap { iso8601DateFormatter.string(from: $0) },
"serverDate_lastReceived": result.lastReceivedDate.flatMap { iso8601DateFormatter.string(from: $0) },
"serverOffset_firstReceived": result.firstReceivedOffset,
"serverOffset_lastReceived": result.lastReceivedOffset,
])
Comment on lines +99 to +106
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example WARN log received from here:

Screenshot 2021-12-30 at 19 09 00

}
}
}

/// TODO: RUMM-1859: Add E2E tests for monitoring Kronos in nightly tests
func test_kronos_ntp_client_queries_both_ipv4_and_ipv6_ips() { // E2E:wip
/// The result of `KronosNTPClient.query(pool:)`.
struct KronosNTPClientQueryResult {
/// Partial offsets received for each NTP packet sent to each resolved IP.
var receivedOffsets: [TimeInterval?] = []
/// Expected number of NTP packets to send.
var expectedNumberOfSamples = 0
/// Actual number of NTP packets that completed.
var numberOfCompletedSamples = 0
}

func performKronosNTPClientQuery() -> KronosNTPClientQueryResult {
let testTimeout: TimeInterval = 30

// Given
let pool = "2.datadog.pool.ntp.org" // a pool resolved to multiple IPv4 and IPv6 addresses (e.g. 4 + 4)
let numberOfSamplesForEachIP = 2 // exchange only 2 samples with each resolved IP - to run test quick

// Each IP (each server) is asked in parallel, but samples are obtained sequentially.
// Here we compute individual sample timeout, to ensure that all (parallel) servers complete querying their (sequential) samples
// below `testTimeout` with assuming -30% margin. This should guarantee no flakiness on test timeout.
let timeoutForEachSample = (testTimeout / Double(numberOfSamplesForEachIP)) * 0.7

// When
let completionExpectation = expectation(description: "It completes all samples for all IPs")
var result = KronosNTPClientQueryResult()

KronosNTPClient()
.query(
pool: pool,
numberOfSamples: numberOfSamplesForEachIP,
maximumServers: .max, // query all resolved IPs in the pool - to include both IPv4 and IPv6
timeout: timeoutForEachSample
) { offset, completed, total in
result.receivedOffsets.append(offset)
result.numberOfCompletedSamples = completed
result.expectedNumberOfSamples = total

if completed == total {
completionExpectation.fulfill()
}
}

// Then

// We don't expect receiving timeout on `completionExpectation`. Number of samples and individual sample timeout
// is configured in a way that lets `KronosNTPClient` always fulfill the `completionExpectation`.
// In worst case, it can fulfill it, with recording only `nil` offsets, which will mean receiving timeouts
// or error on all NTP queries.
waitForExpectations(timeout: testTimeout)

return result
}

// Run test:
let result = measure(resourceName: DD.PerfSpanName.fromCurrentMethodName()) {
performKronosNTPClientQuery()
}

// Report result:
if result.receivedOffsets.contains(where: { offset in offset != nil }) {
// We consider `KronosNTPClient.query(pool:)` result to be consistent if it received at least one offset.
let receivedOffsets: [String] = result.receivedOffsets.map { offset in
if let offset = offset {
return "\(offset)"
} else {
return "(nil)"
}
}
logger.info(
"KronosNTPClient.query(pool:) completed with consistent result receiving \(result.numberOfCompletedSamples)/\(result.expectedNumberOfSamples) NTP packets",
attributes: [
"offsets_received": receivedOffsets
]
)
Comment on lines +181 to +186
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example INFO log received from here:

Screenshot 2021-12-30 at 19 09 25

} else {
// Inconsistent result may correspond to flaky execution, e.g. if network was unreachable or if **all** NTP calls received timeout.
// We track inconsistent result as WARN log that will be watched by E2E monitor.
logger.warn(
"KronosNTPClient.query(pool:) completed with inconsistent result receiving \(result.numberOfCompletedSamples)/\(result.expectedNumberOfSamples) NTP packets"
)
}
}
}
8 changes: 4 additions & 4 deletions Sources/Datadog/Kronos/KronosNTPClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import Foundation

private let kDefaultTimeout = 6.0
internal let kronosDefaultTimeout = 6.0
private let kDefaultSamples = 4
private let kMaximumNTPServers = 5
private let kMaximumResultDispersion = 10.0
Expand Down Expand Up @@ -38,7 +38,7 @@ internal final class KronosNTPClient {
port: Int = 123,
numberOfSamples: Int = kDefaultSamples,
maximumServers: Int = kMaximumNTPServers,
timeout: CFTimeInterval = kDefaultTimeout,
timeout: CFTimeInterval = kronosDefaultTimeout,
progress: @escaping (TimeInterval?, Int, Int) -> Void
) {
var servers: [KronosInternetAddress: [KronosNTPPacket]] = [:]
Expand Down Expand Up @@ -89,7 +89,7 @@ internal final class KronosNTPClient {
ip: KronosInternetAddress,
port: Int = 123,
version: Int8 = 3,
timeout: CFTimeInterval = kDefaultTimeout,
timeout: CFTimeInterval = kronosDefaultTimeout,
numberOfSamples: Int = kDefaultSamples,
completion: @escaping (KronosNTPPacket?) -> Void
) {
Expand Down Expand Up @@ -168,7 +168,7 @@ internal final class KronosNTPClient {
if callbackType == .writeCallBack {
var packet = KronosNTPPacket()
let PDU = packet.prepareToSend() as CFData
CFSocketSendData(socket, nil, PDU, kDefaultTimeout)
CFSocketSendData(socket, nil, PDU, kronosDefaultTimeout)
return
}

Expand Down
Loading