From 8d28c6bad09b8739d534766acb4023f6c3c093df Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 15 Apr 2024 15:37:45 -0400 Subject: [PATCH 01/51] feat(tvu): basic tvu/repair pipeline - receive shreds - verify shreds - process shreds (track which have been received) - repair based on shred tracking --- src/cmd/cmd.zig | 19 ++- src/lib.zig | 7 + src/net/packet.zig | 24 +++ src/net/socket_utils.zig | 1 + src/sync/bit_array.zig | 69 +++++++++ src/sync/ref_counter.zig | 77 ++++++++++ src/tvu/repair_message.zig | 10 ++ src/tvu/repair_service.zig | 78 +++++++++- src/tvu/shred.zig | 292 ++++++++++++++++++++++++++++++++++++ src/tvu/shred_processor.zig | 39 +++++ src/tvu/shred_receiver.zig | 108 +++++++++++-- src/tvu/shred_tracker.zig | 266 ++++++++++++++++++++++++++++++++ src/tvu/shred_verifier.zig | 113 ++++++++++++++ src/utils/bitflags.zig | 21 +++ 14 files changed, 1106 insertions(+), 18 deletions(-) create mode 100644 src/sync/bit_array.zig create mode 100644 src/sync/ref_counter.zig create mode 100644 src/tvu/shred.zig create mode 100644 src/tvu/shred_processor.zig create mode 100644 src/tvu/shred_tracker.zig create mode 100644 src/tvu/shred_verifier.zig create mode 100644 src/utils/bitflags.zig diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 7da012907..0e7b49329 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -11,6 +11,7 @@ const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = network.Socket; +const BasicShredTracker = sig.tvu.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GossipService = sig.gossip.GossipService; const IpAddr = sig.net.IpAddr; @@ -22,6 +23,7 @@ const RepairService = sig.tvu.RepairService; const RepairPeerProvider = sig.tvu.RepairPeerProvider; const RepairRequester = sig.tvu.RepairRequester; const ShredReceiver = sig.tvu.ShredReceiver; +const Slot = sig.core.Slot; const SocketAddr = sig.net.SocketAddr; const enumFromName = sig.utils.enumFromName; @@ -318,11 +320,17 @@ fn validator(_: []const []const u8) !void { defer gossip_service.deinit(); var gossip_handle = try spawnGossip(&gossip_service); + const shred_version = sig.tvu.CachedAtomic(u16).init(&gossip_service.my_shred_version); + var repair_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); try repair_socket.bindToPort(repair_port); try repair_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); - var repair_svc = try initRepair(logger, &my_keypair, &exit, rand.random(), &gossip_service, &repair_socket); + var shred_tracker = try sig.tvu.BasicShredTracker.init(gpa_allocator, @intCast(test_repair_option.value.int orelse 0)); + const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); + const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); + + var repair_svc = try initRepair(logger, &my_keypair, &exit, rand.random(), &gossip_service, &repair_socket, &shred_tracker); defer repair_svc.deinit(); var repair_handle = try std.Thread.spawn(.{}, RepairService.run, .{&repair_svc}); @@ -332,8 +340,15 @@ fn validator(_: []const []const u8) !void { .exit = &exit, .logger = logger, .socket = &repair_socket, + .outgoing_shred_channel = unverified_shreds_channel, + .shred_version = shred_version, }; + var shred_receive_handle = try std.Thread.spawn(.{}, ShredReceiver.run, .{&shred_receiver}); + var verify_shreds_handle = try std.Thread.spawn(.{}, sig.tvu.runShredSigVerify, .{ &exit, unverified_shreds_channel, verified_shreds_channel }); 
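+    // At this point the full shred pipeline is wired together by channels:
+    // ShredReceiver -> unverified_shreds_channel -> runShredSigVerify ->
+    // verified_shreds_channel -> processShreds -> BasicShredTracker, which
+    // RepairService consults to request whatever is still missing.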
+ var process_shreds_handle = try std.Thread.spawn(.{}, sig.tvu.processShreds, .{ gpa_allocator, logger, verified_shreds_channel, &shred_tracker }); + _ = process_shreds_handle; + _ = verify_shreds_handle; gossip_handle.join(); repair_handle.join(); @@ -378,6 +393,7 @@ fn initRepair( random: Random, gossip_service: *GossipService, socket: *Socket, + shred_tracker: *sig.tvu.BasicShredTracker, ) !RepairService { var peer_provider = try RepairPeerProvider.init( gpa_allocator, @@ -396,6 +412,7 @@ fn initRepair( .logger = logger, }, .peer_provider = peer_provider, + .shred_tracker = shred_tracker, .logger = logger, .exit = exit, .slot_to_request = if (test_repair_option.value.int) |n| @intCast(n) else null, diff --git a/src/lib.zig b/src/lib.zig index bae111b2f..296c3428b 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -49,15 +49,18 @@ pub const version = struct { }; pub const sync = struct { + pub usingnamespace @import("sync/bit_array.zig"); pub usingnamespace @import("sync/channel.zig"); pub usingnamespace @import("sync/mpmc.zig"); pub usingnamespace @import("sync/ref.zig"); + pub usingnamespace @import("sync/ref_counter.zig"); pub usingnamespace @import("sync/mux.zig"); pub usingnamespace @import("sync/once_cell.zig"); pub usingnamespace @import("sync/thread_pool.zig"); }; pub const utils = struct { + pub usingnamespace @import("utils/bitflags.zig"); pub usingnamespace @import("utils/shortvec.zig"); pub usingnamespace @import("utils/types.zig"); pub usingnamespace @import("utils/varint.zig"); @@ -103,4 +106,8 @@ pub const tvu = struct { pub usingnamespace @import("tvu/repair_message.zig"); pub usingnamespace @import("tvu/repair_service.zig"); pub usingnamespace @import("tvu/shred_receiver.zig"); + pub usingnamespace @import("tvu/shred_verifier.zig"); + pub usingnamespace @import("tvu/shred.zig"); + pub usingnamespace @import("tvu/shred_tracker.zig"); + pub usingnamespace @import("tvu/shred_processor.zig"); }; diff --git a/src/net/packet.zig b/src/net/packet.zig index f096dcaa7..cdc22adfa 100644 --- a/src/net/packet.zig +++ b/src/net/packet.zig @@ -10,6 +10,7 @@ pub const Packet = struct { data: [PACKET_DATA_SIZE]u8, size: usize, addr: network.EndPoint, + flags: u8 = 0, const Self = @This(); @@ -28,4 +29,27 @@ pub const Packet = struct { .size = 0, }; } + + pub fn set(self: *Self, flag: Flag) void { + self.flags |= @intFromEnum(flag); + } + + pub fn isSet(self: *const Self, flag: Flag) bool { + return self.flags & @intFromEnum(flag) == @intFromEnum(flag); + } +}; + +/// TODO this violates separation of concerns. it's unusual for network-specific +/// type definitions to include information that's specific to application +/// components (like repair) +/// +/// it would be nice to find another approach that is equally easy to use, +/// without sacrificing safety, performance, or readability. 
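+///
+/// Illustrative usage via the `Packet` helpers above (names from this diff):
+///
+///   packet.set(.repair);
+///   if (packet.isSet(.discard)) return;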
+pub const Flag = enum(u8) { + discard = 0b0000_0001, + // forwarded = 0b0000_0010, + repair = 0b0000_0100, + // simple_vote_tx = 0b0000_1000, + // tracer_packet = 0b0001_0000, + // round_compute_unit_price = 0b0010_0000, }; diff --git a/src/net/socket_utils.zig b/src/net/socket_utils.zig index 10ab67b46..7db0116df 100644 --- a/src/net/socket_utils.zig +++ b/src/net/socket_utils.zig @@ -96,6 +96,7 @@ pub fn recvMmsg( } packet.addr = recv_meta.sender; packet.size = bytes_read; + packet.flags = 0; if (count == 0) { // nonblocking mode diff --git a/src/sync/bit_array.zig b/src/sync/bit_array.zig new file mode 100644 index 000000000..287bb3227 --- /dev/null +++ b/src/sync/bit_array.zig @@ -0,0 +1,69 @@ +const std = @import("std"); + +const Atomic = std.atomic.Atomic; +const Ordering = std.atomic.Ordering; + +pub fn AtomicBitArray(comptime size: usize) type { + const num_bytes = (size + 7) / 8; + return struct { + bytes: [num_bytes]Atomic(u8) = .{.{ .value = 0 }} ** num_bytes, + + pub const len = size; + + const Self = @This(); + + pub fn get(self: *Self, index: usize, comptime ordering: Ordering) !bool { + if (index >= size) return error.OutOfBounds; + const bitmask = mask(index); + return self.bytes[index / 8].load(ordering) & bitmask == bitmask; + } + + pub fn set(self: *Self, index: usize, comptime ordering: Ordering) !void { + if (index >= size) return error.OutOfBounds; + _ = self.bytes[index / 8].fetchOr(mask(index), ordering); + } + + pub fn unset(self: *Self, index: usize, comptime ordering: Ordering) !void { + if (index >= size) return error.OutOfBounds; + _ = self.bytes[index / 8].fetchAnd(~mask(index), ordering); + } + + fn mask(index: usize) u8 { + return @as(u8, 1) << @intCast(index % 8); + } + }; +} + +test "sync.bit_array" { + var x = AtomicBitArray(3){}; + try std.testing.expect(!try x.get(0, .Monotonic)); + try std.testing.expect(!try x.get(1, .Monotonic)); + try std.testing.expect(!try x.get(2, .Monotonic)); + + try x.set(1, .Monotonic); + + try std.testing.expect(!try x.get(0, .Monotonic)); + try std.testing.expect(try x.get(1, .Monotonic)); + try std.testing.expect(!try x.get(2, .Monotonic)); + + try x.set(0, .Monotonic); + try x.set(1, .Monotonic); + try x.set(2, .Monotonic); + + try std.testing.expect(try x.get(0, .Monotonic)); + try std.testing.expect(try x.get(1, .Monotonic)); + try std.testing.expect(try x.get(2, .Monotonic)); + + try x.unset(2, .Monotonic); + try x.unset(1, .Monotonic); + try x.unset(2, .Monotonic); + + try std.testing.expect(try x.get(0, .Monotonic)); + try std.testing.expect(!try x.get(1, .Monotonic)); + try std.testing.expect(!try x.get(2, .Monotonic)); + + if (x.get(3, .Monotonic)) |_| @panic("") else |_| {} + if (x.set(3, .Monotonic)) |_| @panic("") else |_| {} + if (x.unset(3, .Monotonic)) |_| @panic("") else |_| {} + if (x.get(3, .Monotonic)) |_| @panic("") else |_| {} +} diff --git a/src/sync/ref_counter.zig b/src/sync/ref_counter.zig new file mode 100644 index 000000000..5fb83e7d3 --- /dev/null +++ b/src/sync/ref_counter.zig @@ -0,0 +1,77 @@ +const std = @import("std"); + +const Atomic = std.atomic.Atomic; + +/// Thread-safe counter to track the lifetime of a shared resource. +/// This does not manage the resource directly. It is just a tool +/// that can be used by multiple contexts to communicate with each +/// other about the lifetime of a shared resource. +/// +/// This can be used to determine whether a resource: +/// - is still alive and safe to use. +/// - is safe to deinitialize. 
+/// +/// Initializes with refs = 1, assuming there is currently exactly +/// one valid reference, which will need `release` called when it +/// is no longer in use. Call `acquire` to register additional +/// references beyond the first. +pub const ReferenceCounter = struct { + state: Atomic(u64) = Atomic(u64).init(@bitCast(State{ .refs = 1 })), + + const State = packed struct { + /// While the resource is still alive, this is the number of active references. + /// After the resource dies, this value no longer has the same meaning. + /// Check `refs == acquirers` to see if the resource is dead. + refs: i32 = 0, + /// Number of threads currently in the process of attempting to acquire the resource. + acquirers: i32 = 0, + }; + + const Self = @This(); + + /// Acquire access to the shared resource in a new context. + /// Call `release` when you are done using the resource in this context. + /// + /// If successfully acquired, the resource will be safe + /// to use until you call `release` in the same context. + /// + /// Returns: + /// - true: access granted, counter has incremented + /// - false: access denied, already destroyed + pub fn acquire(self: *Self) bool { + const prior: State = @bitCast(self.state.fetchAdd( + @bitCast(State{ .acquirers = 1, .refs = 1 }), + .Acquire, + )); + if (prior.refs > prior.acquirers) { + _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1 }), .Monotonic); + return true; + } + // resource was already destroyed + _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1, .refs = 1 }), .Monotonic); + return false; + } + + /// Release a reference from a context where it is no longer in use. + /// + /// Returns: + /// - true: this was the last reference. you should now destroy the resource. + /// - false: there are still more references. don't do anything. 
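    ///
    /// Illustrative pattern (a sketch, not code from this diff; assumes some
    /// heap-allocated `resource` guarded by this counter):
    ///
    ///   if (counter.acquire()) {
    ///       defer if (counter.release()) allocator.destroy(resource);
    ///       // ... use the shared resource ...
    ///   }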
+ pub fn release(self: *Self) bool { + const prior: State = @bitCast(self.state.fetchSub(@bitCast(State{ .refs = 1 }), .AcqRel)); + // if this fails, the resource is already dead (analogous to double-free) + std.debug.assert(prior.refs > prior.acquirers); + return prior.refs == 1; + } +}; + +test "sync.ref_counter: ReferenceCounter works" { + var x = ReferenceCounter{}; + try std.testing.expect(x.acquire()); + try std.testing.expect(x.acquire()); + try std.testing.expect(x.acquire()); + try std.testing.expect(!x.release()); + try std.testing.expect(!x.release()); + try std.testing.expect(!x.release()); + try std.testing.expect(x.release()); +} diff --git a/src/tvu/repair_message.zig b/src/tvu/repair_message.zig index b4a16c31a..4e8d8561f 100644 --- a/src/tvu/repair_message.zig +++ b/src/tvu/repair_message.zig @@ -29,6 +29,16 @@ pub const RepairRequest = union(enum) { HighestShred: struct { Slot, u64 }, /// Requesting the missing shred at a particular index Shred: struct { Slot, u64 }, + + const Self = @This(); + + pub fn slot(self: *const Self) Slot { + return switch (self.*) { + .Orphan => |x| x, + .HighestShred => |x| x[0], + .Shred => |x| x[0], + }; + } }; /// Executes all three because they are tightly coupled: diff --git a/src/tvu/repair_service.zig b/src/tvu/repair_service.zig index 51f94bb5e..c02ec6ecb 100644 --- a/src/tvu/repair_service.zig +++ b/src/tvu/repair_service.zig @@ -3,11 +3,13 @@ const zig_network = @import("zig-network"); const sig = @import("../lib.zig"); const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; const Atomic = std.atomic.Atomic; const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = zig_network.Socket; +const BasicShredTracker = sig.tvu.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GossipTable = sig.gossip.GossipTable; const Logger = sig.trace.Logger; @@ -28,6 +30,7 @@ pub const RepairService = struct { allocator: Allocator, requester: RepairRequester, peer_provider: RepairPeerProvider, + shred_tracker: *BasicShredTracker, logger: Logger, exit: *Atomic(bool), slot_to_request: ?u64, @@ -36,18 +39,32 @@ pub const RepairService = struct { self.peer_provider.deinit(); } + /// Start the long-running service and block until it exits. pub fn run(self: *@This()) !void { self.logger.info("starting repair service"); defer self.logger.info("exiting repair service"); while (!self.exit.load(.Unordered)) { - if (try self.initialSnapshotRepair()) |request| { - try self.requester.sendRepairRequest(request); - } - // TODO repair logic + try self.sendNecessaryRepairs(); + // TODO sleep? std.time.sleep(100_000_000); } } + /// Identifies which repairs are needed based on the current state, + /// and sends those repairs, then returns. 
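+    ///
+    /// Flow: getRepairs (backed by shred_tracker.identifyMissing) ->
+    /// assignRequestsToPeers -> requester.sendRepairRequest, one send per
+    /// addressed request.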
+ fn sendNecessaryRepairs(self: *@This()) !void { + // if (try self.initialSnapshotRepair()) |request| { + // try self.requester.sendRepairRequest(request); + // } + const repair_requests = try self.getRepairs(); + defer repair_requests.deinit(); + const addressed_requests = try self.assignRequestsToPeers(repair_requests.items); + defer addressed_requests.deinit(); + for (addressed_requests.items) |addressed_request| { + try self.requester.sendRepairRequest(addressed_request); + } + } + fn initialSnapshotRepair(self: *@This()) !?AddressedRepairRequest { if (self.slot_to_request == null) return null; const request: RepairRequest = .{ .HighestShred = .{ self.slot_to_request.?, 0 } }; @@ -61,6 +78,46 @@ pub const RepairService = struct { return null; } } + + fn getRepairs(self: *@This()) !ArrayList(RepairRequest) { + const all_missing = try self.shred_tracker.identifyMissing(self.allocator); + defer all_missing.deinit(); + var repairs = ArrayList(RepairRequest).init(self.allocator); + var individual_count: usize = 0; + for (all_missing.reports.items) |report| { + const slot = report.slot; + for (report.missing_shreds.items) |shred_window| { + if (shred_window.end) |end| { + for (shred_window.start..end) |i| { + if (individual_count > 500) break; + individual_count += 1; + try repairs.append(.{ .Shred = .{ slot, i } }); + } + } else { + try repairs.append(.{ .HighestShred = .{ slot, shred_window.start } }); + } + } + } + return repairs; + } + + fn assignRequestsToPeers( + self: *@This(), + requests: []const RepairRequest, + ) !ArrayList(AddressedRepairRequest) { + var addressed = ArrayList(AddressedRepairRequest).init(self.allocator); + for (requests) |request| { + if (try self.peer_provider.getRandomPeer(request.slot())) |peer| { + try addressed.append(.{ + .request = request, + .recipient = peer.pubkey, + .recipient_addr = peer.serve_repair_socket, + }); + } + // TODO do something if a peer is not found + } + return addressed; + } }; /// Signs and serializes repair requests. Sends them over the network. 
@@ -71,6 +128,7 @@ pub const RepairRequester = struct {
     allocator: Allocator,
     keypair: *const KeyPair,
     rng: Random,
     udp_send_socket: *Socket,
     logger: Logger,
 
+    /// TODO: send batch
     pub fn sendRepairRequest(
         self: *const @This(),
         request: AddressedRepairRequest,
@@ -86,10 +144,11 @@ pub const RepairRequester = struct {
         );
         defer self.allocator.free(data);
         const addr = request.recipient_addr.toString();
-        self.logger.infof(
-            "sending repair request to {s} - {}",
-            .{ addr[0][0..addr[1]], request.request },
-        );
+        _ = addr;
+        // self.logger.infof(
+        //     "sending repair request to {s} - {}",
+        //     .{ addr[0][0..addr[1]], request.request },
+        // );
         _ = try self.udp_send_socket.sendTo(request.recipient_addr.toEndpoint(), data);
     }
 };
@@ -279,6 +338,8 @@ test "tvu.repair_service: RepairService sends repair request to gossip peer" {
         Pubkey.fromPublicKey(&keypair.public_key),
         &my_shred_version,
     );
+    var tracker = try BasicShredTracker.init(allocator, 13579);
+    defer tracker.deinit();
     var service = RepairService{
         .allocator = allocator,
         .requester = RepairRequester{
@@ -292,6 +353,7 @@ test "tvu.repair_service: RepairService sends repair request to gossip peer" {
         .logger = logger,
         .exit = &exit,
         .slot_to_request = 13579,
+        .shred_tracker = &tracker,
     };
     defer service.deinit();
 
diff --git a/src/tvu/shred.zig b/src/tvu/shred.zig
new file mode 100644
index 000000000..f4e1050b6
--- /dev/null
+++ b/src/tvu/shred.zig
@@ -0,0 +1,292 @@
+const std = @import("std");
+const sig = @import("../lib.zig");
+
+const bincode = sig.bincode;
+
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+
+const BitFlags = sig.utils.BitFlags;
+const Hash = sig.core.Hash;
+const Nonce = sig.core.Nonce;
+const Packet = sig.net.Packet;
+const Signature = sig.core.Signature;
+const Slot = sig.core.Slot;
+
+const SIGNATURE_LENGTH = sig.core.SIGNATURE_LENGTH;
+
+pub const MAX_DATA_SHREDS_PER_SLOT: usize = 32_768;
+pub const MAX_CODE_SHREDS_PER_SLOT: usize = MAX_DATA_SHREDS_PER_SLOT;
+pub const MAX_SHREDS_PER_SLOT: usize = MAX_CODE_SHREDS_PER_SLOT + MAX_DATA_SHREDS_PER_SLOT;
+
+pub const Shred = union(enum) {
+    code: ShredCode,
+    data: ShredData,
+
+    const Self = @This();
+
+    pub fn fromPayload(allocator: Allocator, shred: []const u8) !Self {
+        const variant = shred_layout.getShredVariant(shred) orelse return error.UnknownShredVariant;
+        return switch (variant.shred_type) {
+            .Code => .{ .code = try ShredCode.fromPayload(allocator, shred) },
+            .Data => .{ .data = try ShredData.fromPayload(allocator, shred) },
+        };
+    }
+
+    pub fn isLastInSlot(self: *const Self) bool {
+        return switch (self.*) {
+            .code => false,
+            .data => |data| data.isLastInSlot(),
+        };
+    }
+};
+
+pub const ShredData = struct {
+    common_header: ShredCommonHeader,
+    custom_header: DataShredHeader,
+    payload: ArrayList(u8),
+
+    const SIZE_OF_PAYLOAD: usize = 1203; // TODO this can be calculated like solana
+
+    const Self = @This();
+
+    pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self {
+        return try someShredFromPayload(Self, DataShredHeader, SIZE_OF_PAYLOAD, allocator, payload);
+    }
+
+    pub fn isLastInSlot(self: *const Self) bool {
+        return self.custom_header.flags.isSet(.last_shred_in_slot);
+    }
+
+    fn sanitize(self: *const Self) !void {
+        _ = self;
+        // TODO
+    }
+};
+
+pub const ShredCode = struct {
+    common_header: ShredCommonHeader,
+    custom_header: CodingShredHeader,
+    payload: ArrayList(u8),
+
+    const SIZE_OF_PAYLOAD: usize = 1228; // TODO this can be calculated like solana
+
+    const Self = @This();
+
+    pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self {
+        return try
someShredFromPayload(Self, CodingShredHeader, SIZE_OF_PAYLOAD, allocator, payload); + } + + fn sanitize(self: *const Self) !void { + _ = self; + // TODO + } +}; + +fn someShredFromPayload( + comptime Self: type, + comptime Header: type, + comptime SIZE_OF_PAYLOAD: usize, + allocator: Allocator, + payload: []const u8, +) !Self { + if (payload.len < SIZE_OF_PAYLOAD) { + return error.InvalidPayloadSize; + } + const exact_payload = payload[0..SIZE_OF_PAYLOAD]; + var buf = std.io.fixedBufferStream(exact_payload); + const common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}); + const custom_header = try bincode.read(allocator, Header, buf.reader(), .{}); + var owned_payload = ArrayList(u8).init(allocator); + try owned_payload.appendSlice(exact_payload); // TODO this is expensive, but necessary, unless packet allocations are changed + var self = Self{ + .common_header = common_header, + .custom_header = custom_header, + .payload = owned_payload, + }; + try self.sanitize(); + return self; +} + +pub const ShredCommonHeader = struct { + signature: Signature, + shred_variant: ShredVariant, + slot: Slot, + index: u32, + version: u16, + fec_set_index: u32, + + pub const @"!bincode-config:shred_variant" = ShredVariantConfig; +}; + +pub const DataShredHeader = struct { + parent_offset: u16, + flags: ShredFlags, + size: u16, // common shred header + data shred header + data +}; + +pub const CodingShredHeader = struct { + num_data_shreds: u16, + num_coding_shreds: u16, + position: u16, // [0..num_coding_shreds) +}; + +pub const ShredType = enum(u8) { + Code = 0b0101_1010, + Data = 0b1010_0101, +}; + +pub const ShredVariant = struct { + shred_type: ShredType, + proof_size: u8, + chained: bool, + resigned: bool, + + fn fromByte(byte: u8) error{ UnknownShredVariant, LegacyShredVariant }!@This() { + return switch (byte & 0xF0) { + 0x40 => .{ + .shred_type = .Code, + .proof_size = byte & 0x0F, + .chained = false, + .resigned = false, + }, + 0x60 => .{ + .shred_type = .Code, + .proof_size = byte & 0x0F, + .chained = true, + .resigned = false, + }, + 0x70 => .{ + .shred_type = .Code, + .proof_size = byte & 0x0F, + .chained = true, + .resigned = true, + }, + 0x80 => .{ + .shred_type = .Data, + .proof_size = byte & 0x0F, + .chained = false, + .resigned = false, + }, + 0x90 => .{ + .shred_type = .Data, + .proof_size = byte & 0x0F, + .chained = true, + .resigned = false, + }, + 0xb0 => .{ + .shred_type = .Data, + .proof_size = byte & 0x0F, + .chained = true, + .resigned = true, + }, + @intFromEnum(ShredType.Code) => error.LegacyShredVariant, + @intFromEnum(ShredType.Data) => error.LegacyShredVariant, + else => error.UnknownShredVariant, + }; + } +}; + +pub const ShredVariantConfig = blk: { + const S = struct { + pub fn serialize(_: anytype, _: anytype, _: bincode.Params) !void { + @panic("todo"); + } + + pub fn deserialize(_: ?std.mem.Allocator, reader: anytype, _: bincode.Params) !ShredVariant { + return try ShredVariant.fromByte(try reader.readByte()); + } + + pub fn free(_: std.mem.Allocator, _: anytype) void {} + }; + + break :blk bincode.FieldConfig(ShredVariant){ + .serializer = S.serialize, + .deserializer = S.deserialize, + .free = S.free, + }; +}; + +pub const ShredFlags = BitFlags(enum(u8) { + shred_tick_reference_mask = 0b0011_1111, + data_complete_shred = 0b0100_0000, + last_shred_in_slot = 0b1100_0000, +}); + +pub const shred_layout = struct { + const SIZE_OF_COMMON_SHRED_HEADER: usize = 83; + const SIZE_OF_DATA_SHRED_HEADERS: usize = 88; + const SIZE_OF_CODING_SHRED_HEADERS: 
usize = 89; + const SIZE_OF_SIGNATURE: usize = sig.core.SIGNATURE_LENGTH; + const SIZE_OF_SHRED_VARIANT: usize = 1; + const SIZE_OF_SHRED_SLOT: usize = 8; + + const OFFSET_OF_SHRED_VARIANT: usize = SIZE_OF_SIGNATURE; + const OFFSET_OF_SHRED_SLOT: usize = SIZE_OF_SIGNATURE + SIZE_OF_SHRED_VARIANT; + const OFFSET_OF_SHRED_INDEX: usize = OFFSET_OF_SHRED_SLOT + SIZE_OF_SHRED_SLOT; + + pub fn getShred(packet: *const Packet) ?[]const u8 { + if (getShredSize(packet) > packet.data.len) return null; + return packet.data[0..getShredSize(packet)]; + } + + pub fn getShredSize(packet: *const Packet) usize { + return if (packet.isSet(.repair)) + packet.size -| @sizeOf(Nonce) + else + packet.size; + } + + pub fn getSlot(shred: []const u8) ?Slot { + return getInt(Slot, shred, OFFSET_OF_SHRED_SLOT); + } + + pub fn getVersion(shred: []const u8) ?u16 { + return getInt(u16, shred, 77); + } + + pub fn getShredVariant(shred: []const u8) ?ShredVariant { + if (shred.len <= OFFSET_OF_SHRED_VARIANT) return null; + const byte = shred[OFFSET_OF_SHRED_VARIANT]; + return ShredVariant.fromByte(byte) catch null; + } + + pub fn getIndex(shred: []const u8) ?u32 { + return getInt(u32, shred, OFFSET_OF_SHRED_INDEX); + } + + pub fn getSignature(shred: []const u8) ?Signature { + if (shred.len < SIGNATURE_LENGTH) { + return null; + } + return Signature.init(shred[0..SIZE_OF_SIGNATURE].*); + } + + pub fn getSignedData(shred: []const u8) ?Hash { + const variant = getShredVariant(shred) orelse return null; + _ = variant; + // TODO implement this once the leader schedule is available to runShredSigVerify + return Hash.default(); + } + + /// must be a data shred, otherwise the return value will be corrupted and meaningless + pub fn getParentOffset(shred: []const u8) ?u16 { + std.debug.assert(getShredVariant(shred).?.shred_type == .Data); + return getInt(u16, shred, 83); + } + + /// Extracts a little-endian integer from within the slice, + /// starting at start_index. 
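+    /// e.g. `getSlot` above is `getInt(Slot, shred, OFFSET_OF_SHRED_SLOT)`:
+    /// an 8-byte little-endian read that yields null if the slice is too short.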
+ fn getInt( + comptime Int: type, + data: []const u8, + start_index: usize, + ) ?Int { + const end_index = start_index + @sizeOf(Int); + if (data.len < end_index) return null; + const bytes: *const [@sizeOf(Int)]u8 = @ptrCast(data[start_index..end_index]); + return std.mem.readInt(Int, bytes, .Little); + } +}; + +//new_from_serialized_shred diff --git a/src/tvu/shred_processor.zig b/src/tvu/shred_processor.zig new file mode 100644 index 000000000..6c656a3b2 --- /dev/null +++ b/src/tvu/shred_processor.zig @@ -0,0 +1,39 @@ +const std = @import("std"); +const sig = @import("../lib.zig"); + +const layout = sig.tvu.shred_layout; + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; + +const BasicShredTracker = sig.tvu.BasicShredTracker; +const Channel = sig.sync.Channel; +const Logger = sig.trace.Logger; +const Packet = sig.net.Packet; +const Shred = sig.tvu.Shred; + +/// analogous to `WindowService` +pub fn processShreds( + allocator: Allocator, + logger: Logger, + verified_shreds: *Channel(ArrayList(Packet)), + tracker: *BasicShredTracker, +) !void { + _ = logger; + // TODO unreachables + while (verified_shreds.receive()) |packet_batch| { + for (packet_batch.items) |*packet| if (!packet.isSet(.discard)) { + const shred_payload = layout.getShred(packet) orelse unreachable; + const slot = layout.getSlot(shred_payload) orelse unreachable; + const index = layout.getIndex(shred_payload) orelse unreachable; + tracker.registerShred(slot, index) catch |e| { + if (e != error.SlotUnderflow) return e; + continue; + }; + const shred = try Shred.fromPayload(allocator, shred_payload); + if (shred.isLastInSlot()) { + try tracker.setLastShred(slot, index); + } + }; + } +} diff --git a/src/tvu/shred_receiver.zig b/src/tvu/shred_receiver.zig index 7efe3d116..45df15500 100644 --- a/src/tvu/shred_receiver.zig +++ b/src/tvu/shred_receiver.zig @@ -3,6 +3,7 @@ const sig = @import("../lib.zig"); const network = @import("zig-network"); const bincode = sig.bincode; +const layout = sig.tvu.shred_layout; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; @@ -16,8 +17,38 @@ const Packet = sig.net.Packet; const Ping = sig.gossip.Ping; const Pong = sig.gossip.Pong; const RepairMessage = sig.tvu.RepairMessage; +const Slot = sig.core.Slot; const SocketThread = sig.net.SocketThread; +/// Use this in a single thread where you want to keep accessing +/// a value that's stored in an atomic, but you don't want to do +/// an expensive `load` operation every time you read it, and +/// you're fine with reading a slightly stale value each time. +/// +/// Periodically call `update` to refresh the value. +/// +/// The `cache` field and `update` methods are NOT thread safe. 
+/// Do not read the `cache` while executing `update` +pub fn CachedAtomic(comptime T: type) type { + return struct { + atomic: *Atomic(T), + cache: T, + + const Self = @This(); + + pub fn init(atomic: *Atomic(T)) Self { + return .{ + .atomic = atomic, + .cache = atomic.load(.Monotonic), + }; + } + + pub fn update(self: *Self) void { + self.cache = self.atomic.load(.Monotonic); + } + }; +} + /// Analogous to `ShredFetchStage` pub const ShredReceiver = struct { allocator: Allocator, @@ -25,6 +56,8 @@ pub const ShredReceiver = struct { exit: *Atomic(bool), logger: Logger, socket: *Socket, + outgoing_shred_channel: *Channel(ArrayList(Packet)), + shred_version: CachedAtomic(u16), const Self = @This(); @@ -52,27 +85,39 @@ pub const ShredReceiver = struct { while (!self.exit.load(.Unordered)) { var responses = ArrayList(Packet).init(self.allocator); if (try receiver.try_drain()) |batches| { - for (batches) |batch| for (batch.items) |*packet| { - try self.handlePacket(packet, &responses); - }; + for (batches) |batch| { + for (batch.items) |*packet| { + try self.handlePacket(packet, &responses); + } + try self.outgoing_shred_channel.send(batch); + } if (responses.items.len > 0) { try sender.send(responses); } } else { std.time.sleep(10_000_000); } + self.shred_version.update(); } } /// Handle a single packet and return - fn handlePacket(self: *Self, packet: *const Packet, responses: *ArrayList(Packet)) !void { + fn handlePacket(self: *Self, packet: *Packet, responses: *ArrayList(Packet)) !void { if (packet.size == REPAIR_RESPONSE_SERIALIZED_PING_BYTES) { try self.handlePing(packet, responses); + packet.set(.discard); } else { const endpoint_str = try sig.net.endpointToString(self.allocator, &packet.addr); defer endpoint_str.deinit(); - self.logger.field("from_endpoint", endpoint_str.items) - .infof("tvu: recv unknown shred message: {} bytes", .{packet.size}); + // self.logger.field("from_endpoint", endpoint_str.items) + // .debugf("tvu: recv shred message: {} bytes", .{packet.size}); + + // TODO figure out these values + const root = 0; + const max_slot = std.math.maxInt(Slot); + if (shouldDiscardShred(packet, root, self.shred_version.cache, max_slot)) { + packet.set(.discard); + } } } @@ -96,12 +141,57 @@ pub const ShredReceiver = struct { const endpoint_str = try sig.net.endpointToString(self.allocator, &packet.addr); defer endpoint_str.deinit(); - self.logger.field("from_endpoint", endpoint_str.items) - .field("from_pubkey", &ping.from.string()) - .info("tvu: recv repair ping"); + // self.logger.field("from_endpoint", endpoint_str.items) + // .field("from_pubkey", &ping.from.string()) + // .info("tvu: recv repair ping"); } }; +fn shouldDiscardShred( + packet: *const Packet, + root: Slot, + shred_version: u16, + max_slot: Slot, +) bool { + const shred = layout.getShred(packet) orelse return true; + const version = layout.getVersion(shred) orelse return true; + const slot = layout.getSlot(shred) orelse return true; + const index = layout.getIndex(shred) orelse return true; + const variant = layout.getShredVariant(shred) orelse return true; + + if (version != shred_version) return true; + if (slot > max_slot) return true; + switch (variant.shred_type) { + .Code => { + if (index >= sig.tvu.MAX_CODE_SHREDS_PER_SLOT) return true; + if (slot <= root) return true; + }, + .Data => { + if (index >= sig.tvu.MAX_DATA_SHREDS_PER_SLOT) return true; + const parent_offset = layout.getParentOffset(shred) orelse return true; + const parent = slot -| @as(Slot, @intCast(parent_offset)); + if 
(!verifyShredSlots(slot, parent, root)) return true; + }, + } + + // TODO: should we check for enable_chained_merkle_shreds? + + _ = layout.getSignature(shred) orelse return true; + _ = layout.getSignedData(shred) orelse return true; + + return false; +} + +/// TODO: this may need to move to blockstore +fn verifyShredSlots(slot: Slot, parent: Slot, root: Slot) bool { + if (slot == 0 and parent == 0 and root == 0) { + return true; // valid write to slot zero. + } + // Ignore shreds that chain to slots before the root, + // or have invalid parent >= slot. + return root <= parent and parent < slot; +} + const REPAIR_RESPONSE_SERIALIZED_PING_BYTES = 132; const RepairPing = union(enum) { Ping: Ping }; diff --git a/src/tvu/shred_tracker.zig b/src/tvu/shred_tracker.zig new file mode 100644 index 000000000..f2c2337c3 --- /dev/null +++ b/src/tvu/shred_tracker.zig @@ -0,0 +1,266 @@ +const std = @import("std"); +const sig = @import("../lib.zig"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const Atomic = std.atomic.Atomic; +const Ordering = std.atomic.Ordering; +const DefaultRwLock = std.Thread.RwLock.DefaultRwLock; +const Mutex = std.Thread.Mutex; + +const AtomicBitArray = sig.sync.AtomicBitArray; +const ReferenceCounter = sig.sync.ReferenceCounter; +const Slot = sig.core.Slot; + +pub const MAX_SHREDS_PER_SLOT: usize = sig.tvu.MAX_SHREDS_PER_SLOT; + +/// Naively tracks which shreds have been received, so we can request missing shreds. +/// Has no awareness of forking. +/// Placeholder until more sophisticated Blockstore and RepairWeights implementation. +/// +/// This struct is thread safe. Public methods can be called from anywhere at any time. +pub const BasicShredTracker = struct { + allocator: Allocator, + + /// prevents multiple threads from executing a rotation simultaneously + rotation_lock: Mutex = Mutex{}, + + /// The starting slot when this is first created, when the shard_counter = 0 + /// never changes + start_slot: Slot, + + /// The lowest slot currently tracked + first_slot: Atomic(Slot), + /// The highest slot currently tracked + last_slot: Atomic(Slot), + + slots: [num_slots]Atomic(*MonitoredSlot), + + good_until: Atomic(Slot), + max_slot_seen: Atomic(Slot), + + const num_slots: usize = 128; + + const Self = @This(); + + pub fn init(allocator: Allocator, slot: Slot) !Self { + var slots: [num_slots]Atomic(*MonitoredSlot) = undefined; + for (&slots) |*s| s.* = .{ .value = try MonitoredSlot.init(allocator) }; + // TODO is this off by one? 
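+        // The slots array acts as a ring buffer over a window of num_slots
+        // consecutive slots: slot N maps to index (N - start_slot) % num_slots
+        // (see getSlot), and rotate() recycles entries as the window advances.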
+ return .{ + .allocator = allocator, + .start_slot = slot, + .good_until = Atomic(Slot).init(slot), + .max_slot_seen = Atomic(Slot).init(slot), + .first_slot = Atomic(Slot).init(slot), + .last_slot = Atomic(Slot).init(slot + num_slots - 1), + .slots = slots, + }; + } + + pub fn deinit(self: *Self) void { + for (self.slots) |s| s.load(.Monotonic).release(); + } + + pub fn registerShred( + self: *Self, + slot: Slot, + shred_index: u64, + ) !void { + try self.rotate(); + _ = self.max_slot_seen.fetchMax(slot, .Monotonic); + const monitored_slot = try self.getSlot(slot); + defer monitored_slot.release(); + try monitored_slot.record(shred_index); + } + + // TODO make use of this + pub fn setLastShred(self: *Self, slot: Slot, index: usize) !void { + const monitored_slot = try self.getSlot(slot); + defer monitored_slot.release(); + monitored_slot.setLastShred(index); + } + + pub fn identifyMissing(self: *Self, allocator: Allocator) !MultiSlotReport { + var found_bad = false; + var slot_reports = ArrayList(SlotReport).init(allocator); + const max_slot_seen = self.max_slot_seen.load(.Monotonic); + for (self.good_until.load(.Monotonic)..max_slot_seen + 1) |slot| { + const monitored_slot = try self.getSlot(slot); + defer monitored_slot.release(); + const missing_shreds = try monitored_slot.identifyMissing(allocator); + if (missing_shreds.items.len > 0) { + found_bad = true; + try slot_reports.append(.{ .slot = slot, .missing_shreds = missing_shreds }); + } + if (!found_bad) { + const old = self.good_until.fetchMax(slot, .Monotonic); + if (old != slot) { + // TODO remove this + std.debug.print("finished slot: {}\n", .{old}); + } + } + } + var last_one = ArrayList(Range).init(allocator); + try last_one.append(.{ .start = 0, .end = null }); + try slot_reports.append(.{ .slot = max_slot_seen + 1, .missing_shreds = last_one }); + return .{ .reports = slot_reports }; + } + + fn getSlot(self: *Self, slot: Slot) error{ SlotUnderflow, SlotOverflow }!*MonitoredSlot { + const slot_index = (slot - self.start_slot) % num_slots; + if (slot > self.last_slot.load(.Acquire)) { + return error.SlotOverflow; + } + const the_slot = self.slots[slot_index].load(.Acquire); + if (slot < self.first_slot.load(.Monotonic)) { + return error.SlotUnderflow; + } + return the_slot.acquire() catch { + return error.SlotUnderflow; + }; + } + + fn rotate(self: *Self) !void { + if (!self.rotation_lock.tryLock()) return; + defer self.rotation_lock.unlock(); + + const good_until = self.good_until.load(.Monotonic); + for (self.first_slot.load(.Monotonic)..self.last_slot.load(.Monotonic)) |slot_num| { + var slot = &self.slots[slot_num % num_slots]; + if (good_until <= slot_num) { // TODO off by one? + break; + } + _ = self.first_slot.fetchAdd(1, .Monotonic); + const new_slot = try MonitoredSlot.init(self.allocator); + slot.swap(new_slot, .Monotonic).release(); + _ = self.last_slot.fetchAdd(1, .Monotonic); + } + } +}; + +pub const MultiSlotReport = struct { + reports: ArrayList(SlotReport), + + pub fn deinit(self: @This()) void { + for (self.reports.items) |report| { + report.missing_shreds.deinit(); + } + self.reports.deinit(); + } +}; + +pub const SlotReport = struct { + slot: Slot, + missing_shreds: ArrayList(Range), +}; + +pub const Range = struct { + start: usize, + end: ?usize, +}; + +/// This is reference counted. +/// Do not use without calling acquire first. +/// Call release when done with a particular usage. 
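+///
+/// Illustrative pattern (variable names are hypothetical):
+///
+///   const monitored_slot = try slot_ptr.acquire();
+///   defer monitored_slot.release();
+///   try monitored_slot.record(shred_index);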
+const MonitoredSlot = struct { + allocator: Allocator, + refcount: ReferenceCounter = .{}, + shreds: AtomicBitArray(MAX_SHREDS_PER_SLOT) = .{}, + max_seen: Atomic(usize) = Atomic(usize).init(0), + last_shred: Atomic(usize) = Atomic(usize).init(unknown), + + const unknown = std.math.maxInt(usize); + + const Self = @This(); + + pub fn init(allocator: Allocator) !*Self { + var self = try allocator.create(Self); + self.* = .{ .allocator = allocator }; + return self; + } + + pub fn acquire(self: *Self) !*Self { + if (self.refcount.acquire()) { + return self; + } + return error.Destroyed; + } + + pub fn release(self: *Self) void { + if (self.refcount.release()) { + self.allocator.destroy(self); + } + } + + // TODO: can all these be unordered? + pub fn record(self: *Self, shred_index: usize) !void { + try self.shreds.set(shred_index, .Monotonic); + _ = self.max_seen.fetchMax(shred_index, .Monotonic); + } + + // TODO make use of this + pub fn setLastShred(self: *Self, value: usize) void { + self.last_shred.store(value, .Monotonic); + } + + pub fn identifyMissing(self: *Self, allocator: Allocator) !ArrayList(Range) { + var missing_windows = ArrayList(Range).init(allocator); + var gap_start: ?usize = null; + const last_shred = self.last_shred.load(.Monotonic); + const max_seen = self.max_seen.load(.Monotonic); + for (0..max_seen + 2) |i| { + if (self.shreds.get(i, .Monotonic) catch unreachable) { + if (gap_start) |start| { + try missing_windows.append(.{ .start = start, .end = i }); + gap_start = null; + } + } else if (gap_start == null) { + gap_start = i; + } + } + if (max_seen < last_shred) { + const start = if (gap_start) |x| x else max_seen; // TODO is this redundant? + const end = if (last_shred == unknown) null else last_shred; + try missing_windows.append(.{ .start = start, .end = end }); + } + return missing_windows; + } +}; + +test "tvu.shred_tracker: trivial happy path" { + const allocator = std.testing.allocator; + + var tracker = try BasicShredTracker.init(allocator, 13579); + defer tracker.deinit(); + + const output = try tracker.identifyMissing(allocator); + defer output.deinit(); + + try std.testing.expect(1 == output.reports.items.len); + const report = output.reports.items[0]; + try std.testing.expect(13579 == report.slot); + try std.testing.expect(1 == report.missing_shreds.items.len); + try std.testing.expect(0 == report.missing_shreds.items[0].start); + try std.testing.expect(null == report.missing_shreds.items[0].end); +} + +test "tvu.shred_tracker: 1 registered shred is identified" { + const allocator = std.testing.allocator; + + var tracker = try BasicShredTracker.init(allocator, 13579); + defer tracker.deinit(); + try tracker.registerShred(13579, 123); + + const output = try tracker.identifyMissing(allocator); + defer output.deinit(); + + try std.testing.expect(1 == output.reports.items.len); + const report = output.reports.items[0]; + try std.testing.expect(13579 == report.slot); + try std.testing.expect(2 == report.missing_shreds.items.len); + try std.testing.expect(0 == report.missing_shreds.items[0].start); + try std.testing.expect(123 == report.missing_shreds.items[0].end); + try std.testing.expect(124 == report.missing_shreds.items[1].start); + try std.testing.expect(null == report.missing_shreds.items[1].end); +} diff --git a/src/tvu/shred_verifier.zig b/src/tvu/shred_verifier.zig new file mode 100644 index 000000000..b296cd189 --- /dev/null +++ b/src/tvu/shred_verifier.zig @@ -0,0 +1,113 @@ +const std = @import("std"); +const sig = @import("../lib.zig"); +const 
network = @import("zig-network"); + +const shred_layout = sig.tvu.shred_layout; + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const Atomic = std.atomic.Atomic; + +const Channel = sig.sync.Channel; +const Packet = sig.net.Packet; + +pub fn runShredSigVerify( + exit: *Atomic(bool), + incoming: *Channel(ArrayList(Packet)), + verified: *Channel(ArrayList(Packet)), +) void { + while (incoming.receive()) |packet_batch| { + // TODO parallelize this once it's actually verifying signatures + for (packet_batch.items) |*packet| { + if (!verifyShred(packet, {})) { + packet.set(.discard); + } + } + verified.send(packet_batch) catch unreachable; // TODO + if (exit.load(.Monotonic)) return; + } +} + +/// verify_shred_cpu +/// TODO slot leaders +fn verifyShred(packet: *const Packet, slot_leaders: void) bool { + if (packet.isSet(.discard)) return false; + const shred = shred_layout.getShred(packet) orelse return false; + const slot = shred_layout.getSlot(shred) orelse return false; + const signature = shred_layout.getSignature(shred) orelse return false; + const signed_data = shred_layout.getSignedData(shred) orelse return false; + + // TODO get slot leader pubkey and actually verify signature + _ = slot_leaders; + _ = slot; + if (false) return signature.verify(unreachable, signed_data.data); + + return true; +} + +// pub const EpochLeaderSchedule = struct { +// data: []const sig.core.Pubkey, +// first_slot: sig.core.Slot, + +// fn getLeader(self: *@This(), slot: sig.core.Slot) sig.core.Pubkey { +// const index = @as(usize, @intCast(slot)) - @as(usize, @intCast(self.first_slot)); +// return self.data[index]; +// } +// }; + +fn runLoopService( + config: LoopServiceConfig, + job_to_loop: anytype, + args: anytype, +) !void { + config.logger.infof("starting {}", config.name); + defer config.logger.infof("exiting {}", config.name); + const timer = try std.time.Timer.start(); + var last_iteration = timer.lap(); + while (!config.exit.load(.Unordered)) { + @call(.auto, job_to_loop, args) catch |e| { + switch (config.error_handler) { + .logger, .log_and_return => { + config.logger.errf("Unhandled error in {}: {}", .{ config.name, e }); + }, + else => {}, + } + switch (config.error_handler) { + .only_return, .log_and_return => return e, + else => {}, + } + }; + last_iteration = timer.lap(); + std.time.sleep(config.min_loop_duration_ns -| last_iteration); + } +} + +pub const LoopServiceConfig = struct { + logger: sig.trace.Logger, + exit: *Atomic(bool), + min_loop_duration_ns: u64, + name: []const u8, + error_handler: enum { + logger, + log_and_return, + only_return, + } = .logger, + + function: fn () anyerror!void, +}; + +pub const LoopService = struct { + function: fn () anyerror!void, + + fn init(function: anytype) LoopService { + _ = function; + } +}; + +fn generifun(function: anytype) fn (anytype) anyerror!void { + return struct { + fn genericVersion(args: anytype) anyerror!void { + return @call(.auto, function, args); + } + }.genericVersion; +} diff --git a/src/utils/bitflags.zig b/src/utils/bitflags.zig new file mode 100644 index 000000000..e5ea82d03 --- /dev/null +++ b/src/utils/bitflags.zig @@ -0,0 +1,21 @@ +pub fn BitFlags(comptime FlagEnum: type) type { + return struct { + state: @typeInfo(FlagEnum).Enum.tag_type, + + const Self = @This(); + + pub const Flag = FlagEnum; + + pub fn isSet(self: *const Self, flag: FlagEnum) bool { + return self.state & @intFromEnum(flag) == @intFromEnum(flag); + } + + pub fn set(self: *Self, flag: FlagEnum) void { + self.state |= 
@intFromEnum(flag);
+        }
+
+        pub fn unset(self: *Self, flag: FlagEnum) void {
+            self.state &= ~@intFromEnum(flag);
+        }
+    };
+}

From c9b63ff71dbd6d34da73ddb3bff9b0e21126a8bf Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Tue, 16 Apr 2024 11:50:38 -0400
Subject: [PATCH 02/51] feat(tvu): add tvu socket

Now ShredReceiver listens on both the TVU and repair ports, treating them
equally.

---
 src/cmd/cmd.zig            | 23 +++++++++++++--
 src/net/net.zig            |  8 ++++++
 src/tvu/shred_receiver.zig | 41 +++++++++++++++------------
 src/tvu/shred_verifier.zig | 57 --------------------------------------
 4 files changed, 52 insertions(+), 77 deletions(-)

diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig
index 0e7b49329..3ae6e1d01 100644
--- a/src/cmd/cmd.zig
+++ b/src/cmd/cmd.zig
@@ -88,6 +88,14 @@ var repair_port_option = cli.Option{
     .value_name = "Repair Port",
 };
 
+var tvu_port_option = cli.Option{
+    .long_name = "tvu-port",
+    .help = "The port to run the turbine listener - default: 8003",
+    .value = cli.OptionValue{ .int = 8003 },
+    .required = false,
+    .value_name = "TVU Port",
+};
+
 var test_repair_option = cli.Option{
     .long_name = "test-repair-for-slot",
     .help = "Set a slot here to repeatedly send repair requests for shreds from this slot. This is only intended for use during short-lived tests of the repair service. Do not set this during normal usage.",
@@ -307,6 +315,7 @@ fn validator(_: []const []const u8) !void {
     const ip_echo_data = try getMyDataFromIpEcho(logger, entrypoints.items);
 
     const repair_port: u16 = @intCast(repair_port_option.value.int.?);
+    const tvu_port: u16 = @intCast(tvu_port_option.value.int.?);
 
     var gossip_service = try initGossip(
         logger,
@@ -315,7 +324,10 @@ fn validator(_: []const []const u8) !void {
         entrypoints,
         ip_echo_data.shred_version, // TODO atomic owned at top level? or owned by gossip is good?
ip_echo_data.ip, - &.{.{ .tag = socket_tag.REPAIR, .port = repair_port }}, + &.{ + .{ .tag = socket_tag.REPAIR, .port = repair_port }, + .{ .tag = socket_tag.TVU, .port = tvu_port }, + }, ); defer gossip_service.deinit(); var gossip_handle = try spawnGossip(&gossip_service); @@ -323,9 +335,15 @@ fn validator(_: []const []const u8) !void { const shred_version = sig.tvu.CachedAtomic(u16).init(&gossip_service.my_shred_version); var repair_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); + try sig.net.enablePortReuse(&repair_socket, true); try repair_socket.bindToPort(repair_port); try repair_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); + var tvu_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); + try sig.net.enablePortReuse(&tvu_socket, true); + try tvu_socket.bindToPort(tvu_port); + try tvu_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); + var shred_tracker = try sig.tvu.BasicShredTracker.init(gpa_allocator, @intCast(test_repair_option.value.int orelse 0)); const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); @@ -339,7 +357,8 @@ fn validator(_: []const []const u8) !void { .keypair = &my_keypair, .exit = &exit, .logger = logger, - .socket = &repair_socket, + .repair_socket = &repair_socket, + .tvu_socket = &tvu_socket, .outgoing_shred_channel = unverified_shreds_channel, .shred_version = shred_version, }; diff --git a/src/net/net.zig b/src/net/net.zig index 8fefb1b9b..1d47eabb2 100644 --- a/src/net/net.zig +++ b/src/net/net.zig @@ -461,6 +461,14 @@ pub fn endpointToString(allocator: std.mem.Allocator, endpoint: *const network.E return endpoint_buf; } +/// Socket.enablePortReuse does not actually enable SO_REUSEPORT. It sets SO_REUSEADDR. +/// This is the correct implementation to enable SO_REUSEPORT. 
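+///
+/// Background: SO_REUSEADDR only relaxes rebinding of a recently-used address,
+/// while SO_REUSEPORT lets multiple live sockets bind the same address/port
+/// (with UDP on Linux, the kernel then distributes incoming datagrams among
+/// them).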
+pub fn enablePortReuse(sock: *network.Socket, enabled: bool) !void { + const setsockopt_fn = if (builtin.os.tag == .windows) @panic("windows not supported") else std.os.setsockopt; // TODO windows + var opt: c_int = if (enabled) 1 else 0; + try setsockopt_fn(sock.internal, std.os.SOL.SOCKET, std.os.SO.REUSEPORT, std.mem.asBytes(&opt)); +} + test "net.net: invalid ipv4 socket parsing" { { var addr = "127.0.0.11234"; diff --git a/src/tvu/shred_receiver.zig b/src/tvu/shred_receiver.zig index 45df15500..cd77b8c15 100644 --- a/src/tvu/shred_receiver.zig +++ b/src/tvu/shred_receiver.zig @@ -55,7 +55,8 @@ pub const ShredReceiver = struct { keypair: *const KeyPair, exit: *Atomic(bool), logger: Logger, - socket: *Socket, + repair_socket: *Socket, + tvu_socket: *Socket, outgoing_shred_channel: *Channel(ArrayList(Packet)), shred_version: CachedAtomic(u16), @@ -67,37 +68,41 @@ pub const ShredReceiver = struct { defer self.logger.err("exiting shred receiver"); errdefer self.logger.err("error in shred receiver"); - var sender = try SocketThread.initSender(self.allocator, self.logger, self.socket, self.exit); + var sender = try SocketThread.initSender(self.allocator, self.logger, self.repair_socket, self.exit); defer sender.deinit(); - var receiver = try SocketThread.initReceiver(self.allocator, self.logger, self.socket, self.exit); - defer receiver.deinit(); + var repair_receiver = try SocketThread.initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); + defer repair_receiver.deinit(); + var tvu_receiver = try SocketThread.initReceiver(self.allocator, self.logger, self.tvu_socket, self.exit); + defer tvu_receiver.deinit(); - try self.runPacketHandler(receiver.channel, sender.channel); + try self.runPacketHandler(.{ tvu_receiver.channel, repair_receiver.channel }, sender.channel); } /// Keep looping over packet channel and process the incoming packets. /// Returns when exit is set to true. 
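+    /// `receivers` is a tuple of channel pointers (hence `anytype`); the
+    /// `inline for` in the body unrolls the loop over the tuple's fields
+    /// at compile time.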
fn runPacketHandler( self: *Self, - receiver: *Channel(ArrayList(Packet)), + receivers: anytype, sender: *Channel(ArrayList(Packet)), ) !void { while (!self.exit.load(.Unordered)) { - var responses = ArrayList(Packet).init(self.allocator); - if (try receiver.try_drain()) |batches| { - for (batches) |batch| { - for (batch.items) |*packet| { - try self.handlePacket(packet, &responses); + inline for (receivers) |receiver| { + var responses = ArrayList(Packet).init(self.allocator); + if (try receiver.try_drain()) |batches| { + for (batches) |batch| { + for (batch.items) |*packet| { + try self.handlePacket(packet, &responses); + } + try self.outgoing_shred_channel.send(batch); } - try self.outgoing_shred_channel.send(batch); - } - if (responses.items.len > 0) { - try sender.send(responses); + if (responses.items.len > 0) { + try sender.send(responses); + } + } else { + std.time.sleep(10 * std.time.ns_per_ms); } - } else { - std.time.sleep(10_000_000); + self.shred_version.update(); } - self.shred_version.update(); } } diff --git a/src/tvu/shred_verifier.zig b/src/tvu/shred_verifier.zig index b296cd189..c0ccba212 100644 --- a/src/tvu/shred_verifier.zig +++ b/src/tvu/shred_verifier.zig @@ -54,60 +54,3 @@ fn verifyShred(packet: *const Packet, slot_leaders: void) bool { // return self.data[index]; // } // }; - -fn runLoopService( - config: LoopServiceConfig, - job_to_loop: anytype, - args: anytype, -) !void { - config.logger.infof("starting {}", config.name); - defer config.logger.infof("exiting {}", config.name); - const timer = try std.time.Timer.start(); - var last_iteration = timer.lap(); - while (!config.exit.load(.Unordered)) { - @call(.auto, job_to_loop, args) catch |e| { - switch (config.error_handler) { - .logger, .log_and_return => { - config.logger.errf("Unhandled error in {}: {}", .{ config.name, e }); - }, - else => {}, - } - switch (config.error_handler) { - .only_return, .log_and_return => return e, - else => {}, - } - }; - last_iteration = timer.lap(); - std.time.sleep(config.min_loop_duration_ns -| last_iteration); - } -} - -pub const LoopServiceConfig = struct { - logger: sig.trace.Logger, - exit: *Atomic(bool), - min_loop_duration_ns: u64, - name: []const u8, - error_handler: enum { - logger, - log_and_return, - only_return, - } = .logger, - - function: fn () anyerror!void, -}; - -pub const LoopService = struct { - function: fn () anyerror!void, - - fn init(function: anytype) LoopService { - _ = function; - } -}; - -fn generifun(function: anytype) fn (anytype) anyerror!void { - return struct { - fn genericVersion(args: anytype) anyerror!void { - return @call(.auto, function, args); - } - }.genericVersion; -} From ee5fd7c0c732219144423706892bb976f207866e Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 29 Apr 2024 23:52:11 -0400 Subject: [PATCH 03/51] feat(repair): basic repair works w/ good perf --- src/cmd/cmd.zig | 71 ++++----- src/lib.zig | 3 +- src/net/socket_utils.zig | 1 + src/sync/bit_array.zig | 69 --------- src/sync/channel.zig | 21 +++ src/tvu/repair_message.zig | 7 +- src/tvu/repair_service.zig | 220 +++++++++++++++++++++------ src/tvu/shred.zig | 33 ++-- src/tvu/shred_processor.zig | 41 +++-- src/tvu/shred_receiver.zig | 40 ++++- src/tvu/shred_tracker.zig | 292 ++++++++++++++++-------------------- src/tvu/shred_verifier.zig | 53 ++++--- src/utils/arraylist.zig | 62 ++++++++ src/utils/bitflags.zig | 2 +- src/utils/thread.zig | 141 ++++++++++++++++- 15 files changed, 669 insertions(+), 387 deletions(-) delete mode 100644 src/sync/bit_array.zig diff --git 
a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 3ae6e1d01..60331d707 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -30,6 +30,7 @@ const enumFromName = sig.utils.enumFromName; const getOrInitIdentity = helpers.getOrInitIdentity; const globalRegistry = sig.prometheus.globalRegistry; const getWallclockMs = sig.gossip.getWallclockMs; +const initRepair = sig.tvu.initRepair; const requestIpEcho = sig.net.requestIpEcho; const servePrometheus = sig.prometheus.servePrometheus; @@ -344,11 +345,26 @@ fn validator(_: []const []const u8) !void { try tvu_socket.bindToPort(tvu_port); try tvu_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); - var shred_tracker = try sig.tvu.BasicShredTracker.init(gpa_allocator, @intCast(test_repair_option.value.int orelse 0)); + var shred_tracker = try sig.tvu.BasicShredTracker.init( + gpa_allocator, + @intCast(test_repair_option.value.int orelse 0), + logger, + ); const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); - var repair_svc = try initRepair(logger, &my_keypair, &exit, rand.random(), &gossip_service, &repair_socket, &shred_tracker); + var repair_svc = try initRepair( + gpa_allocator, + logger, + &my_keypair, + &exit, + rand.random(), + &gossip_service.gossip_table_rw, + &gossip_service.my_shred_version, + &repair_socket, + shred_tracker, + if (test_repair_option.value.int) |n| @intCast(n) else null, + ); defer repair_svc.deinit(); var repair_handle = try std.Thread.spawn(.{}, RepairService.run, .{&repair_svc}); @@ -363,9 +379,21 @@ fn validator(_: []const []const u8) !void { .shred_version = shred_version, }; - var shred_receive_handle = try std.Thread.spawn(.{}, ShredReceiver.run, .{&shred_receiver}); - var verify_shreds_handle = try std.Thread.spawn(.{}, sig.tvu.runShredSigVerify, .{ &exit, unverified_shreds_channel, verified_shreds_channel }); - var process_shreds_handle = try std.Thread.spawn(.{}, sig.tvu.processShreds, .{ gpa_allocator, logger, verified_shreds_channel, &shred_tracker }); + var shred_receive_handle = try std.Thread.spawn( + .{}, + ShredReceiver.run, + .{&shred_receiver}, + ); + var verify_shreds_handle = try std.Thread.spawn( + .{}, + sig.tvu.runShredSigVerify, + .{ &exit, unverified_shreds_channel, verified_shreds_channel, .{} }, + ); + var process_shreds_handle = try std.Thread.spawn( + .{}, + sig.tvu.processShreds, + .{ gpa_allocator, verified_shreds_channel, shred_tracker }, + ); _ = process_shreds_handle; _ = verify_shreds_handle; @@ -405,39 +433,6 @@ fn initGossip( ); } -fn initRepair( - logger: Logger, - my_keypair: *const KeyPair, - exit: *Atomic(bool), - random: Random, - gossip_service: *GossipService, - socket: *Socket, - shred_tracker: *sig.tvu.BasicShredTracker, -) !RepairService { - var peer_provider = try RepairPeerProvider.init( - gpa_allocator, - random, - &gossip_service.gossip_table_rw, - Pubkey.fromPublicKey(&my_keypair.public_key), - &gossip_service.my_shred_version, - ); - return RepairService{ - .allocator = gpa_allocator, - .requester = RepairRequester{ - .allocator = gpa_allocator, - .rng = random, - .udp_send_socket = socket, - .keypair = my_keypair, - .logger = logger, - }, - .peer_provider = peer_provider, - .shred_tracker = shred_tracker, - .logger = logger, - .exit = exit, - .slot_to_request = if (test_repair_option.value.int) |n| @intCast(n) else null, - }; -} - /// Spawn a thread to run gossip and configure with CLI arguments fn 
spawnGossip(gossip_service: *GossipService) std.Thread.SpawnError!std.Thread { const spy_node = gossip_spy_node_option.value.bool; diff --git a/src/lib.zig b/src/lib.zig index 296c3428b..98ee159d1 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -49,7 +49,6 @@ pub const version = struct { }; pub const sync = struct { - pub usingnamespace @import("sync/bit_array.zig"); pub usingnamespace @import("sync/channel.zig"); pub usingnamespace @import("sync/mpmc.zig"); pub usingnamespace @import("sync/ref.zig"); @@ -60,8 +59,10 @@ pub const sync = struct { }; pub const utils = struct { + pub usingnamespace @import("utils/arraylist.zig"); pub usingnamespace @import("utils/bitflags.zig"); pub usingnamespace @import("utils/shortvec.zig"); + pub usingnamespace @import("utils/thread.zig"); pub usingnamespace @import("utils/types.zig"); pub usingnamespace @import("utils/varint.zig"); }; diff --git a/src/net/socket_utils.zig b/src/net/socket_utils.zig index 7db0116df..5b35267a7 100644 --- a/src/net/socket_utils.zig +++ b/src/net/socket_utils.zig @@ -179,6 +179,7 @@ pub const SocketThread = struct { pub fn deinit(self: Self) void { self.exit.store(true, .Unordered); self.handle.join(); + self.channel.deinit(); } }; diff --git a/src/sync/bit_array.zig b/src/sync/bit_array.zig deleted file mode 100644 index 287bb3227..000000000 --- a/src/sync/bit_array.zig +++ /dev/null @@ -1,69 +0,0 @@ -const std = @import("std"); - -const Atomic = std.atomic.Atomic; -const Ordering = std.atomic.Ordering; - -pub fn AtomicBitArray(comptime size: usize) type { - const num_bytes = (size + 7) / 8; - return struct { - bytes: [num_bytes]Atomic(u8) = .{.{ .value = 0 }} ** num_bytes, - - pub const len = size; - - const Self = @This(); - - pub fn get(self: *Self, index: usize, comptime ordering: Ordering) !bool { - if (index >= size) return error.OutOfBounds; - const bitmask = mask(index); - return self.bytes[index / 8].load(ordering) & bitmask == bitmask; - } - - pub fn set(self: *Self, index: usize, comptime ordering: Ordering) !void { - if (index >= size) return error.OutOfBounds; - _ = self.bytes[index / 8].fetchOr(mask(index), ordering); - } - - pub fn unset(self: *Self, index: usize, comptime ordering: Ordering) !void { - if (index >= size) return error.OutOfBounds; - _ = self.bytes[index / 8].fetchAnd(~mask(index), ordering); - } - - fn mask(index: usize) u8 { - return @as(u8, 1) << @intCast(index % 8); - } - }; -} - -test "sync.bit_array" { - var x = AtomicBitArray(3){}; - try std.testing.expect(!try x.get(0, .Monotonic)); - try std.testing.expect(!try x.get(1, .Monotonic)); - try std.testing.expect(!try x.get(2, .Monotonic)); - - try x.set(1, .Monotonic); - - try std.testing.expect(!try x.get(0, .Monotonic)); - try std.testing.expect(try x.get(1, .Monotonic)); - try std.testing.expect(!try x.get(2, .Monotonic)); - - try x.set(0, .Monotonic); - try x.set(1, .Monotonic); - try x.set(2, .Monotonic); - - try std.testing.expect(try x.get(0, .Monotonic)); - try std.testing.expect(try x.get(1, .Monotonic)); - try std.testing.expect(try x.get(2, .Monotonic)); - - try x.unset(2, .Monotonic); - try x.unset(1, .Monotonic); - try x.unset(2, .Monotonic); - - try std.testing.expect(try x.get(0, .Monotonic)); - try std.testing.expect(!try x.get(1, .Monotonic)); - try std.testing.expect(!try x.get(2, .Monotonic)); - - if (x.get(3, .Monotonic)) |_| @panic("") else |_| {} - if (x.set(3, .Monotonic)) |_| @panic("") else |_| {} - if (x.unset(3, .Monotonic)) |_| @panic("") else |_| {} - if (x.get(3, .Monotonic)) |_| @panic("") else |_| {} -} diff 
--git a/src/sync/channel.zig b/src/sync/channel.zig index e9a9fcce2..ceee47cb5 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -125,6 +125,27 @@ pub fn Channel(comptime T: type) type { return out; } + pub fn tryDrainRecycle( + self: *Self, + buf: *std.ArrayList(T), + ) error{ ChannelClosed, OutOfMemory }!void { + var buffer = self.buffer.lock(); + defer buffer.unlock(); + buf.clearRetainingCapacity(); + + if (self.closed.load(.SeqCst)) { + return error.ChannelClosed; + } + + var num_items_to_drain = buffer.get().items.len; + if (num_items_to_drain == 0) { + return; + } + + try buf.appendSlice(buffer.get().items); + buffer.mut().clearRetainingCapacity(); + } + pub fn close(self: *Self) void { self.closed.store(true, .SeqCst); self.has_value.broadcast(); diff --git a/src/tvu/repair_message.zig b/src/tvu/repair_message.zig index 674bf9ebc..f5131618b 100644 --- a/src/tvu/repair_message.zig +++ b/src/tvu/repair_message.zig @@ -48,7 +48,7 @@ pub const RepairRequest = union(enum) { /// /// Analogous to `ServeRepair::map_repair_request` pub fn serializeRepairRequest( - allocator: Allocator, + buf: []u8, request: RepairRequest, keypair: *const KeyPair, recipient: Pubkey, @@ -78,10 +78,7 @@ pub fn serializeRepairRequest( .slot = r, } }, }; - var buf = try allocator.alloc(u8, RepairMessage.MAX_SERIALIZED_SIZE); - var stream = std.io.fixedBufferStream(buf); - try bincode.write(null, stream.writer(), msg, .{}); - var serialized = try allocator.realloc(buf, stream.pos); + var serialized = try bincode.writeToSlice(buf, msg, .{}); var signer = try keypair.signer(null); // TODO noise signer.update(serialized[0..4]); diff --git a/src/tvu/repair_service.zig b/src/tvu/repair_service.zig index 5e73ff2f2..8a213339c 100644 --- a/src/tvu/repair_service.zig +++ b/src/tvu/repair_service.zig @@ -12,17 +12,62 @@ const Socket = zig_network.Socket; const BasicShredTracker = sig.tvu.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GossipTable = sig.gossip.GossipTable; +const HomogeneousThreadPool = sig.utils.HomogeneousThreadPool; const Logger = sig.trace.Logger; const LruCacheCustom = sig.common.LruCacheCustom; +const MultiSlotReport = sig.tvu.MultiSlotReport; const Nonce = sig.core.Nonce; +const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; const RwMux = sig.sync.RwMux; const SocketAddr = sig.net.SocketAddr; +const SocketThread = sig.net.SocketThread; const Slot = sig.core.Slot; const RepairRequest = sig.tvu.RepairRequest; +const RepairMessage = sig.tvu.RepairMessage; + const serializeRepairRequest = sig.tvu.serializeRepairRequest; +/// TODO: redundant? +pub fn initRepair( + allocator: Allocator, + logger: Logger, + my_keypair: *const KeyPair, + exit: *Atomic(bool), + random: Random, + gossip_table_rw: *RwMux(GossipTable), + my_shred_version: *Atomic(u16), + socket: *Socket, + shred_tracker: *BasicShredTracker, + start_slot: ?Slot, +) !RepairService { + const peer_provider = try RepairPeerProvider.init( + allocator, + random, + gossip_table_rw, + Pubkey.fromPublicKey(&my_keypair.public_key), + my_shred_version, + ); + const requester = try RepairRequester.init( + allocator, + logger, + random, + my_keypair, + socket, + exit, + ); + return RepairService.init( + allocator, + logger, + exit, + requester, + peer_provider, + shred_tracker, + start_slot, + ); +} + /// Identifies which repairs are needed and sends them /// - delegates to RepairPeerProvider to identify repair peers. /// - delegates to RepairRequester to send the requests. 
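/// Typical lifecycle, as wired up in cmd.zig above (a sketch, not the only valid usage): build the service with `initRepair`, spawn `RepairService.run` on its own thread, store `true` into `exit` to stop the loop, join the thread, and finally call `deinit`.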
@@ -33,10 +78,49 @@ pub const RepairService = struct { shred_tracker: *BasicShredTracker, logger: Logger, exit: *Atomic(bool), - slot_to_request: ?u64, + start_slot: ?Slot, + + /// memory to re-use across iterations. initialized to empty + report: MultiSlotReport, + + thread_pool: RequestBatchThreadPool, + + pub const RequestBatchThreadPool = HomogeneousThreadPool(struct { + requester: *RepairRequester, + requests: []AddressedRepairRequest, + + pub fn run(self: *@This()) !void { + return self.requester.sendRepairRequestBatch( + self.requester.allocator, + self.requests, + ); + } + }); const Self = @This(); + pub fn init( + allocator: Allocator, + logger: Logger, + exit: *Atomic(bool), + requester: RepairRequester, + peer_provider: RepairPeerProvider, + shred_tracker: *BasicShredTracker, + start_slot: ?Slot, + ) Self { + return RepairService{ + .allocator = allocator, + .requester = requester, + .peer_provider = peer_provider, + .shred_tracker = shred_tracker, + .logger = logger, + .exit = exit, + .start_slot = start_slot, // TODO: do nothing if null + .report = MultiSlotReport.init(allocator), + .thread_pool = RequestBatchThreadPool.init(allocator, 4), + }; + } + pub fn deinit(self: *Self) void { self.peer_provider.deinit(); } @@ -45,60 +129,70 @@ pub const RepairService = struct { pub fn run(self: *Self) !void { self.logger.info("starting repair service"); defer self.logger.info("exiting repair service"); + var timer = try std.time.Timer.start(); while (!self.exit.load(.Unordered)) { try self.sendNecessaryRepairs(); - // TODO sleep? - std.time.sleep(100_000_000); + std.time.sleep(100 * std.time.ns_per_ms -| timer.lap()); } } /// Identifies which repairs are needed based on the current state, /// and sends those repairs, then returns. fn sendNecessaryRepairs(self: *Self) !void { - // if (try self.initialSnapshotRepair()) |request| { - // try self.requester.sendRepairRequest(request); - // } const repair_requests = try self.getRepairs(); defer repair_requests.deinit(); const addressed_requests = try self.assignRequestsToPeers(repair_requests.items); defer addressed_requests.deinit(); - for (addressed_requests.items) |addressed_request| { - try self.requester.sendRepairRequest(addressed_request); - } - } - - fn initialSnapshotRepair(self: *Self) !?AddressedRepairRequest { - if (self.slot_to_request == null) return null; - const request: RepairRequest = .{ .HighestShred = .{ self.slot_to_request.?, 0 } }; - const maybe_peer = try self.peer_provider.getRandomPeer(self.slot_to_request.?); - if (maybe_peer) |peer| return .{ - .request = request, - .recipient = peer.pubkey, - .recipient_addr = peer.serve_repair_socket, + if (addressed_requests.items.len < 4) { + try self.requester.sendRepairRequestBatch(self.allocator, addressed_requests.items); } else { - return null; + for (0..4) |i| { + const start = (addressed_requests.items.len * i) / 4; + const end = (addressed_requests.items.len * (i + 1)) / 4; + try self.thread_pool.schedule(.{ + .requester = &self.requester, + .requests = addressed_requests.items[start..end], + }); + } + try self.thread_pool.joinFallible(); } + + // TODO less often + self.logger.infof("sent {} repair requests", .{addressed_requests.items.len}); } + const MAX_SHRED_REPAIRS = 1000; + const MAX_HIGHEST_REPAIRS = 100; + fn getRepairs(self: *Self) !ArrayList(RepairRequest) { - const all_missing = try self.shred_tracker.identifyMissing(self.allocator); - defer all_missing.deinit(); var repairs = ArrayList(RepairRequest).init(self.allocator); + try
self.shred_tracker.identifyMissing(&self.report); var individual_count: usize = 0; - for (all_missing.reports.items) |report| { - const slot = report.slot; + var highest_count: usize = 0; + var slot: Slot = 0; + outer: for (self.report.items()) |*report| { + slot = report.slot; for (report.missing_shreds.items) |shred_window| { if (shred_window.end) |end| { for (shred_window.start..end) |i| { - if (individual_count > 500) break; individual_count += 1; try repairs.append(.{ .Shred = .{ slot, i } }); + if (individual_count > MAX_SHRED_REPAIRS) { + break :outer; + } } - } else { - try repairs.append(.{ .HighestShred = .{ slot, shred_window.start } }); } } + if (highest_count < MAX_HIGHEST_REPAIRS) { + highest_count += 1; + try repairs.append(.{ .HighestShred = .{ slot, 0 } }); + } + } + if (highest_count < MAX_HIGHEST_REPAIRS) { + for (slot..slot + MAX_HIGHEST_REPAIRS - highest_count) |s| { + try repairs.append(.{ .HighestShred = .{ s, 0 } }); + } } return repairs; } @@ -116,7 +210,7 @@ pub const RepairService = struct { .recipient_addr = peer.serve_repair_socket, }); } - // TODO do something if a peer is not found + // TODO do something if a peer is not found? } return addressed; } @@ -125,35 +219,61 @@ /// Signs and serializes repair requests. Sends them over the network. pub const RepairRequester = struct { allocator: Allocator, + logger: Logger, rng: Random, keypair: *const KeyPair, - udp_send_socket: *Socket, - logger: Logger, + sender: SocketThread, const Self = @This(); - /// TODO: send batch - pub fn sendRepairRequest( + pub fn init( + allocator: Allocator, + logger: Logger, + rng: Random, + keypair: *const KeyPair, + udp_send_socket: *Socket, + exit: *Atomic(bool), + ) !Self { + const sndr = try SocketThread.initSender(allocator, logger, udp_send_socket, exit); + return .{ + .allocator = allocator, + .logger = logger, + .rng = rng, + .keypair = keypair, + .sender = sndr, + }; + } + + pub fn deinit(self: Self) void { + self.sender.deinit(); + } + + pub fn sendRepairRequestBatch( self: *const Self, - request: AddressedRepairRequest, + allocator: Allocator, + requests: []AddressedRepairRequest, ) !void { + var packet_batch = try std.ArrayList(Packet).initCapacity(allocator, requests.len); const timestamp = std.time.milliTimestamp(); - const data = try serializeRepairRequest( - self.allocator, - request.request, - self.keypair, - request.recipient, - @intCast(timestamp), - self.rng.int(Nonce), - ); - defer self.allocator.free(data); - const addr = request.recipient_addr.toString(); - _ = addr; - // self.logger.infof( - // "sending repair request to {s} - {}", - // .{ addr[0][0..addr[1]], request.request }, - // ); - _ = try self.udp_send_socket.sendTo(request.recipient_addr.toEndpoint(), data); + for (requests) |request| { + const packet = packet_batch.addOneAssumeCapacity(); + packet.* = Packet{ + .addr = request.recipient_addr.toEndpoint(), + .flags = 0, + .data = undefined, + .size = undefined, + }; + const data = try serializeRepairRequest( + &packet.data, + request.request, + self.keypair, + request.recipient, + @intCast(timestamp), + self.rng.int(Nonce), + ); + packet.size = data.len; + } + try self.sender.channel.send(packet_batch); } }; @@ -358,7 +478,7 @@ test "tvu.repair_service: RepairService sends repair request to gossip peer" { .peer_provider = peers, .logger = logger, .exit = &exit, - .slot_to_request = 13579, + .start_slot = 13579, .shred_tracker = &tracker, }; defer service.deinit(); diff --git a/src/tvu/shred.zig b/src/tvu/shred.zig
index f4e1050b6..91243f508 100644 --- a/src/tvu/shred.zig +++ b/src/tvu/shred.zig @@ -19,6 +19,7 @@ pub const MAX_DATA_SHREDS_PER_SLOT: usize = 32_768; pub const MAX_CODE_SHREDS_PER_SLOT: usize = MAX_DATA_SHREDS_PER_SLOT; pub const MAX_SHREDS_PER_SLOT: usize = MAX_CODE_SHREDS_PER_SLOT + MAX_DATA_SHREDS_PER_SLOT; +/// TODO this can be restructured with shared code lifted pub const Shred = union(enum) { code: ShredCode, data: ShredData, @@ -33,6 +34,12 @@ pub const Shred = union(enum) { }; } + pub fn header(self: *const Self) *const ShredCommonHeader { + return switch (self.*) { + inline .code, .data => |*s| &s.common_header, + }; + } + pub fn isLastInSlot(self: *const Self) bool { return switch (self.*) { .code => false, @@ -44,14 +51,14 @@ pub const Shred = union(enum) { pub const ShredData = struct { common_header: ShredCommonHeader, custom_header: DataShredHeader, - payload: ArrayList(u8), + // payload: ArrayList(u8), const SIZE_OF_PAYLOAD: usize = 1203; // TODO this can be calculated like solana const Self = @This(); pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self { - return try someShredFromPayload(Self, DataShredHeader, SIZE_OF_PAYLOAD, allocator, payload); + return try eitherShredFromPayload(Self, DataShredHeader, SIZE_OF_PAYLOAD, allocator, payload); } pub fn isLastInSlot(self: *const Self) bool { @@ -67,14 +74,14 @@ pub const ShredData = struct { pub const ShredCode = struct { common_header: ShredCommonHeader, custom_header: CodingShredHeader, - payload: ArrayList(u8), + // payload: ArrayList(u8), const SIZE_OF_PAYLOAD: usize = 1228; // TODO this can be calculated like solana const Self = @This(); pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self { - return try someShredFromPayload(Self, CodingShredHeader, SIZE_OF_PAYLOAD, allocator, payload); + return try eitherShredFromPayload(Self, CodingShredHeader, SIZE_OF_PAYLOAD, allocator, payload); } fn sanitize(self: *const Self) !void { @@ -83,7 +90,7 @@ pub const ShredCode = struct { } }; -fn someShredFromPayload( +fn eitherShredFromPayload( comptime Self: type, comptime Header: type, comptime SIZE_OF_PAYLOAD: usize, @@ -97,12 +104,12 @@ fn someShredFromPayload( var buf = std.io.fixedBufferStream(exact_payload); const common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}); const custom_header = try bincode.read(allocator, Header, buf.reader(), .{}); - var owned_payload = ArrayList(u8).init(allocator); - try owned_payload.appendSlice(exact_payload); // TODO this is expensive, but necessary, unless packet allocations are changed + // var owned_payload = ArrayList(u8).init(allocator); // TODO: find a cheaper way to get the payload in here + // try owned_payload.appendSlice(exact_payload); var self = Self{ .common_header = common_header, .custom_header = custom_header, - .payload = owned_payload, + // .payload = owned_payload, }; try self.sanitize(); return self; @@ -189,8 +196,12 @@ pub const ShredVariant = struct { pub const ShredVariantConfig = blk: { const S = struct { - pub fn serialize(_: anytype, _: anytype, _: bincode.Params) !void { - @panic("todo"); + pub fn serialize(writer: anytype, data: anytype, params: bincode.Params) !void { + _ = writer; + _ = params; + _ = data; + @panic("todo - not implemented"); // TODO + // try writer.writeByte(0); } pub fn deserialize(_: ?std.mem.Allocator, reader: anytype, _: bincode.Params) !ShredVariant { @@ -288,5 +299,3 @@ pub const shred_layout = struct { return std.mem.readInt(Int, bytes, .Little); } }; - -//new_from_serialized_shred diff
--git a/src/tvu/shred_processor.zig b/src/tvu/shred_processor.zig index 6c656a3b2..53d6badd0 100644 --- a/src/tvu/shred_processor.zig +++ b/src/tvu/shred_processor.zig @@ -15,25 +15,36 @@ const Shred = sig.tvu.Shred; /// analogous to `WindowService` pub fn processShreds( allocator: Allocator, - logger: Logger, verified_shreds: *Channel(ArrayList(Packet)), tracker: *BasicShredTracker, ) !void { - _ = logger; // TODO unreachables - while (verified_shreds.receive()) |packet_batch| { - for (packet_batch.items) |*packet| if (!packet.isSet(.discard)) { - const shred_payload = layout.getShred(packet) orelse unreachable; - const slot = layout.getSlot(shred_payload) orelse unreachable; - const index = layout.getIndex(shred_payload) orelse unreachable; - tracker.registerShred(slot, index) catch |e| { - if (e != error.SlotUnderflow) return e; - continue; + var processed_count: usize = 0; + var buf = ArrayList(ArrayList(Packet)).init(allocator); + while (true) { + try verified_shreds.tryDrainRecycle(&buf); + if (buf.items.len == 0) { + std.time.sleep(10 * std.time.ns_per_ms); + continue; + } + for (buf.items) |packet_batch| { + for (packet_batch.items) |*packet| if (!packet.isSet(.discard)) { + const shred_payload = layout.getShred(packet) orelse unreachable; + const slot = layout.getSlot(shred_payload) orelse unreachable; + const index = layout.getIndex(shred_payload) orelse unreachable; + tracker.registerShred(slot, index) catch |err| switch (err) { + error.SlotUnderflow, error.SlotOverflow => continue, + else => return err, + }; + const shred = try Shred.fromPayload(allocator, shred_payload); + if (shred.isLastInSlot()) { + tracker.setLastShred(slot, index) catch |err| switch (err) { + error.SlotUnderflow, error.SlotOverflow => continue, + else => return err, + }; + } + processed_count += 1; }; - const shred = try Shred.fromPayload(allocator, shred_payload); - if (shred.isLastInSlot()) { - try tracker.setLastShred(slot, index); - } - }; + } } } diff --git a/src/tvu/shred_receiver.zig b/src/tvu/shred_receiver.zig index cd77b8c15..6dfbac472 100644 --- a/src/tvu/shred_receiver.zig +++ b/src/tvu/shred_receiver.zig @@ -49,7 +49,7 @@ pub fn CachedAtomic(comptime T: type) type { }; } -/// Analogous to `ShredFetchStage` +/// Analogous to `ShredFetchStage` TODO permalinks pub const ShredReceiver = struct { allocator: Allocator, keypair: *const KeyPair, @@ -68,14 +68,36 @@ pub const ShredReceiver = struct { defer self.logger.err("exiting shred receiver"); errdefer self.logger.err("error in shred receiver"); - var sender = try SocketThread.initSender(self.allocator, self.logger, self.repair_socket, self.exit); + var sender = try SocketThread + .initSender(self.allocator, self.logger, self.repair_socket, self.exit); defer sender.deinit(); - var repair_receiver = try SocketThread.initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); + var repair_receiver = try SocketThread + .initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); defer repair_receiver.deinit(); - var tvu_receiver = try SocketThread.initReceiver(self.allocator, self.logger, self.tvu_socket, self.exit); - defer tvu_receiver.deinit(); - try self.runPacketHandler(.{ tvu_receiver.channel, repair_receiver.channel }, sender.channel); + const num_tvu_receivers = 2; + var tvu_receivers: [num_tvu_receivers]*Channel(ArrayList(Packet)) = undefined; + for (0..num_tvu_receivers) |i| { + tvu_receivers[i] = (try SocketThread.initReceiver( + self.allocator, + self.logger, + self.tvu_socket, + self.exit, + )).channel; + } + 
defer for (tvu_receivers) |r| r.deinit(); + const tvu_handle = try std.Thread.spawn( + .{}, + Self.runPacketHandler, + .{ self, tvu_receivers, sender.channel }, + ); + const repair_handle = try std.Thread.spawn( + .{}, + Self.runPacketHandler, + .{ self, .{repair_receiver.channel}, sender.channel }, + ); + tvu_handle.join(); + repair_handle.join(); } /// Keep looping over packet channel and process the incoming packets. @@ -85,11 +107,13 @@ pub const ShredReceiver = struct { receivers: anytype, sender: *Channel(ArrayList(Packet)), ) !void { + var buf = ArrayList(ArrayList(Packet)).init(self.allocator); while (!self.exit.load(.Unordered)) { inline for (receivers) |receiver| { var responses = ArrayList(Packet).init(self.allocator); - if (try receiver.try_drain()) |batches| { - for (batches) |batch| { + try receiver.tryDrainRecycle(&buf); + if (buf.items.len > 0) { + for (buf.items) |batch| { for (batch.items) |*packet| { try self.handlePacket(packet, &responses); } diff --git a/src/tvu/shred_tracker.zig b/src/tvu/shred_tracker.zig index f2c2337c3..7c85572e1 100644 --- a/src/tvu/shred_tracker.zig +++ b/src/tvu/shred_tracker.zig @@ -3,63 +3,53 @@ const sig = @import("../lib.zig"); const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; -const Atomic = std.atomic.Atomic; -const Ordering = std.atomic.Ordering; -const DefaultRwLock = std.Thread.RwLock.DefaultRwLock; const Mutex = std.Thread.Mutex; -const AtomicBitArray = sig.sync.AtomicBitArray; -const ReferenceCounter = sig.sync.ReferenceCounter; const Slot = sig.core.Slot; +const Shred = sig.tvu.Shred; -pub const MAX_SHREDS_PER_SLOT: usize = sig.tvu.MAX_SHREDS_PER_SLOT; +const MAX_SHREDS_PER_SLOT: usize = sig.tvu.MAX_SHREDS_PER_SLOT; + +pub const Range = struct { + start: usize, + end: ?usize, +}; -/// Naively tracks which shreds have been received, so we can request missing shreds. -/// Has no awareness of forking. -/// Placeholder until more sophisticated Blockstore and RepairWeights implementation. -/// -/// This struct is thread safe. Public methods can be called from anywhere at any time. pub const BasicShredTracker = struct { allocator: Allocator, - - /// prevents multiple threads from executing a rotation simultaneously - rotation_lock: Mutex = Mutex{}, - - /// The starting slot when this is first created, when the shard_counter = 0 - /// never changes + logger: sig.trace.Logger, + mux: Mutex = .{}, + /// The slot this struct was initialized with. It occupies index 0 of the ring buffer. start_slot: Slot, + /// The oldest slot still being tracked, which hasn't yet been finished + current_bottom_slot: Slot, + /// The highest slot for which a shred has been received and processed successfully. + max_slot_seen: Slot, + /// ring buffer of the tracked slots, indexed by slot % num_slots + slots: [num_slots]MonitoredSlot = .{.{}} ** num_slots, - /// The lowest slot currently tracked - first_slot: Atomic(Slot), - /// The highest slot currently tracked - last_slot: Atomic(Slot), - slots: [num_slots]Atomic(*MonitoredSlot), - good_until: Atomic(Slot), - max_slot_seen: Atomic(Slot), - const num_slots: usize = 128; + const num_slots: usize = 1024; const Self = @This(); - pub fn init(allocator: Allocator, slot: Slot) !Self { - var slots: [num_slots]Atomic(*MonitoredSlot) = undefined; - for (&slots) |*s| s.* = .{ .value = try MonitoredSlot.init(allocator) }; - // TODO is this off by one?
- return .{ + pub fn init( + allocator: Allocator, + slot: Slot, + logger: sig.trace.Logger, + ) !*Self { + var self = try allocator.create(Self); + self.* = .{ .allocator = allocator, .start_slot = slot, - .good_until = Atomic(Slot).init(slot), - .max_slot_seen = Atomic(Slot).init(slot), - .first_slot = Atomic(Slot).init(slot), - .last_slot = Atomic(Slot).init(slot + num_slots - 1), - .slots = slots, + .current_bottom_slot = slot, + .max_slot_seen = slot -| 1, + .logger = logger, }; + return self; } pub fn deinit(self: *Self) void { - for (self.slots) |s| s.load(.Monotonic).release(); + self.allocator.destroy(self); } pub fn registerShred( @@ -67,196 +57,172 @@ pub const BasicShredTracker = struct { slot: Slot, shred_index: u64, ) !void { - try self.rotate(); - _ = self.max_slot_seen.fetchMax(slot, .Monotonic); + self.mux.lock(); + defer self.mux.unlock(); + const monitored_slot = try self.getSlot(slot); - defer monitored_slot.release(); - try monitored_slot.record(shred_index); + const new = try monitored_slot.record(shred_index); + if (new) self.logger.debugf("new slot: {}", .{slot}); + self.max_slot_seen = @max(self.max_slot_seen, slot); } - // TODO make use of this pub fn setLastShred(self: *Self, slot: Slot, index: usize) !void { + self.mux.lock(); + defer self.mux.unlock(); + const monitored_slot = try self.getSlot(slot); - defer monitored_slot.release(); - monitored_slot.setLastShred(index); + if (monitored_slot.last_shred) |old_last| { + monitored_slot.last_shred = @min(old_last, index); + } else { + monitored_slot.last_shred = index; + } } - pub fn identifyMissing(self: *Self, allocator: Allocator) !MultiSlotReport { - var found_bad = false; - var slot_reports = ArrayList(SlotReport).init(allocator); - const max_slot_seen = self.max_slot_seen.load(.Monotonic); - for (self.good_until.load(.Monotonic)..max_slot_seen + 1) |slot| { + pub fn identifyMissing(self: *Self, slot_reports: *MultiSlotReport) !void { + self.mux.lock(); + defer self.mux.unlock(); + + var found_an_incomplete_slot = false; + slot_reports.clearRetainingCapacity(); + const timestamp = std.time.milliTimestamp(); + const last_slot_to_check = @max(self.max_slot_seen, self.current_bottom_slot); + for (self.current_bottom_slot..last_slot_to_check + 1) |slot| { const monitored_slot = try self.getSlot(slot); - defer monitored_slot.release(); - const missing_shreds = try monitored_slot.identifyMissing(allocator); - if (missing_shreds.items.len > 0) { - found_bad = true; - try slot_reports.append(.{ .slot = slot, .missing_shreds = missing_shreds }); + if (monitored_slot.first_received_timestamp_ms + 200 > timestamp) { continue; } - if (!found_bad) { - const old = self.good_until.fetchMax(slot, .Monotonic); - if (old != slot) { - // TODO remove this - std.debug.print("finished slot: {}\n", .{old}); - } + var slot_report = try slot_reports.addOne(); + slot_report.slot = slot; + try monitored_slot.identifyMissing(&slot_report.missing_shreds); + if (slot_report.missing_shreds.items.len > 0) { + found_an_incomplete_slot = true; + } else { + slot_reports.drop(1); + } + if (!found_an_incomplete_slot) { + self.logger.debugf("finished slot: {}", .{slot}); + self.current_bottom_slot = @max(self.current_bottom_slot, slot + 1); + monitored_slot.* = .{}; } } - var last_one = ArrayList(Range).init(allocator); - try last_one.append(.{ .start = 0, .end = null }); - try slot_reports.append(.{ .slot = max_slot_seen + 1, .missing_shreds = last_one }); - return .{ .reports = slot_reports }; } fn getSlot(self: *Self, slot: Slot) error{
SlotUnderflow, SlotOverflow }!*MonitoredSlot { - const slot_index = (slot - self.start_slot) % num_slots; - if (slot > self.last_slot.load(.Acquire)) { + if (slot > self.current_bottom_slot + num_slots - 1) { return error.SlotOverflow; } - const the_slot = self.slots[slot_index].load(.Acquire); - if (slot < self.first_slot.load(.Monotonic)) { + if (slot < self.current_bottom_slot) { return error.SlotUnderflow; } - return the_slot.acquire() catch { - return error.SlotUnderflow; - }; - } - - fn rotate(self: *Self) !void { - if (!self.rotation_lock.tryLock()) return; - defer self.rotation_lock.unlock(); - - const good_until = self.good_until.load(.Monotonic); - for (self.first_slot.load(.Monotonic)..self.last_slot.load(.Monotonic)) |slot_num| { - var slot = &self.slots[slot_num % num_slots]; - if (good_until <= slot_num) { // TODO off by one? - break; - } - _ = self.first_slot.fetchAdd(1, .Monotonic); - const new_slot = try MonitoredSlot.init(self.allocator); - slot.swap(new_slot, .Monotonic).release(); - _ = self.last_slot.fetchAdd(1, .Monotonic); - } + const slot_index = (slot - self.start_slot) % num_slots; + return &self.slots[slot_index]; } }; -pub const MultiSlotReport = struct { - reports: ArrayList(SlotReport), - - pub fn deinit(self: @This()) void { - for (self.reports.items) |report| { - report.missing_shreds.deinit(); - } - self.reports.deinit(); - } -}; +pub const MultiSlotReport = sig.utils.RecyclingList( + SlotReport, + SlotReport.initBlank, + SlotReport.reset, + SlotReport.deinit, +); pub const SlotReport = struct { slot: Slot, missing_shreds: ArrayList(Range), -}; - -pub const Range = struct { - start: usize, - end: ?usize, -}; - -/// This is reference counted. -/// Do not use without calling acquire first. -/// Call release when done with a particular usage. -const MonitoredSlot = struct { - allocator: Allocator, - refcount: ReferenceCounter = .{}, - shreds: AtomicBitArray(MAX_SHREDS_PER_SLOT) = .{}, - max_seen: Atomic(usize) = Atomic(usize).init(0), - last_shred: Atomic(usize) = Atomic(usize).init(unknown), - - const unknown = std.math.maxInt(usize); - - const Self = @This(); - pub fn init(allocator: Allocator) !*Self { - var self = try allocator.create(Self); - self.* = .{ .allocator = allocator }; - return self; + fn initBlank(allocator: Allocator) SlotReport { + return .{ + .slot = undefined, + .missing_shreds = ArrayList(Range).init(allocator), + }; } - pub fn acquire(self: *Self) !*Self { - if (self.refcount.acquire()) { - return self; - } - return error.Destroyed; + fn deinit(self: SlotReport) void { + self.missing_shreds.deinit(); } - pub fn release(self: *Self) void { - if (self.refcount.release()) { - self.allocator.destroy(self); - } + fn reset(self: *SlotReport) void { + self.missing_shreds.clearRetainingCapacity(); } +}; - // TODO: can all these be unordered? 
- pub fn record(self: *Self, shred_index: usize) !void { - try self.shreds.set(shred_index, .Monotonic); - _ = self.max_seen.fetchMax(shred_index, .Monotonic); - } +const ShredSet = std.bit_set.ArrayBitSet(usize, MAX_SHREDS_PER_SLOT / 10); - // TODO make use of this - pub fn setLastShred(self: *Self, value: usize) void { - self.last_shred.store(value, .Monotonic); +const MonitoredSlot = struct { + shreds: ShredSet = ShredSet.initEmpty(), + max_seen: ?usize = null, + last_shred: ?usize = null, + first_received_timestamp_ms: i64 = 0, + + const Self = @This(); + + pub fn record(self: *Self, shred_index: usize) !bool { + self.shreds.set(shred_index); + if (self.max_seen == null) { + self.max_seen = shred_index; + self.first_received_timestamp_ms = std.time.milliTimestamp(); + return true; + } + self.max_seen = @max(self.max_seen.?, shred_index); + return false; } - pub fn identifyMissing(self: *Self, allocator: Allocator) !ArrayList(Range) { - var missing_windows = ArrayList(Range).init(allocator); + pub fn identifyMissing(self: *Self, missing_shreds: *ArrayList(Range)) !void { + missing_shreds.clearRetainingCapacity(); + const highest_shred_to_check = self.last_shred orelse self.max_seen orelse 0; var gap_start: ?usize = null; - const last_shred = self.last_shred.load(.Monotonic); - const max_seen = self.max_seen.load(.Monotonic); - for (0..max_seen + 2) |i| { - if (self.shreds.get(i, .Monotonic) catch unreachable) { + for (0..highest_shred_to_check + 1) |i| { + if (self.shreds.isSet(i)) { if (gap_start) |start| { - try missing_windows.append(.{ .start = start, .end = i }); + try missing_shreds.append(.{ .start = start, .end = i }); gap_start = null; } } else if (gap_start == null) { gap_start = i; } } - if (max_seen < last_shred) { - const start = if (gap_start) |x| x else max_seen; // TODO is this redundant? - const end = if (last_shred == unknown) null else last_shred; - try missing_windows.append(.{ .start = start, .end = end }); + if (self.last_shred == null or self.max_seen == null) { + try missing_shreds.append(.{ .start = 0, .end = null }); + } else if (self.max_seen.? < self.last_shred.?) { + try missing_shreds.append(.{ .start = self.max_seen.? 
+ 1, .end = self.last_shred }); } - return missing_windows; } }; test "tvu.shred_tracker: trivial happy path" { const allocator = std.testing.allocator; + var msr = MultiSlotReport.init(allocator); + defer msr.deinit(); + var tracker = try BasicShredTracker.init(allocator, 13579, .noop); defer tracker.deinit(); - const output = try tracker.identifyMissing(allocator); - defer output.deinit(); + try tracker.identifyMissing(&msr); - try std.testing.expect(1 == output.reports.items.len); - const report = output.reports.items[0]; + try std.testing.expect(1 == msr.len); + const report = msr.items()[0]; try std.testing.expect(13579 == report.slot); try std.testing.expect(1 == report.missing_shreds.items.len); try std.testing.expect(0 == report.missing_shreds.items[0].start); try std.testing.expect(null == report.missing_shreds.items[0].end); } test "tvu.shred_tracker: 1 registered shred is identified" { const allocator = std.testing.allocator; + var msr = MultiSlotReport.init(allocator); + defer msr.deinit(); + var tracker = try BasicShredTracker.init(allocator, 13579, .noop); defer tracker.deinit(); try tracker.registerShred(13579, 123); + std.time.sleep(210 * std.time.ns_per_ms); - const output = try tracker.identifyMissing(allocator); - defer output.deinit(); + try tracker.identifyMissing(&msr); - try std.testing.expect(1 == output.reports.items.len); - const report = output.reports.items[0]; + try std.testing.expect(1 == msr.len); + const report = msr.items()[0]; try std.testing.expect(13579 == report.slot); try std.testing.expect(2 == report.missing_shreds.items.len); try std.testing.expect(0 == report.missing_shreds.items[0].start); diff --git a/src/tvu/shred_verifier.zig b/src/tvu/shred_verifier.zig index c0ccba212..bc8173223 100644 --- a/src/tvu/shred_verifier.zig +++ b/src/tvu/shred_verifier.zig @@ -15,42 +15,51 @@ pub fn runShredSigVerify( exit: *Atomic(bool), incoming: *Channel(ArrayList(Packet)), verified: *Channel(ArrayList(Packet)), + leader_schedule: LeaderScheduleCalculator, ) void { - while (incoming.receive()) |packet_batch| { - // TODO parallelize this once it's actually verifying signatures - for (packet_batch.items) |*packet| { - if (!verifyShred(packet, {})) { - packet.set(.discard); + // TODO: replace the `catch unreachable`s below with real error handling + var verified_count: usize = 0; + var buf: ArrayList(ArrayList(Packet)) = ArrayList(ArrayList(Packet)).init(incoming.allocator); + while (true) { + incoming.tryDrainRecycle(&buf) catch unreachable; + if (buf.items.len == 0) { + std.time.sleep(10 * std.time.ns_per_ms); + continue; + } + for (buf.items) |packet_batch| { + // TODO parallelize this once it's actually verifying signatures + for (packet_batch.items) |*packet| { + if (!verifyShred(packet, &leader_schedule)) { + packet.set(.discard); + } else { + verified_count += 1; + } } + verified.send(packet_batch) catch unreachable; // TODO + if (exit.load(.Monotonic)) return; } - verified.send(packet_batch) catch unreachable; // TODO - if (exit.load(.Monotonic)) return; } } /// verify_shred_cpu -/// TODO slot leaders -fn verifyShred(packet: *const Packet, slot_leaders: void) bool { +fn verifyShred(packet: *const Packet, leader_schedule: *const LeaderScheduleCalculator) bool { if (packet.isSet(.discard)) return false; const shred = shred_layout.getShred(packet) orelse return false; const slot = shred_layout.getSlot(shred) orelse return false; const signature = shred_layout.getSignature(shred) orelse
return false; const signed_data = shred_layout.getSignedData(shred) orelse return false; - // TODO get slot leader pubkey and actually verify signature - _ = slot_leaders; - _ = slot; - if (false) return signature.verify(unreachable, signed_data.data); + // TODO: once implemented, this should no longer be optional + if (leader_schedule.getLeader(slot)) |leader| { + return signature.verify(leader, &signed_data.data); + } return true; } -// pub const EpochLeaderSchedule = struct { -// data: []const sig.core.Pubkey, -// first_slot: sig.core.Slot, -// fn getLeader(self: *@This(), slot: sig.core.Slot) sig.core.Pubkey { -// const index = @as(usize, @intCast(slot)) - @as(usize, @intCast(self.first_slot)); -// return self.data[index]; -// } -// }; +// TODO +pub const LeaderScheduleCalculator = struct { + fn getLeader(_: *const @This(), _: sig.core.Slot) ?sig.core.Pubkey { + return null; + } +}; diff --git a/src/utils/arraylist.zig b/src/utils/arraylist.zig index 3c5177637..bc675b19b 100644 --- a/src/utils/arraylist.zig +++ b/src/utils/arraylist.zig @@ -1,6 +1,9 @@ const std = @import("std"); const bincode = @import("../bincode/bincode.zig"); +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; + pub fn ArrayListConfig(comptime Child: type) bincode.FieldConfig(std.ArrayList(Child)) { const S = struct { pub fn serialize(writer: anytype, data: anytype, params: bincode.Params) !void { @@ -53,3 +56,62 @@ pub fn defaultArrayListOnEOFConfig(comptime T: type) bincode.FieldConfig(std.Arr .default_fn = S.defaultEOF, }; } + +/// A list that recycles items that were removed from the list. +/// +/// Useful for types that are expensive to instantiate, like +/// those that include allocations. +/// +/// When you call `addOne`, it returns a pointer to an item of +/// type T, which could either be a new item created with initBlank, +/// or one that was previously removed from the list and had +/// resetItem called on it. +pub fn RecyclingList( + comptime T: type, + comptime initBlank: fn (Allocator) T, + comptime resetItem: fn (*T) void, + comptime deinitOne: fn (T) void, +) type { + return struct { + /// Contains valid items up to `len` + /// Any other items beyond len in this arraylist are not valid.
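+        // Usage sketch (illustrative only; `Foo` is a hypothetical element type
+        // supplying the three lifecycle functions named above):
+        //
+        //     var list = RecyclingList(Foo, Foo.initBlank, Foo.reset, Foo.deinit).init(allocator);
+        //     defer list.deinit();
+        //     const foo = try list.addOne(); // fresh, or recycled and reset
+        //     list.clearRetainingCapacity(); // empties the list but keeps the allocations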
+ private: ArrayList(T), + len: usize = 0, + + const Self = @This(); + + pub fn init(allocator: Allocator) Self { + return .{ .private = ArrayList(T).init(allocator) }; + } + + pub fn deinit(self: Self) void { + for (self.private.items) |item| deinitOne(item); + self.private.deinit(); + } + + pub fn items(self: *const Self) []const T { + return self.private.items[0..self.len]; + } + + pub fn clearRetainingCapacity(self: *Self) void { + self.len = 0; + } + + pub fn addOne(self: *Self) !*T { + if (self.len < self.private.items.len) { + const item = &self.private.items[self.len]; + resetItem(item); + self.len += 1; + return item; + } + var item = try self.private.addOne(); + item.* = initBlank(self.private.allocator); + self.len += 1; + return item; + } + + pub fn drop(self: *Self, n: usize) void { + self.len -|= n; + } + }; +} diff --git a/src/utils/bitflags.zig b/src/utils/bitflags.zig index e5ea82d03..69b723894 100644 --- a/src/utils/bitflags.zig +++ b/src/utils/bitflags.zig @@ -1,5 +1,5 @@ pub fn BitFlags(comptime FlagEnum: type) type { - return struct { + return packed struct { state: @typeInfo(FlagEnum).Enum.tag_type, const Self = @This(); diff --git a/src/utils/thread.zig b/src/utils/thread.zig index 20c8bcc6c..0c105977a 100644 --- a/src/utils/thread.zig +++ b/src/utils/thread.zig @@ -1,7 +1,11 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; +const Atomic = std.atomic.Atomic; +const Condition = std.Thread.Condition; +const Mutex = std.Thread.Mutex; + const ThreadPool = @import("../sync/thread_pool.zig").ThreadPool; -const Task = ThreadPool.Task; const Batch = ThreadPool.Batch; /// Spawns tasks and returns a list of threads @@ -43,7 +47,7 @@ pub fn ThreadPoolTask( comptime EntryType: type, ) type { return struct { - task: Task, + task: ThreadPool.Task, entry: EntryType, done: std.atomic.Atomic(bool) = std.atomic.Atomic(bool).init(true), @@ -60,7 +64,7 @@ pub fn ThreadPoolTask( return tasks; } - fn callback(task: *Task) void { + fn callback(task: *ThreadPool.Task) void { var self = @fieldParentPtr(Self, "task", task); std.debug.assert(!self.done.load(std.atomic.Ordering.Acquire)); defer { @@ -87,3 +91,134 @@ pub fn ThreadPoolTask( } }; } + +/// Wrapper for ThreadPool to run many tasks of the same type. +/// +/// TaskType must define a method `fn run(self: *TaskType) T`; `T` may be any type, including an error union. +/// +/// TODO: this should be able to work with a pre-existing thread pool. +/// Ideally it could also impose its own limit on concurrently running tasks, +/// without spawning extra threads to monitor them, and without +/// blocking callers. Not sure if that is possible, but try to balance those goals. +pub fn HomogeneousThreadPool(comptime TaskType: type) type { + // the task's return type + const TaskResult = @typeInfo(@TypeOf(TaskType.run)).Fn.return_type.?; + + // compatibility layer between the user-defined TaskType and ThreadPool's Task type + const TaskAdapter = struct { + /// logic to pass to underlying thread pool + pool_task: ThreadPool.Task = .{ .callback = Self.run }, + + /// whether the task has completed. + /// do not touch without locking the mutex. + done: bool = false, + /// locks done to avoid infinite wait on the condition + /// due to a potential race condition. + done_lock: Mutex = .{}, + /// broadcasts to joiners when done becomes true + done_notifier: Condition = .{}, + + /// The task's inputs and state. + /// TaskType.run is the task's logic, which uses the data in this struct.
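+        // How the adapter works: the underlying ThreadPool only ever sees
+        // `pool_task`. When the pool invokes its callback, `run` (below) uses
+        // @fieldParentPtr to recover the enclosing TaskAdapter, calls the
+        // user-supplied TaskType.run on `typed_task`, and writes the return
+        // value through `result`.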
+ typed_task: TaskType, + + /// the return value of the task + /// - points to undefined data until the task is complete + /// - memory address may become invalid after task is joined, if caller decides to deinit results + result: *TaskResult, + + const Self = @This(); + + fn run(pool_task: *ThreadPool.Task) void { + var self = @fieldParentPtr(Self, "pool_task", pool_task); + + self.result.* = self.typed_task.run(); + + // signal completion + self.done_lock.lock(); + self.done = true; + self.done_notifier.broadcast(); + self.done_lock.unlock(); + } + + /// blocks until the task is complete. + fn join(self: *Self) void { + self.done_lock.lock(); + while (!self.done) self.done_notifier.wait(&self.done_lock); + self.done_lock.unlock(); + } + }; + + return struct { + allocator: std.mem.Allocator, + pool: ThreadPool, + tasks: std.ArrayList(TaskAdapter), + results: std.ArrayList(TaskResult), + + pub const Task = TaskType; + + const Self = @This(); + + pub fn init(allocator: std.mem.Allocator, num_threads: u32) Self { + return .{ + .allocator = allocator, + .pool = ThreadPool.init(.{ .max_threads = num_threads }), + .tasks = std.ArrayList(TaskAdapter).init(allocator), + .results = std.ArrayList(TaskResult).init(allocator), + }; + } + + pub fn deinit(self: *Self) void { + self.pool.shutdown(); + self.tasks.deinit(); + self.results.deinit(); + self.pool.deinit(); + } + + pub fn schedule(self: *Self, typed_task: TaskType) Allocator.Error!void { + const result = try self.results.addOne(); + var task = try self.tasks.addOne(); + task.* = .{ .typed_task = typed_task, .result = result }; + self.pool.schedule(Batch.from(&task.pool_task)); + } + + /// Blocks until all tasks are complete. + /// Returns the results of every scheduled task, in scheduling order; + /// the caller owns the returned list and must deinit it. + pub fn join(self: *Self) std.ArrayList(TaskResult) { + for (self.tasks.items) |*task| task.join(); + const results = self.results; + self.results = std.ArrayList(TaskResult).init(self.allocator); + self.tasks.clearRetainingCapacity(); + return results; + } + + /// Like join, but it returns an error if any tasks failed, and otherwise discards task output.
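+        /// Usage sketch (illustrative; assumes a hypothetical `MyTask` whose
+        /// `run` returns an error union):
+        ///
+        ///     var pool = HomogeneousThreadPool(MyTask).init(allocator, 4);
+        ///     defer pool.deinit();
+        ///     try pool.schedule(.{ .input = input });
+        ///     try pool.joinFallible(); // surfaces the first task error, if any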
+ pub fn joinFallible(self: *Self) !void { + const results = self.join(); + defer results.deinit(); + for (results.items) |result| try result; + } + }; +} + +test "typed thread pool" { + const AdditionTask = struct { + a: u64, + b: u64, + pub fn run(self: *const @This()) u64 { + return self.a + self.b; + } + }; + + var pool = HomogeneousThreadPool(AdditionTask).init(std.testing.allocator, 2); + defer pool.deinit(); + try pool.schedule(.{ .a = 1, .b = 1 }); + try pool.schedule(.{ .a = 1, .b = 2 }); + try pool.schedule(.{ .a = 1, .b = 4 }); + + const results = pool.join(); + defer results.deinit(); + + try std.testing.expect(3 == results.items.len); + try std.testing.expect(2 == results.items[0]); + try std.testing.expect(3 == results.items[1]); + try std.testing.expect(5 == results.items[2]); +} From 40f5955b97a93bd08c55d398cb7d6ad47a7e2821 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 30 Apr 2024 09:57:17 -0400 Subject: [PATCH 04/51] fix: 0.12 upgrade/fixes --- src/cmd/cmd.zig | 20 ++++++++++---------- src/cmd/config.zig | 7 ++++--- src/net/net.zig | 6 +++--- src/sync/channel.zig | 4 ++-- src/sync/ref_counter.zig | 4 ++-- src/tvu/repair_service.zig | 8 ++++++-- src/tvu/shred.zig | 2 +- src/tvu/shred_receiver.zig | 8 +++++--- src/tvu/shred_tracker.zig | 2 +- src/tvu/shred_verifier.zig | 4 ++-- src/utils/arraylist.zig | 2 +- src/utils/thread.zig | 4 ++-- 12 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 20df7bcc9..95c384778 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -93,7 +93,7 @@ var gossip_port_option = cli.Option{ var repair_port_option = cli.Option{ .long_name = "repair-port", .help = "The port to run tvu repair listener - default: 8002", - .value_ref = cli.mkRef(&config.current.repair.port), + .value_ref = cli.mkRef(&config.current.tvu.repair_port), .required = false, .value_name = "Repair Port", }; @@ -101,15 +101,15 @@ var repair_port_option = cli.Option{ var tvu_port_option = cli.Option{ .long_name = "tvu-port", .help = "The port to run turbine listener - default: 8003", - .value = cli.OptionValue{ .int = 8003 }, + .value_ref = cli.mkRef(&config.current.tvu.tvu_port), .required = false, - .value_name = "Repair Port", + .value_name = "TVU Port", }; var test_repair_option = cli.Option{ .long_name = "test-repair-for-slot", .help = "Set a slot here to repeatedly send repair requests for shreds from this slot. This is only intended for use during short-lived tests of the repair service.
Do not set this during normal usage.", - .value_ref = cli.mkRef(&config.current.repair.test_repair_slot), + .value_ref = cli.mkRef(&config.current.tvu.test_repair_slot), .required = false, .value_name = "slot number", }; @@ -411,8 +411,8 @@ fn validator() !void { defer entrypoints.deinit(); const ip_echo_data = try getMyDataFromIpEcho(logger, entrypoints.items); - const repair_port: u16 = config.current.repair.port; - const tvu_port: u16 = config.current.tvu.port; + const repair_port: u16 = config.current.tvu.repair_port; + const tvu_port: u16 = config.current.tvu.tvu_port; // gossip var gossip_service = try initGossip( @@ -444,7 +444,7 @@ fn validator() !void { const shred_tracker = try sig.tvu.BasicShredTracker.init( gpa_allocator, - @intCast(test_repair_option.value.int orelse 0), + @intCast(config.current.tvu.test_repair_slot orelse 0), logger, ); const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); @@ -460,7 +460,7 @@ fn validator() !void { &gossip_service.my_shred_version, &repair_socket, shred_tracker, - if (test_repair_option.value.int) |n| @intCast(n) else null, + if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null, ); defer repair_svc.deinit(); var repair_handle = try std.Thread.spawn(.{}, RepairService.run, .{&repair_svc}); @@ -491,8 +491,6 @@ fn validator() !void { sig.tvu.processShreds, .{ gpa_allocator, verified_shreds_channel, shred_tracker }, ); - _ = process_shreds_handle; - _ = verify_shreds_handle; // accounts db var snapshots = try getOrDownloadSnapshots( @@ -567,6 +565,8 @@ fn validator() !void { gossip_handle.join(); repair_handle.join(); shred_receive_handle.join(); + process_shreds_handle.join(); + verify_shreds_handle.join(); } /// Initialize an instance of GossipService and configure with CLI arguments diff --git a/src/cmd/config.zig b/src/cmd/config.zig index 39201f257..f09dee53f 100644 --- a/src/cmd/config.zig +++ b/src/cmd/config.zig @@ -3,7 +3,7 @@ const ACCOUNT_INDEX_BINS = @import("../accountsdb/db.zig").ACCOUNT_INDEX_BINS; pub const Config = struct { identity: IdentityConfig = .{}, gossip: GossipConfig = .{}, - repair: RepairConfig = .{}, + tvu: TvuConfig = .{}, accounts_db: AccountsDbConfig = .{}, // general config log_level: []const u8 = "debug", @@ -24,8 +24,9 @@ const GossipConfig = struct { trusted_validators: [][]const u8 = &.{}, }; -const RepairConfig = struct { - port: u16 = 8002, +const TvuConfig = struct { + tvu_port: u16 = 8003, + repair_port: u16 = 8002, test_repair_slot: ?u64 = null, }; diff --git a/src/net/net.zig b/src/net/net.zig index 5cd5ed242..7bdebdb27 100644 --- a/src/net/net.zig +++ b/src/net/net.zig @@ -463,10 +463,10 @@ pub fn endpointToString(allocator: std.mem.Allocator, endpoint: *const network.E /// Socket.enablePortReuse does not actually enable SO_REUSEPORT. It sets SO_REUSEADDR. /// This is the correct implementation to enable SO_REUSEPORT.
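/// A minimal calling sketch (error handling elided; mirrors the socket setup in cmd.zig):
///
///     var sock = try network.Socket.create(network.AddressFamily.ipv4, network.Protocol.udp);
///     try enablePortReuse(&sock, true);
///     try sock.bindToPort(port);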
-pub fn enablePortReuse(sock: *network.Socket, enabled: bool) !void { - const setsockopt_fn = if (builtin.os.tag == .windows) @panic("windows not supported") else std.os.setsockopt; // TODO windows +pub fn enablePortReuse(self: *network.Socket, enabled: bool) !void { + const setsockopt_fn = if (builtin.os.tag == .windows) @panic("windows not supported") else std.posix.setsockopt; var opt: c_int = if (enabled) 1 else 0; - try setsockopt_fn(sock.internal, std.os.SOL.SOCKET, std.os.SO.REUSEPORT, std.mem.asBytes(&opt)); + try setsockopt_fn(self.internal, std.posix.SOL.SOCKET, std.posix.SO.REUSEPORT, std.mem.asBytes(&opt)); } test "net.net: invalid ipv4 socket parsing" { diff --git a/src/sync/channel.zig b/src/sync/channel.zig index a03918963..f6e8b7ff6 100644 --- a/src/sync/channel.zig +++ b/src/sync/channel.zig @@ -133,11 +133,11 @@ pub fn Channel(comptime T: type) type { defer buffer.unlock(); buf.clearRetainingCapacity(); - if (self.closed.load(.SeqCst)) { + if (self.closed.load(.seq_cst)) { return error.ChannelClosed; } - var num_items_to_drain = buffer.get().items.len; + const num_items_to_drain = buffer.get().items.len; if (num_items_to_drain == 0) { return; } diff --git a/src/sync/ref_counter.zig b/src/sync/ref_counter.zig index 5fb83e7d3..6b732ab6c 100644 --- a/src/sync/ref_counter.zig +++ b/src/sync/ref_counter.zig @@ -44,11 +44,11 @@ pub const ReferenceCounter = struct { .Acquire, )); if (prior.refs > prior.acquirers) { - _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1 }), .Monotonic); + _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1 }), .monotonic); return true; } // resource was already destroyed - _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1, .refs = 1 }), .Monotonic); + _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1, .refs = 1 }), .monotonic); return false; } diff --git a/src/tvu/repair_service.zig b/src/tvu/repair_service.zig index 8ac9fa2a9..e2ebf095d 100644 --- a/src/tvu/repair_service.zig +++ b/src/tvu/repair_service.zig @@ -2,6 +2,9 @@ const std = @import("std"); const zig_network = @import("zig-network"); const sig = @import("../lib.zig"); +const bincode = sig.bincode; +const socket_tag = sig.gossip.socket_tag; + const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; @@ -20,6 +23,7 @@ const Nonce = sig.core.Nonce; const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; const RwMux = sig.sync.RwMux; +const SignedGossipData = sig.gossip.SignedGossipData; const SocketAddr = sig.net.SocketAddr; const SocketThread = sig.net.SocketThread; const Slot = sig.core.Slot; @@ -130,7 +134,7 @@ pub const RepairService = struct { self.logger.info("starting repair service"); defer self.logger.info("exiting repair service"); var timer = try std.time.Timer.start(); - while (!self.exit.load(.Unordered)) { + while (!self.exit.load(.unordered)) { try self.sendNecessaryRepairs(); std.time.sleep(100 * std.time.ns_per_ms -| timer.lap()); } @@ -604,7 +608,7 @@ const TestPeerGenerator = struct { try self.gossip.insert(try SignedGossipData.initSigned(.{ .ContactInfo = contact_info }, &keypair), wallclock); switch (peer_type) { inline .HasSlot, .MissingSlot => { - var lowest_slot = LowestSlot.random(self.random); + var lowest_slot = sig.gossip.LowestSlot.random(self.random); lowest_slot.from = pubkey; lowest_slot.lowest = switch (peer_type) { .MissingSlot => self.slot + 1, diff --git a/src/tvu/shred.zig b/src/tvu/shred.zig index 91243f508..496efbd13 100644 --- a/src/tvu/shred.zig +++ b/src/tvu/shred.zig @@ -296,6 
+296,6 @@ pub const shred_layout = struct { const end_index = start_index + @sizeOf(Int); if (data.len < end_index) return null; const bytes: *const [@sizeOf(Int)]u8 = @ptrCast(data[start_index..end_index]); - return std.mem.readInt(Int, bytes, .Little); + return std.mem.readInt(Int, bytes, .little); } }; diff --git a/src/tvu/shred_receiver.zig b/src/tvu/shred_receiver.zig index db9ceaf70..ee11ede58 100644 --- a/src/tvu/shred_receiver.zig +++ b/src/tvu/shred_receiver.zig @@ -20,6 +20,8 @@ const RepairMessage = sig.tvu.RepairMessage; const Slot = sig.core.Slot; const SocketThread = sig.net.SocketThread; +const endpointToString = sig.net.endpointToString; + /// Use this in a single thread where you want to keep accessing /// a value that's stored in an atomic, but you don't want to do /// an expensive `load` operation every time you read it, and @@ -39,12 +41,12 @@ pub fn CachedAtomic(comptime T: type) type { pub fn init(atomic: *Atomic(T)) Self { return .{ .atomic = atomic, - .cache = atomic.load(.Monotonic), + .cache = atomic.load(.monotonic), }; } pub fn update(self: *Self) void { - self.cache = self.atomic.load(.Monotonic); + self.cache = self.atomic.load(.monotonic); } }; } @@ -108,7 +110,7 @@ pub const ShredReceiver = struct { sender: *Channel(ArrayList(Packet)), ) !void { var buf = ArrayList(ArrayList(Packet)).init(self.allocator); - while (!self.exit.load(.Unordered)) { + while (!self.exit.load(.unordered)) { inline for (receivers) |receiver| { var responses = ArrayList(Packet).init(self.allocator); try receiver.tryDrainRecycle(&buf); diff --git a/src/tvu/shred_tracker.zig b/src/tvu/shred_tracker.zig index 7c85572e1..d6d8888e0 100644 --- a/src/tvu/shred_tracker.zig +++ b/src/tvu/shred_tracker.zig @@ -37,7 +37,7 @@ pub const BasicShredTracker = struct { slot: Slot, logger: sig.trace.Logger, ) !*Self { - var self = try allocator.create(Self); + const self = try allocator.create(Self); self.* = .{ .allocator = allocator, .start_slot = slot, diff --git a/src/tvu/shred_verifier.zig b/src/tvu/shred_verifier.zig index bc8173223..836c630cd 100644 --- a/src/tvu/shred_verifier.zig +++ b/src/tvu/shred_verifier.zig @@ -6,7 +6,7 @@ const shred_layout = sig.tvu.shred_layout; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; -const Atomic = std.atomic.Atomic; +const Atomic = std.atomic.Value; const Channel = sig.sync.Channel; const Packet = sig.net.Packet; @@ -36,7 +36,7 @@ pub fn runShredSigVerify( } } verified.send(packet_batch) catch unreachable; // TODO - if (exit.load(.Monotonic)) return; + if (exit.load(.monotonic)) return; } } } diff --git a/src/utils/arraylist.zig b/src/utils/arraylist.zig index bd3e29a3a..4ea73f5c2 100644 --- a/src/utils/arraylist.zig +++ b/src/utils/arraylist.zig @@ -104,7 +104,7 @@ pub fn RecyclingList( self.len += 1; return item; } - var item = try self.private.addOne(); + const item = try self.private.addOne(); item.* = initBlank(self.private.allocator); self.len += 1; return item; diff --git a/src/utils/thread.zig b/src/utils/thread.zig index b8f048dcd..3601aa6aa 100644 --- a/src/utils/thread.zig +++ b/src/utils/thread.zig @@ -121,7 +121,7 @@ pub fn HomogeneousThreadPool(comptime TaskType: type) type { /// The task's inputs and state. /// TaskType.run is the task's logic, which uses the data in this struct. 
typed_task: TaskType, - + /// the return value of the task /// - points to undefined data until the task is complete /// - memory address may become invalid after task is joined, if caller decides to deinit results @@ -130,7 +130,7 @@ pub fn HomogeneousThreadPool(comptime TaskType: type) type { const Self = @This(); fn run(pool_task: *ThreadPool.Task) void { - var self = @fieldParentPtr(Self, "pool_task", pool_task); + var self: *Self = @fieldParentPtr("pool_task", pool_task); self.result.* = self.typed_task.run(); From 7f4e2b3d823127aebc3405dd88a299f0af067370 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 30 Apr 2024 09:59:24 -0400 Subject: [PATCH 05/51] refactor(sync): cleanup unused --- src/sync/ref_counter.zig | 77 ---------------------------------------- src/utils/thread.zig | 1 - 2 files changed, 78 deletions(-) delete mode 100644 src/sync/ref_counter.zig diff --git a/src/sync/ref_counter.zig b/src/sync/ref_counter.zig deleted file mode 100644 index 6b732ab6c..000000000 --- a/src/sync/ref_counter.zig +++ /dev/null @@ -1,77 +0,0 @@ -const std = @import("std"); - -const Atomic = std.atomic.Atomic; - -/// Thread-safe counter to track the lifetime of a shared resource. -/// This does not manage the resource directly. It is just a tool -/// that can be used by multiple contexts to communicate with each -/// other about the lifetime of a shared resource. -/// -/// This can be used to determine whether a resource: -/// - is still alive and safe to use. -/// - is safe to deinitialize. -/// -/// Initializes with refs = 1, assuming there is currently exactly -/// one valid reference, which will need `release` called when it -/// is no longer in use. Call `acquire` to register additional -/// references beyond the first. -pub const ReferenceCounter = struct { - state: Atomic(u64) = Atomic(u64).init(@bitCast(State{ .refs = 1 })), - - const State = packed struct { - /// While the resource is still alive, this is the number of active references. - /// After the resource dies, this value no longer has the same meaning. - /// Check `refs == acquirers` to see if the resource is dead. - refs: i32 = 0, - /// Number of threads currently in the process of attempting to acquire the resource. - acquirers: i32 = 0, - }; - - const Self = @This(); - - /// Acquire access to the shared resource in a new context. - /// Call `release` when you are done using the resource in this context. - /// - /// If successfully acquired, the resource will be safe - /// to use until you call `release` in the same context. - /// - /// Returns: - /// - true: access granted, counter has incremented - /// - false: access denied, already destroyed - pub fn acquire(self: *Self) bool { - const prior: State = @bitCast(self.state.fetchAdd( - @bitCast(State{ .acquirers = 1, .refs = 1 }), - .Acquire, - )); - if (prior.refs > prior.acquirers) { - _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1 }), .monotonic); - return true; - } - // resource was already destroyed - _ = self.state.fetchSub(@bitCast(State{ .acquirers = 1, .refs = 1 }), .monotonic); - return false; - } - - /// Release a reference from a context where it is no longer in use. - /// - /// Returns: - /// - true: this was the last reference. you should now destroy the resource. - /// - false: there are still more references. don't do anything. 
- pub fn release(self: *Self) bool { - const prior: State = @bitCast(self.state.fetchSub(@bitCast(State{ .refs = 1 }), .AcqRel)); - // if this fails, the resource is already dead (analogous to double-free) - std.debug.assert(prior.refs > prior.acquirers); - return prior.refs == 1; - } -}; - -test "sync.ref_counter: ReferenceCounter works" { - var x = ReferenceCounter{}; - try std.testing.expect(x.acquire()); - try std.testing.expect(x.acquire()); - try std.testing.expect(x.acquire()); - try std.testing.expect(!x.release()); - try std.testing.expect(!x.release()); - try std.testing.expect(!x.release()); - try std.testing.expect(x.release()); -} diff --git a/src/utils/thread.zig b/src/utils/thread.zig index 3601aa6aa..00bdc6f6f 100644 --- a/src/utils/thread.zig +++ b/src/utils/thread.zig @@ -1,7 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const Atomic = std.atomic.Atomic; const Condition = std.Thread.Condition; const Mutex = std.Thread.Mutex; From 821abc7858b7f938c2d3ce5b6eb7f840dbee325b Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 30 Apr 2024 10:16:16 -0400 Subject: [PATCH 06/51] fix(lib): remove stale/invalid export --- src/lib.zig | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.zig b/src/lib.zig index 887ca2487..0c4a1778a 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -55,7 +55,6 @@ pub const sync = struct { pub usingnamespace @import("sync/channel.zig"); pub usingnamespace @import("sync/mpmc.zig"); pub usingnamespace @import("sync/ref.zig"); - pub usingnamespace @import("sync/ref_counter.zig"); pub usingnamespace @import("sync/mux.zig"); pub usingnamespace @import("sync/once_cell.zig"); pub usingnamespace @import("sync/thread_pool.zig"); From 882eebeef4de6d2d74ad76c1854893741dd84190 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 30 Apr 2024 10:34:39 -0400 Subject: [PATCH 07/51] refactor(cmd): organize imports --- src/cmd/cmd.zig | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 95c384778..eee2e5540 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -5,55 +5,45 @@ const dns = @import("zigdig"); const network = @import("zig-network"); const helpers = @import("helpers.zig"); const sig = @import("../lib.zig"); +const config = @import("config.zig"); const Atomic = std.atomic.Value; const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = network.Socket; +const AccountsDB = sig.accounts_db.AccountsDB; +const AccountsDBConfig = sig.accounts_db.AccountsDBConfig; +const AllSnapshotFields = sig.accounts_db.AllSnapshotFields; +const Bank = sig.accounts_db.Bank; const BasicShredTracker = sig.tvu.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; +const GenesisConfig = sig.accounts_db.GenesisConfig; const GossipService = sig.gossip.GossipService; const IpAddr = sig.net.IpAddr; const Level = sig.trace.Level; const Logger = sig.trace.Logger; const Pubkey = sig.core.Pubkey; -const Registry = sig.prometheus.Registry; const RepairService = sig.tvu.RepairService; -const RepairPeerProvider = sig.tvu.RepairPeerProvider; -const RepairRequester = sig.tvu.RepairRequester; const ShredReceiver = sig.tvu.ShredReceiver; -const Slot = sig.core.Slot; +const SnapshotFieldsAndPaths = sig.accounts_db.SnapshotFieldsAndPaths; +const SnapshotFiles = sig.accounts_db.SnapshotFiles; const SocketAddr = sig.net.SocketAddr; +const StatusCache = sig.accounts_db.StatusCache; +const downloadSnapshotsFromGossip = 
sig.accounts_db.downloadSnapshotsFromGossip; const enumFromName = sig.utils.enumFromName; const getOrInitIdentity = helpers.getOrInitIdentity; const globalRegistry = sig.prometheus.globalRegistry; const getWallclockMs = sig.gossip.getWallclockMs; const initRepair = sig.tvu.initRepair; +const parallelUnpackZstdTarBall = sig.accounts_db.parallelUnpackZstdTarBall; const requestIpEcho = sig.net.requestIpEcho; const servePrometheus = sig.prometheus.servePrometheus; const socket_tag = sig.gossip.socket_tag; - -const SnapshotFiles = @import("../accountsdb/snapshots.zig").SnapshotFiles; -const SnapshotFieldsAndPaths = @import("../accountsdb/snapshots.zig").SnapshotFieldsAndPaths; -const AllSnapshotFields = @import("../accountsdb/snapshots.zig").AllSnapshotFields; -const AccountsDB = @import("../accountsdb/db.zig").AccountsDB; -const AccountsDBConfig = @import("../accountsdb/db.zig").AccountsDBConfig; -const GenesisConfig = @import("../accountsdb/genesis_config.zig").GenesisConfig; -const StatusCache = @import("../accountsdb/snapshots.zig").StatusCache; -const SnapshotFields = @import("../accountsdb/snapshots.zig").SnapshotFields; -const Bank = @import("../accountsdb/bank.zig").Bank; - -const parallelUnpackZstdTarBall = @import("../accountsdb/snapshots.zig").parallelUnpackZstdTarBall; -const downloadSnapshotsFromGossip = @import("../accountsdb/download.zig").downloadSnapshotsFromGossip; -const SOCKET_TIMEOUT = @import("../net/socket_utils.zig").SOCKET_TIMEOUT; - -const config = @import("config.zig"); -// var validator_config = config.current; - -const ACCOUNT_INDEX_BINS = @import("../accountsdb/db.zig").ACCOUNT_INDEX_BINS; +const SOCKET_TIMEOUT = sig.net.SOCKET_TIMEOUT; +const ACCOUNT_INDEX_BINS = sig.accounts_db.ACCOUNT_INDEX_BINS; var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const gpa_allocator = gpa.allocator(); From b154f6ea6c72b77655ea7b82466fa4aa0dab21b8 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Sat, 4 May 2024 15:45:34 -0400 Subject: [PATCH 08/51] refactor(tvu): cleaner init with ServiceManager - spawn tvu in tvu.zig instead of dumping everything in cmd.zig - create ServiceManager to manage threads and their state --- remove_unused.py | 115 +++++++++------------ src/cmd/cmd.zig | 140 +++++++++++++------------ src/lib.zig | 2 + src/net/packet.zig | 2 +- src/tvu/repair_message.zig | 14 +-- src/tvu/repair_service.zig | 29 ++++-- src/tvu/shred_processor.zig | 10 +- src/tvu/shred_receiver.zig | 53 +++------- src/tvu/shred_tracker.zig | 17 +-- src/tvu/shred_verifier.zig | 11 +- src/tvu/tvu.zig | 141 +++++++++++++++++++++++++ src/utils/service.zig | 201 ++++++++++++++++++++++++++++++++++++ 12 files changed, 522 insertions(+), 213 deletions(-) create mode 100644 src/tvu/tvu.zig create mode 100644 src/utils/service.zig diff --git a/remove_unused.py b/remove_unused.py index b32f260e9..c08f7e661 100644 --- a/remove_unused.py +++ b/remove_unused.py @@ -1,6 +1,7 @@ -# parse arg of file name +# parse arg of file name import sys import os +import re if len(sys.argv) != 2: print("Usage: python remove_unused.py ") @@ -8,76 +9,54 @@ zig_files = [] dirs = [sys.argv[1]] -while 1: +while 1: d = dirs.pop() files = os.listdir(d) - for file in files: + for file in files: full_path = os.path.join(d, file) - if os.path.isdir(full_path): + if os.path.isdir(full_path): dirs.append(full_path) - else: - # if file ends in .zig - if file.endswith('.zig'): + else: + # if file ends in .zig + if file.endswith(".zig"): zig_files.append(full_path) - if len(dirs) == 0: - break - -total_removes = 0 -n_remove_iter = 0 
-n_removes = 1
-while n_removes > 0:
-    n_removes = 0
-    print(f"iteration: {n_remove_iter}, lines removed: {n_removes}")
-    n_remove_iter += 1
-
-    for filename in zig_files:
-        print(filename)
-
-        # open and read lines of file
-        with open(filename, 'r') as f:
-            full_lines = f.readlines()
-
-        # parse the value {VAR} name in 'const {VAR} = @import ...'
-        import_var_names = []
-        for (i, line) in enumerate(full_lines):
-            if not (line.startswith('const') or line.startswith('pub const')):
-                continue
-
-            if '@import' not in line:
-                continue
-
-            start_index = line.index("const ")
-            end_index = line.index(" = ")
-            var_name = line[start_index + 6:end_index]
-            import_var_names.append((var_name, i))
-
-        unused_vars = import_var_names.copy()
-        for i, line in enumerate(full_lines):
-
-            for var, line_num in import_var_names:
-                if (var in line) and (i != line_num):
-                    if (var, line_num) in unused_vars:
-                        unused_vars.remove((var, line_num))
-
-        new_lines = []
-        lines_to_remove = [i for (_, i) in unused_vars]
-        n_removes += len(lines_to_remove)
-        total_removes += len(lines_to_remove)
-
-        for (i, line) in enumerate(full_lines):
-            if i in lines_to_remove:
-                continue
-            new_lines.append(line)
-
-        if (len(lines_to_remove) > 0):
-            print(filename)
-            print(unused_vars)
-
-        # write
-        with open(filename, 'w') as f:
-            f.writelines(new_lines)
-
-print("total iterations: ", n_remove_iter)
-print("total lines removed: ", total_removes)
-
\ No newline at end of file
+    if len(dirs) == 0:
+        break
+
+import_line_regex = re.compile(
+    r'const ([a-zA-Z0-9_]+) = (@import\("[a-zA-Z0-9_]+"\))?[a-zA-Z0-9_.]*;'
+)
+
+total_num_lines_removed = 0
+
+for path in zig_files:
+    with open(path) as f:
+        orig_file = f.read()
+    orig_lines = orig_file.split("\n")
+    if orig_lines[-1] == "":
+        orig_lines = orig_lines[0:-1]
+    non_import_lines = []
+    imported_names = []
+    for line_num, line in enumerate(orig_lines):
+        match = import_line_regex.match(line)
+        if match:
+            imported_names.append((match.groups()[0], line_num))
+        else:
+            non_import_lines.append(line)
+    non_import_file = "\n".join(non_import_lines)
+    lines_to_drop = set()
+    num_lines_to_remove = 0
+    for name in imported_names:
+        if re.search(f"[^a-zA-Z0-9_]{name[0]}[^a-zA-Z0-9_]", non_import_file) is None:
+            lines_to_drop.add(name[1])
+            num_lines_to_remove += 1
+    with open(path, "w") as f:
+        f.writelines(
+            f"{line}\n" for i, line in enumerate(orig_lines) if i not in lines_to_drop
+        )
+    print(path, num_lines_to_remove)
+    total_num_lines_removed += num_lines_to_remove
+
+print("total lines removed:", total_num_lines_removed)
diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig
index eee2e5540..84b0fb0dd 100644
--- a/src/cmd/cmd.zig
+++ b/src/cmd/cmd.zig
@@ -420,67 +420,82 @@ fn validator() !void {
     defer gossip_service.deinit();
     var gossip_handle = try spawnGossip(&gossip_service);
 
-    const shred_version = sig.tvu.CachedAtomic(u16).init(&gossip_service.my_shred_version);
-
-    var repair_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp);
-    try sig.net.enablePortReuse(&repair_socket, true);
-    try repair_socket.bindToPort(repair_port);
-    try repair_socket.setReadTimeout(SOCKET_TIMEOUT);
-
-    var tvu_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp);
-    try sig.net.enablePortReuse(&tvu_socket, true);
-    try tvu_socket.bindToPort(tvu_port);
-    try tvu_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT);
-
-    const shred_tracker = try sig.tvu.BasicShredTracker.init(
-        gpa_allocator,
-        @intCast(config.current.tvu.test_repair_slot orelse 0),
-        logger,
- ); - const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); - const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); - - var repair_svc = try initRepair( - gpa_allocator, - logger, - &my_keypair, - &exit, - rand.random(), - &gossip_service.gossip_table_rw, - &gossip_service.my_shred_version, - &repair_socket, - shred_tracker, - if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null, - ); - defer repair_svc.deinit(); - var repair_handle = try std.Thread.spawn(.{}, RepairService.run, .{&repair_svc}); - - var shred_receiver = ShredReceiver{ - .allocator = gpa_allocator, - .keypair = &my_keypair, - .exit = &exit, - .logger = logger, - .repair_socket = &repair_socket, - .tvu_socket = &tvu_socket, - .outgoing_shred_channel = unverified_shreds_channel, - .shred_version = shred_version, - }; - - var shred_receive_handle = try std.Thread.spawn( - .{}, - ShredReceiver.run, - .{&shred_receiver}, - ); - var verify_shreds_handle = try std.Thread.spawn( - .{}, - sig.tvu.runShredSigVerify, - .{ &exit, unverified_shreds_channel, verified_shreds_channel, .{} }, - ); - var process_shreds_handle = try std.Thread.spawn( - .{}, - sig.tvu.processShreds, - .{ gpa_allocator, verified_shreds_channel, shred_tracker }, + var tvu = try sig.tvu.spawnTvu( + .{ + .allocator = gpa_allocator, + .logger = logger, + .random = rand.random(), + .my_keypair = &my_keypair, + .exit = &exit, + .gossip_table_rw = &gossip_service.gossip_table_rw, + .my_shred_version = &gossip_service.my_shred_version, + }, + .{ + .start_slot = if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null, + .repair_port = repair_port, + .tvu_port = tvu_port, + }, ); + defer tvu.deinit(); + + // var repair_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); + // try sig.net.enablePortReuse(&repair_socket, true); + // try repair_socket.bindToPort(repair_port); + // try repair_socket.setReadTimeout(SOCKET_TIMEOUT); + + // var tvu_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); + // try sig.net.enablePortReuse(&tvu_socket, true); + // try tvu_socket.bindToPort(tvu_port); + // try tvu_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); + + // const shred_tracker = try sig.tvu.BasicShredTracker.init( + // gpa_allocator, + // @intCast(config.current.tvu.test_repair_slot orelse 0), + // logger, + // ); + // const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); + // const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); + + // const repair_svc = try initRepair( + // gpa_allocator, + // logger, + // &my_keypair, + // &exit, + // rand.random(), + // &gossip_service.gossip_table_rw, + // &gossip_service.my_shred_version, + // &repair_socket, + // shred_tracker, + // if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null, + // ); + // var repair_handle = try std.Thread.spawn(.{}, RepairService.run, .{repair_svc}); + + // var shred_receiver = ShredReceiver{ + // .allocator = gpa_allocator, + // .keypair = &my_keypair, + // .exit = &exit, + // .logger = logger, + // .repair_socket = &repair_socket, + // .tvu_socket = &tvu_socket, + // .outgoing_shred_channel = unverified_shreds_channel, + // .shred_version = shred_version, + // }; + + // var shred_receive_handle = try std.Thread.spawn( + // .{}, + // ShredReceiver.run, + // .{&shred_receiver}, + // ); + // var 
verify_shreds_handle = try std.Thread.spawn( + // .{}, + // sig.tvu.runShredSigVerify, + // .{ &exit, unverified_shreds_channel, verified_shreds_channel, .{} }, + // ); + // var process_shreds_handle = try std.Thread.spawn( + // .{}, + // sig.tvu.processShreds, + // .{ gpa_allocator, verified_shreds_channel, shred_tracker }, + // ); // accounts db var snapshots = try getOrDownloadSnapshots( @@ -553,10 +568,7 @@ fn validator() !void { logger.infof("accounts-db setup done...", .{}); gossip_handle.join(); - repair_handle.join(); - shred_receive_handle.join(); - process_shreds_handle.join(); - verify_shreds_handle.join(); + tvu.join(); } /// Initialize an instance of GossipService and configure with CLI arguments diff --git a/src/lib.zig b/src/lib.zig index 0c4a1778a..def2ad082 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -64,6 +64,7 @@ pub const utils = struct { pub usingnamespace @import("utils/arraylist.zig"); pub usingnamespace @import("utils/bitflags.zig"); pub usingnamespace @import("utils/shortvec.zig"); + pub usingnamespace @import("utils/service.zig"); pub usingnamespace @import("utils/thread.zig"); pub usingnamespace @import("utils/types.zig"); pub usingnamespace @import("utils/varint.zig"); @@ -113,4 +114,5 @@ pub const tvu = struct { pub usingnamespace @import("tvu/shred.zig"); pub usingnamespace @import("tvu/shred_tracker.zig"); pub usingnamespace @import("tvu/shred_processor.zig"); + pub usingnamespace @import("tvu/tvu.zig"); }; diff --git a/src/net/packet.zig b/src/net/packet.zig index cdc22adfa..fa23c41c2 100644 --- a/src/net/packet.zig +++ b/src/net/packet.zig @@ -42,7 +42,7 @@ pub const Packet = struct { /// TODO this violates separation of concerns. it's unusual for network-specific /// type definitions to include information that's specific to application /// components (like repair) -/// +/// /// it would be nice to find another approach that is equally easy to use, /// without sacrificing safety, performance, or readability. 
pub const Flag = enum(u8) { diff --git a/src/tvu/repair_message.zig b/src/tvu/repair_message.zig index ae1613fc0..8d59868ae 100644 --- a/src/tvu/repair_message.zig +++ b/src/tvu/repair_message.zig @@ -1,16 +1,16 @@ const std = @import("std"); +const sig = @import("../lib.zig"); const bincode = @import("../bincode/bincode.zig"); -const Allocator = std.mem.Allocator; const KeyPair = std.crypto.sign.Ed25519.KeyPair; -const LegacyContactInfo = @import("../gossip/data.zig").LegacyContactInfo; -const Nonce = @import("../core/shred.zig").Nonce; -const Pong = @import("../gossip/ping_pong.zig").Pong; -const Pubkey = @import("../core/pubkey.zig").Pubkey; -const Signature = @import("../core/signature.zig").Signature; -const Slot = @import("../core/time.zig").Slot; +const Nonce = sig.core.Nonce; +const Pong = sig.gossip.Pong; +const Pubkey = sig.core.Pubkey; +const Signature = sig.core.Signature; +const Slot = sig.core.Slot; + const SIGNATURE_LENGTH = @import("../core/signature.zig").SIGNATURE_LENGTH; /// Analogous to `SIGNED_REPAIR_TIME_WINDOW` diff --git a/src/tvu/repair_service.zig b/src/tvu/repair_service.zig index e2ebf095d..34f2bc1d1 100644 --- a/src/tvu/repair_service.zig +++ b/src/tvu/repair_service.zig @@ -27,6 +27,7 @@ const SignedGossipData = sig.gossip.SignedGossipData; const SocketAddr = sig.net.SocketAddr; const SocketThread = sig.net.SocketThread; const Slot = sig.core.Slot; +const TaskLooper = sig.utils.ServiceRunner; const RepairRequest = sig.tvu.RepairRequest; const RepairMessage = sig.tvu.RepairMessage; @@ -103,6 +104,11 @@ pub const RepairService = struct { const Self = @This(); + pub const run_config = sig.utils.RunConfig{ + .name = "repair service", + .min_loop_duration_ns = 100 * std.time.ns_per_ms, + }; + pub fn init( allocator: Allocator, logger: Logger, @@ -127,22 +133,29 @@ pub const RepairService = struct { pub fn deinit(self: *Self) void { self.peer_provider.deinit(); + self.requester.deinit(); } /// Start the long-running service and block until it exits. + /// This function claims ownership of Self, and deinits the + /// struct on exit. pub fn run(self: *Self) !void { - self.logger.info("starting repair service"); - defer self.logger.info("exiting repair service"); - var timer = try std.time.Timer.start(); - while (!self.exit.load(.unordered)) { + while (!self.exit.load(.monotonic)) { try self.sendNecessaryRepairs(); - std.time.sleep(100 * std.time.ns_per_ms -| timer.lap()); } + var this = self; + var looper = TaskLooper{ .logger = this.logger, .exit = this.exit }; + defer this.deinit(); + try looper.runService( + .{ .name = "repair service", .min_loop_duration_ns = 100 * std.time.ns_per_ms }, + Self.sendNecessaryRepairs, + .{&this}, + ); } /// Identifies which repairs are needed based on the current state, /// and sends those repairs, then returns. 
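    ///
    /// A sketch of typical wiring (names are illustrative; `run_config`
    /// above is the intended looping configuration):
    ///
    ///   try manager.spawn(
    ///       RepairService.run_config,
    ///       RepairService.sendNecessaryRepairs,
    ///       .{repair_svc},
    ///   );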
- fn sendNecessaryRepairs(self: *Self) !void { + pub fn sendNecessaryRepairs(self: *Self) !void { const repair_requests = try self.getRepairs(); defer repair_requests.deinit(); const addressed_requests = try self.assignRequestsToPeers(repair_requests.items); @@ -163,7 +176,9 @@ pub const RepairService = struct { } // TODO less often - self.logger.infof("sent {} repair requests", .{addressed_requests.items.len}); + if (addressed_requests.items.len > 0) { + self.logger.debugf("sent {} repair requests", .{addressed_requests.items.len}); + } } const MAX_SHRED_REPAIRS = 1000; diff --git a/src/tvu/shred_processor.zig b/src/tvu/shred_processor.zig index 53d6badd0..ebad33850 100644 --- a/src/tvu/shred_processor.zig +++ b/src/tvu/shred_processor.zig @@ -8,17 +8,15 @@ const ArrayList = std.ArrayList; const BasicShredTracker = sig.tvu.BasicShredTracker; const Channel = sig.sync.Channel; -const Logger = sig.trace.Logger; const Packet = sig.net.Packet; const Shred = sig.tvu.Shred; -/// analogous to `WindowService` +/// analogous to `WindowService` TODO permalink pub fn processShreds( allocator: Allocator, verified_shreds: *Channel(ArrayList(Packet)), tracker: *BasicShredTracker, ) !void { - // TODO unreachables var processed_count: usize = 0; var buf = ArrayList(ArrayList(Packet)).init(allocator); while (true) { @@ -29,9 +27,9 @@ pub fn processShreds( } for (buf.items) |packet_batch| { for (packet_batch.items) |*packet| if (!packet.isSet(.discard)) { - const shred_payload = layout.getShred(packet) orelse unreachable; - const slot = layout.getSlot(shred_payload) orelse unreachable; - const index = layout.getIndex(shred_payload) orelse unreachable; + const shred_payload = layout.getShred(packet) orelse continue; + const slot = layout.getSlot(shred_payload) orelse continue; + const index = layout.getIndex(shred_payload) orelse continue; tracker.registerShred(slot, index) catch |err| switch (err) { error.SlotUnderflow, error.SlotOverflow => continue, else => return err, diff --git a/src/tvu/shred_receiver.zig b/src/tvu/shred_receiver.zig index ee11ede58..a93cc1682 100644 --- a/src/tvu/shred_receiver.zig +++ b/src/tvu/shred_receiver.zig @@ -22,36 +22,7 @@ const SocketThread = sig.net.SocketThread; const endpointToString = sig.net.endpointToString; -/// Use this in a single thread where you want to keep accessing -/// a value that's stored in an atomic, but you don't want to do -/// an expensive `load` operation every time you read it, and -/// you're fine with reading a slightly stale value each time. -/// -/// Periodically call `update` to refresh the value. -/// -/// The `cache` field and `update` methods are NOT thread safe. -/// Do not read the `cache` while executing `update` -pub fn CachedAtomic(comptime T: type) type { - return struct { - atomic: *Atomic(T), - cache: T, - - const Self = @This(); - - pub fn init(atomic: *Atomic(T)) Self { - return .{ - .atomic = atomic, - .cache = atomic.load(.monotonic), - }; - } - - pub fn update(self: *Self) void { - self.cache = self.atomic.load(.monotonic); - } - }; -} - -/// Analogous to `ShredFetchStage` TODO permalinks +/// Analogous to `ShredFetchStage` TODO permalinks TODO deinit? 
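+///
+/// Spawn sketch (field values elided; mirrors the wiring in cmd.zig):
+///
+///   var receiver = ShredReceiver{ .allocator = allocator, ... };
+///   const handle = try std.Thread.spawn(.{}, ShredReceiver.run, .{&receiver});
+///   defer handle.join();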
pub const ShredReceiver = struct { allocator: Allocator, keypair: *const KeyPair, @@ -60,7 +31,7 @@ pub const ShredReceiver = struct { repair_socket: *Socket, tvu_socket: *Socket, outgoing_shred_channel: *Channel(ArrayList(Packet)), - shred_version: CachedAtomic(u16), + shred_version: *const Atomic(u16), const Self = @This(); @@ -115,9 +86,10 @@ pub const ShredReceiver = struct { var responses = ArrayList(Packet).init(self.allocator); try receiver.tryDrainRecycle(&buf); if (buf.items.len > 0) { + const shred_version = self.shred_version.load(.monotonic); for (buf.items) |batch| { for (batch.items) |*packet| { - try self.handlePacket(packet, &responses); + try self.handlePacket(packet, &responses, shred_version); } try self.outgoing_shred_channel.send(batch); } @@ -127,13 +99,17 @@ pub const ShredReceiver = struct { } else { std.time.sleep(10 * std.time.ns_per_ms); } - self.shred_version.update(); } } } /// Handle a single packet and return - fn handlePacket(self: *Self, packet: *Packet, responses: *ArrayList(Packet)) !void { + fn handlePacket( + self: *Self, + packet: *Packet, + responses: *ArrayList(Packet), + shred_version: u16, + ) !void { if (packet.size == REPAIR_RESPONSE_SERIALIZED_PING_BYTES) { try self.handlePing(packet, responses); packet.set(.discard); @@ -146,7 +122,7 @@ pub const ShredReceiver = struct { // TODO figure out these values const root = 0; const max_slot = std.math.maxInt(Slot); - if (shouldDiscardShred(packet, root, self.shred_version.cache, max_slot)) { + if (shouldDiscardShred(packet, root, shred_version, max_slot)) { packet.set(.discard); } } @@ -172,9 +148,6 @@ pub const ShredReceiver = struct { const endpoint_str = try endpointToString(self.allocator, &packet.addr); defer endpoint_str.deinit(); - // self.logger.field("from_endpoint", endpoint_str.items) - // .field("from_pubkey", &ping.from.string()) - // .info("tvu: recv repair ping"); } }; @@ -226,3 +199,7 @@ fn verifyShredSlots(slot: Slot, parent: Slot, root: Slot) bool { const REPAIR_RESPONSE_SERIALIZED_PING_BYTES = 132; const RepairPing = union(enum) { Ping: Ping }; + +test "asd quend" { + std.debug.print("{s}", .{@typeName(@TypeOf(ShredReceiver.run))}); +} diff --git a/src/tvu/shred_tracker.zig b/src/tvu/shred_tracker.zig index d6d8888e0..f1c907065 100644 --- a/src/tvu/shred_tracker.zig +++ b/src/tvu/shred_tracker.zig @@ -6,7 +6,6 @@ const ArrayList = std.ArrayList; const Mutex = std.Thread.Mutex; const Slot = sig.core.Slot; -const Shred = sig.tvu.Shred; const MAX_SHREDS_PER_SLOT: usize = sig.tvu.MAX_SHREDS_PER_SLOT; @@ -16,7 +15,6 @@ pub const Range = struct { }; pub const BasicShredTracker = struct { - allocator: Allocator, logger: sig.trace.Logger, mux: Mutex = .{}, /// The slot that this struct was initialized with at index 0 @@ -32,24 +30,13 @@ pub const BasicShredTracker = struct { const Self = @This(); - pub fn init( - allocator: Allocator, - slot: Slot, - logger: sig.trace.Logger, - ) !*Self { - const self = try allocator.create(Self); - self.* = .{ - .allocator = allocator, + pub fn init(slot: Slot, logger: sig.trace.Logger) Self { + return .{ .start_slot = slot, .current_bottom_slot = slot, .max_slot_seen = slot -| 1, .logger = logger, }; - return self; - } - - pub fn deinit(self: *Self) void { - self.allocator.destroy(self); } pub fn registerShred( diff --git a/src/tvu/shred_verifier.zig b/src/tvu/shred_verifier.zig index 836c630cd..984d88040 100644 --- a/src/tvu/shred_verifier.zig +++ b/src/tvu/shred_verifier.zig @@ -1,27 +1,24 @@ const std = @import("std"); const sig = @import("../lib.zig"); 
-const network = @import("zig-network"); const shred_layout = sig.tvu.shred_layout; -const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; const Channel = sig.sync.Channel; const Packet = sig.net.Packet; -pub fn runShredSigVerify( +pub fn runShredSignatureVerification( exit: *Atomic(bool), incoming: *Channel(ArrayList(Packet)), verified: *Channel(ArrayList(Packet)), leader_schedule: LeaderScheduleCalculator, -) void { - // TODO: unreachable +) !void { var verified_count: usize = 0; var buf: ArrayList(ArrayList(Packet)) = ArrayList(ArrayList(Packet)).init(incoming.allocator); while (true) { - incoming.tryDrainRecycle(&buf) catch unreachable; + try incoming.tryDrainRecycle(&buf); if (buf.items.len == 0) { std.time.sleep(10 * std.time.ns_per_ms); continue; @@ -35,7 +32,7 @@ pub fn runShredSigVerify( verified_count += 1; } } - verified.send(packet_batch) catch unreachable; // TODO + try verified.send(packet_batch); if (exit.load(.monotonic)) return; } } diff --git a/src/tvu/tvu.zig b/src/tvu/tvu.zig new file mode 100644 index 000000000..b664ac445 --- /dev/null +++ b/src/tvu/tvu.zig @@ -0,0 +1,141 @@ +const std = @import("std"); +const network = @import("zig-network"); +const sig = @import("../lib.zig"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const Atomic = std.atomic.Value; +const KeyPair = std.crypto.sign.Ed25519.KeyPair; +const Random = std.rand.Random; +const Socket = network.Socket; + +const BasicShredTracker = sig.tvu.BasicShredTracker; +const Channel = sig.sync.Channel; +const GossipTable = sig.gossip.GossipTable; +const Logger = sig.trace.Logger; +const Packet = sig.net.Packet; +const Pubkey = sig.core.Pubkey; +const RepairPeerProvider = sig.tvu.RepairPeerProvider; +const RepairRequester = sig.tvu.RepairRequester; +const RepairService = sig.tvu.RepairService; +const RwMux = sig.sync.RwMux; +const ServiceManager = sig.utils.ServiceManager; +const ShredReceiver = sig.tvu.ShredReceiver; +const Slot = sig.core.Slot; + +const SOCKET_TIMEOUT = sig.net.SOCKET_TIMEOUT; + +pub const TvuDependencies = struct { + allocator: Allocator, + logger: Logger, + random: Random, + /// This validator's keypair + my_keypair: *const KeyPair, + /// Shared exit indicator, used to shutdown the TVU. 
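+    /// (Assumed convention: the owner requests shutdown with
+    /// `exit.store(true, .monotonic)`, which every spawned loop polls.)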
+ exit: *Atomic(bool), + /// Shared state from gossip + gossip_table_rw: *RwMux(GossipTable), + /// Shared state from gossip + my_shred_version: *const Atomic(u16), +}; + +/// communication with non-tvu components +pub const TvuCommunication = struct {}; // TODO take from deps + +pub const TvuConfig = struct { + start_slot: ?Slot, + repair_port: u16, + tvu_port: u16, +}; + +pub fn spawnTvu(deps: TvuDependencies, conf: TvuConfig) !ServiceManager { + var tvu_manager = ServiceManager.init(deps.allocator, deps.logger, deps.exit); + + var repair_socket = try bindUdpReusable(conf.repair_port); + var tvu_socket = try bindUdpReusable(conf.tvu_port); + + // tracker (shared state) + const shred_tracker = try tvu_manager.create(sig.tvu.BasicShredTracker, null); + shred_tracker.* = sig.tvu.BasicShredTracker.init( + conf.start_slot orelse 0, // TODO + deps.logger, + ); + + // repair (thread) + const repair_peer_provider = try RepairPeerProvider.init( + deps.allocator, + deps.random, + deps.gossip_table_rw, + Pubkey.fromPublicKey(&deps.my_keypair.public_key), + deps.my_shred_version, + ); + const repair_requester = try RepairRequester.init( + deps.allocator, + deps.logger, + deps.random, + deps.my_keypair, + &repair_socket, + deps.exit, + ); + const repair_svc = try tvu_manager.create(RepairService, RepairService.deinit); + repair_svc.* = RepairService.init( + deps.allocator, + deps.logger, + deps.exit, + repair_requester, + repair_peer_provider, + shred_tracker, + conf.start_slot, + ); + try tvu_manager.spawn( + .{ .name = "Repair Service", .min_loop_duration_ns = 100 * std.time.ns_per_ms }, + RepairService.sendNecessaryRepairs, + .{repair_svc}, + ); + + // receiver (thread) + const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( + deps.allocator, + 1000, + ); + const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( + deps.allocator, + 1000, + ); + const shred_receiver = try tvu_manager.create(ShredReceiver, null); + shred_receiver.* = ShredReceiver{ + .allocator = deps.allocator, + .keypair = deps.my_keypair, + .exit = deps.exit, + .logger = deps.logger, + .repair_socket = &repair_socket, + .tvu_socket = &tvu_socket, + .outgoing_shred_channel = unverified_shreds_channel, + .shred_version = deps.my_shred_version, + }; + try tvu_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); + + // verifier (thread) + try tvu_manager.spawn( + .{ .name = "Shred Verifier" }, + sig.tvu.runShredSignatureVerification, + .{ deps.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, + ); + + // processor (thread) + try tvu_manager.spawn( + .{ .name = "Shred Processor" }, + sig.tvu.processShreds, + .{ deps.allocator, verified_shreds_channel, shred_tracker }, + ); + + return tvu_manager; +} + +fn bindUdpReusable(port: u16) !Socket { + var socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); + try sig.net.enablePortReuse(&socket, true); + try socket.bindToPort(port); + try socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); + return socket; +} diff --git a/src/utils/service.zig b/src/utils/service.zig new file mode 100644 index 000000000..2fedb4437 --- /dev/null +++ b/src/utils/service.zig @@ -0,0 +1,201 @@ +const std = @import("std"); +const network = @import("zig-network"); +const sig = @import("../lib.zig"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const Atomic = std.atomic.Value; + +const Logger = sig.trace.Logger; + +/// High level manager for long-running 
threads and the state +/// shared by those threads. +/// +/// Provides facilities to wait for the threads to complete, +/// and to clean up their shared state. +/// +/// Typically, this should only manage state that must be shared +/// by multiple threads, when it would not be safe for any individual +/// thread to own it. +pub const ServiceManager = struct { + allocator: Allocator, + exit: *Atomic(bool), + runner: ServiceRunner, + threads: std.ArrayList(std.Thread), + shared_state: std.ArrayList(AnonBox), + + const Self = @This(); + + pub fn init(allocator: Allocator, logger: Logger, exit: *Atomic(bool)) Self { + return .{ + .allocator = allocator, + .exit = exit, + .runner = .{ .logger = logger, .exit = exit }, + .threads = std.ArrayList(std.Thread).init(allocator), + .shared_state = std.ArrayList(AnonBox).init(allocator), + }; + } + + /// Allocate shared state to manage with this struct. + /// Use for state that should outlive the managed threads. + /// Typically this would be state that is shared by multiple threads, + /// or state used to orchestrate an individual thread. + pub fn create( + self: *Self, + comptime T: type, + comptime deinitFn: ?fn (*T) void, + ) Allocator.Error!*T { + const ptr, const box = try AnonBox.init(T, deinitFn, self.allocator); + try self.shared_state.append(box); + return ptr; + } + + /// Spawn a thread to be managed. + /// The function may be restarted periodically, according to the config. + pub fn spawn( + self: *Self, + config: RunConfig, + comptime function: anytype, + args: anytype, + ) !void { + var thread = try std.Thread.spawn( + .{}, + ServiceRunner.runService, + .{ &self.runner, config, function, args }, + ); + if (config.name) |name| thread.setName(name) catch {}; + try self.threads.append(thread); + } + + /// Wait for all threads to exit, then return. + pub fn join(self: *Self) void { + for (self.threads.items) |t| t.join(); + self.threads.clearRetainingCapacity(); + } + + /// 1. Signal the threads to exit. + /// 2. Wait for threads to exit. + /// 3. Deinit the shared state from those threads. + pub fn deinit(self: Self) void { + self.exit.store(true, .monotonic); + for (self.threads.items) |t| t.join(); + for (self.shared_state.items) |s| s.deinit(); + self.threads.deinit(); + self.shared_state.deinit(); + } +}; + +/// Convert a short-lived task into a long-lived service by looping it, +/// or make a service resilient by restarting it on failure. 
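+///
+/// Sketch of direct use (normally reached through `ServiceManager.spawn`;
+/// `pollOnce` and `state` are illustrative):
+///
+///   var runner = ServiceRunner{ .logger = logger, .exit = &exit };
+///   try runner.runService(.{ .name = "poller" }, pollOnce, .{&state});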
+pub const ServiceRunner = struct {
+    logger: Logger,
+    exit: *Atomic(bool),
+    service_counter: Atomic(usize) = .{ .raw = 0 },
+
+    const Self = @This();
+
+    pub fn runService(
+        self: *Self,
+        config: RunConfig,
+        function: anytype,
+        args: anytype,
+    ) !void {
+        // 32 bytes: enough for "thread " plus the decimal digits of any thread id
+        var buf: [32]u8 = undefined;
+        const name = config.name orelse try std.fmt.bufPrint(
+            &buf,
+            "thread {d}",
+            .{std.Thread.getCurrentId()},
+        );
+        self.logger.infof("Starting {s}", .{name});
+        var timer = try std.time.Timer.start();
+        var last_iteration: u64 = 0;
+        while (!self.exit.load(.unordered)) {
+            if (@call(.auto, function, args)) |ok| {
+                switch (config.return_handler) {
+                    .keep_looping => {},
+                    .just_return => {
+                        self.logger.errf("Exiting {s} due to return", .{name});
+                        return ok;
+                    },
+                    .set_exit_and_return => {
+                        self.logger.errf("Signalling exit due to return from {s}", .{name});
+                        self.exit.store(true, .monotonic);
+                        return ok;
+                    },
+                }
+            } else |err| {
+                switch (config.error_handler) {
+                    .keep_looping => self.logger.errf("Unhandled error in {s}: {}", .{ name, err }),
+                    .just_return => {
+                        self.logger.errf("Exiting {s} due to error: {}", .{ name, err });
+                        return err;
+                    },
+                    .set_exit_and_return => {
+                        self.logger.errf("Signalling exit due to error in {s}: {}", .{ name, err });
+                        self.exit.store(true, .monotonic);
+                        return err;
+                    },
+                }
+            }
+            last_iteration = timer.lap();
+            std.time.sleep(@max(
+                config.min_pause_ns,
+                config.min_loop_duration_ns -| last_iteration,
+            ));
+        }
+    }
+};
+
+pub const RunConfig = struct {
+    name: ?[]const u8 = null,
+    /// what to do when the task returns without error
+    return_handler: ReturnHandler = .keep_looping,
+    /// what to do when the task returns with an error
+    error_handler: ReturnHandler = .keep_looping,
+    /// The minimum amount of time to spend on the entire loop,
+    /// including the logic plus the pause.
+    min_loop_duration_ns: u64 = 0,
+    /// The minimum amount of time to pause after one iteration
+    /// of the function completes, before restarting the function.
+    min_pause_ns: u64 = 0,
+};
+
+pub const ReturnHandler = enum {
+    keep_looping,
+    just_return,
+    set_exit_and_return,
+};
+
+/// Create a pointer and manage its lifetime, without concern for its type.
+///
+/// Useful when you need to manage the lifetime of data in a different
+/// context from where it is allocated or used.
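+///
+/// Usage sketch (this is how `ServiceManager.create` uses it; the payload
+/// type here is illustrative):
+///
+///   const tracker, const box = try AnonBox.init(BasicShredTracker, null, allocator);
+///   defer box.deinit();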
+pub const AnonBox = struct { + allocator: Allocator, + state: *anyopaque, + deinitFn: *const fn (*anyopaque) void, + + const Self = @This(); + + pub fn init( + comptime T: type, + comptime deinitFn: ?fn (*T) void, + allocator: Allocator, + ) Allocator.Error!struct { *T, Self } { + const ptr = try allocator.create(T); + const self = .{ + .allocator = allocator, + .state = @as(*anyopaque, @ptrCast(@alignCast(ptr))), + .deinitFn = struct { + fn deinit(opaque_ptr: *anyopaque) void { + if (deinitFn) |f| f(@ptrCast(@alignCast(opaque_ptr))) else {} + } + }.deinit, + }; + return .{ ptr, self }; + } + + pub fn deinit(self: Self) void { + self.deinitFn(self.state); + } +}; From 3db9c70f993febb3ff32ca0f51ebf2b363b27999 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Sat, 4 May 2024 16:05:43 -0400 Subject: [PATCH 09/51] refactor(shred_collector): rename tvu to shred_collector also remove some unused code --- src/cmd/cmd.zig | 73 ++----------------- src/lib.zig | 18 ++--- .../repair_message.zig | 0 .../repair_service.zig | 29 ++------ .../tvu.zig => shred_collector/service.zig} | 30 ++++---- src/{tvu => shred_collector}/shred.zig | 0 .../shred_processor.zig | 6 +- .../shred_receiver.zig | 8 +- .../shred_tracker.zig | 2 +- .../shred_verifier.zig | 2 +- src/utils/service.zig | 8 +- 11 files changed, 49 insertions(+), 127 deletions(-) rename src/{tvu => shred_collector}/repair_message.zig (100%) rename src/{tvu => shred_collector}/repair_service.zig (96%) rename src/{tvu/tvu.zig => shred_collector/service.zig} (81%) rename src/{tvu => shred_collector}/shred.zig (100%) rename src/{tvu => shred_collector}/shred_processor.zig (91%) rename src/{tvu => shred_collector}/shred_receiver.zig (96%) rename src/{tvu => shred_collector}/shred_tracker.zig (98%) rename src/{tvu => shred_collector}/shred_verifier.zig (97%) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 84b0fb0dd..9a870ba24 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -16,7 +16,7 @@ const AccountsDB = sig.accounts_db.AccountsDB; const AccountsDBConfig = sig.accounts_db.AccountsDBConfig; const AllSnapshotFields = sig.accounts_db.AllSnapshotFields; const Bank = sig.accounts_db.Bank; -const BasicShredTracker = sig.tvu.BasicShredTracker; +const BasicShredTracker = sig.shred_collector.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GenesisConfig = sig.accounts_db.GenesisConfig; const GossipService = sig.gossip.GossipService; @@ -24,8 +24,8 @@ const IpAddr = sig.net.IpAddr; const Level = sig.trace.Level; const Logger = sig.trace.Logger; const Pubkey = sig.core.Pubkey; -const RepairService = sig.tvu.RepairService; -const ShredReceiver = sig.tvu.ShredReceiver; +const RepairService = sig.shred_collector.RepairService; +const ShredReceiver = sig.shred_collector.ShredReceiver; const SnapshotFieldsAndPaths = sig.accounts_db.SnapshotFieldsAndPaths; const SnapshotFiles = sig.accounts_db.SnapshotFiles; const SocketAddr = sig.net.SocketAddr; @@ -36,7 +36,7 @@ const enumFromName = sig.utils.enumFromName; const getOrInitIdentity = helpers.getOrInitIdentity; const globalRegistry = sig.prometheus.globalRegistry; const getWallclockMs = sig.gossip.getWallclockMs; -const initRepair = sig.tvu.initRepair; +const initRepair = sig.shred_collector.initRepair; const parallelUnpackZstdTarBall = sig.accounts_db.parallelUnpackZstdTarBall; const requestIpEcho = sig.net.requestIpEcho; const servePrometheus = sig.prometheus.servePrometheus; @@ -420,7 +420,7 @@ fn validator() !void { defer gossip_service.deinit(); var gossip_handle = try 
spawnGossip(&gossip_service); - var tvu = try sig.tvu.spawnTvu( + var shred_collector = try sig.shred_collector.spawnShredCollector( .{ .allocator = gpa_allocator, .logger = logger, @@ -436,66 +436,7 @@ fn validator() !void { .tvu_port = tvu_port, }, ); - defer tvu.deinit(); - - // var repair_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); - // try sig.net.enablePortReuse(&repair_socket, true); - // try repair_socket.bindToPort(repair_port); - // try repair_socket.setReadTimeout(SOCKET_TIMEOUT); - - // var tvu_socket = try Socket.create(network.AddressFamily.ipv4, network.Protocol.udp); - // try sig.net.enablePortReuse(&tvu_socket, true); - // try tvu_socket.bindToPort(tvu_port); - // try tvu_socket.setReadTimeout(sig.net.SOCKET_TIMEOUT); - - // const shred_tracker = try sig.tvu.BasicShredTracker.init( - // gpa_allocator, - // @intCast(config.current.tvu.test_repair_slot orelse 0), - // logger, - // ); - // const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); - // const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init(gpa_allocator, 1000); - - // const repair_svc = try initRepair( - // gpa_allocator, - // logger, - // &my_keypair, - // &exit, - // rand.random(), - // &gossip_service.gossip_table_rw, - // &gossip_service.my_shred_version, - // &repair_socket, - // shred_tracker, - // if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null, - // ); - // var repair_handle = try std.Thread.spawn(.{}, RepairService.run, .{repair_svc}); - - // var shred_receiver = ShredReceiver{ - // .allocator = gpa_allocator, - // .keypair = &my_keypair, - // .exit = &exit, - // .logger = logger, - // .repair_socket = &repair_socket, - // .tvu_socket = &tvu_socket, - // .outgoing_shred_channel = unverified_shreds_channel, - // .shred_version = shred_version, - // }; - - // var shred_receive_handle = try std.Thread.spawn( - // .{}, - // ShredReceiver.run, - // .{&shred_receiver}, - // ); - // var verify_shreds_handle = try std.Thread.spawn( - // .{}, - // sig.tvu.runShredSigVerify, - // .{ &exit, unverified_shreds_channel, verified_shreds_channel, .{} }, - // ); - // var process_shreds_handle = try std.Thread.spawn( - // .{}, - // sig.tvu.processShreds, - // .{ gpa_allocator, verified_shreds_channel, shred_tracker }, - // ); + defer shred_collector.deinit(); // accounts db var snapshots = try getOrDownloadSnapshots( @@ -568,7 +509,7 @@ fn validator() !void { logger.infof("accounts-db setup done...", .{}); gossip_handle.join(); - tvu.join(); + shred_collector.join(); } /// Initialize an instance of GossipService and configure with CLI arguments diff --git a/src/lib.zig b/src/lib.zig index def2ad082..228ed13e8 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -106,13 +106,13 @@ pub const prometheus = struct { pub usingnamespace @import("prometheus/registry.zig"); }; -pub const tvu = struct { - pub usingnamespace @import("tvu/repair_message.zig"); - pub usingnamespace @import("tvu/repair_service.zig"); - pub usingnamespace @import("tvu/shred_receiver.zig"); - pub usingnamespace @import("tvu/shred_verifier.zig"); - pub usingnamespace @import("tvu/shred.zig"); - pub usingnamespace @import("tvu/shred_tracker.zig"); - pub usingnamespace @import("tvu/shred_processor.zig"); - pub usingnamespace @import("tvu/tvu.zig"); +pub const shred_collector = struct { + pub usingnamespace @import("shred_collector/repair_message.zig"); + pub usingnamespace @import("shred_collector/repair_service.zig"); + pub 
usingnamespace @import("shred_collector/shred_receiver.zig"); + pub usingnamespace @import("shred_collector/shred_verifier.zig"); + pub usingnamespace @import("shred_collector/shred.zig"); + pub usingnamespace @import("shred_collector/shred_tracker.zig"); + pub usingnamespace @import("shred_collector/shred_processor.zig"); + pub usingnamespace @import("shred_collector/service.zig"); }; diff --git a/src/tvu/repair_message.zig b/src/shred_collector/repair_message.zig similarity index 100% rename from src/tvu/repair_message.zig rename to src/shred_collector/repair_message.zig diff --git a/src/tvu/repair_service.zig b/src/shred_collector/repair_service.zig similarity index 96% rename from src/tvu/repair_service.zig rename to src/shred_collector/repair_service.zig index 34f2bc1d1..c7090b647 100644 --- a/src/tvu/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -12,13 +12,13 @@ const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = zig_network.Socket; -const BasicShredTracker = sig.tvu.BasicShredTracker; +const BasicShredTracker = sig.shred_collector.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GossipTable = sig.gossip.GossipTable; const HomogeneousThreadPool = sig.utils.HomogeneousThreadPool; const Logger = sig.trace.Logger; const LruCacheCustom = sig.common.LruCacheCustom; -const MultiSlotReport = sig.tvu.MultiSlotReport; +const MultiSlotReport = sig.shred_collector.MultiSlotReport; const Nonce = sig.core.Nonce; const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; @@ -29,10 +29,10 @@ const SocketThread = sig.net.SocketThread; const Slot = sig.core.Slot; const TaskLooper = sig.utils.ServiceRunner; -const RepairRequest = sig.tvu.RepairRequest; -const RepairMessage = sig.tvu.RepairMessage; +const RepairRequest = sig.shred_collector.RepairRequest; +const RepairMessage = sig.shred_collector.RepairMessage; -const serializeRepairRequest = sig.tvu.serializeRepairRequest; +const serializeRepairRequest = sig.shred_collector.serializeRepairRequest; /// TODO: redundant? pub fn initRepair( @@ -105,7 +105,7 @@ pub const RepairService = struct { const Self = @This(); pub const run_config = sig.utils.RunConfig{ - .name = "repair service", + .name = "Repair Service", .min_loop_duration_ns = 100 * std.time.ns_per_ms, }; @@ -136,23 +136,6 @@ pub const RepairService = struct { self.requester.deinit(); } - /// Start the long-running service and block until it exits. - /// This function claims ownership of Self, and deinits the - /// struct on exit. - pub fn run(self: *Self) !void { - while (!self.exit.load(.monotonic)) { - try self.sendNecessaryRepairs(); - } - var this = self; - var looper = TaskLooper{ .logger = this.logger, .exit = this.exit }; - defer this.deinit(); - try looper.runService( - .{ .name = "repair service", .min_loop_duration_ns = 100 * std.time.ns_per_ms }, - Self.sendNecessaryRepairs, - .{&this}, - ); - } - /// Identifies which repairs are needed based on the current state, /// and sends those repairs, then returns. 
pub fn sendNecessaryRepairs(self: *Self) !void { diff --git a/src/tvu/tvu.zig b/src/shred_collector/service.zig similarity index 81% rename from src/tvu/tvu.zig rename to src/shred_collector/service.zig index b664ac445..ac94d0ab7 100644 --- a/src/tvu/tvu.zig +++ b/src/shred_collector/service.zig @@ -9,23 +9,25 @@ const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = network.Socket; -const BasicShredTracker = sig.tvu.BasicShredTracker; const Channel = sig.sync.Channel; const GossipTable = sig.gossip.GossipTable; const Logger = sig.trace.Logger; const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; -const RepairPeerProvider = sig.tvu.RepairPeerProvider; -const RepairRequester = sig.tvu.RepairRequester; -const RepairService = sig.tvu.RepairService; const RwMux = sig.sync.RwMux; const ServiceManager = sig.utils.ServiceManager; -const ShredReceiver = sig.tvu.ShredReceiver; const Slot = sig.core.Slot; +const this = sig.shred_collector; +const BasicShredTracker = this.BasicShredTracker; +const RepairPeerProvider = this.RepairPeerProvider; +const RepairRequester = this.RepairRequester; +const RepairService = this.RepairService; +const ShredReceiver = this.ShredReceiver; + const SOCKET_TIMEOUT = sig.net.SOCKET_TIMEOUT; -pub const TvuDependencies = struct { +pub const ShredCollectorDependencies = struct { allocator: Allocator, logger: Logger, random: Random, @@ -40,23 +42,23 @@ pub const TvuDependencies = struct { }; /// communication with non-tvu components -pub const TvuCommunication = struct {}; // TODO take from deps +pub const ShredCollectorCommunication = struct {}; // TODO take from deps -pub const TvuConfig = struct { +pub const ShredCollectorConfig = struct { start_slot: ?Slot, repair_port: u16, tvu_port: u16, }; -pub fn spawnTvu(deps: TvuDependencies, conf: TvuConfig) !ServiceManager { +pub fn spawnShredCollector(deps: ShredCollectorDependencies, conf: ShredCollectorConfig) !ServiceManager { var tvu_manager = ServiceManager.init(deps.allocator, deps.logger, deps.exit); var repair_socket = try bindUdpReusable(conf.repair_port); var tvu_socket = try bindUdpReusable(conf.tvu_port); // tracker (shared state) - const shred_tracker = try tvu_manager.create(sig.tvu.BasicShredTracker, null); - shred_tracker.* = sig.tvu.BasicShredTracker.init( + const shred_tracker = try tvu_manager.create(sig.shred_collector.BasicShredTracker, null); + shred_tracker.* = sig.shred_collector.BasicShredTracker.init( conf.start_slot orelse 0, // TODO deps.logger, ); @@ -88,7 +90,7 @@ pub fn spawnTvu(deps: TvuDependencies, conf: TvuConfig) !ServiceManager { conf.start_slot, ); try tvu_manager.spawn( - .{ .name = "Repair Service", .min_loop_duration_ns = 100 * std.time.ns_per_ms }, + RepairService.run_config, RepairService.sendNecessaryRepairs, .{repair_svc}, ); @@ -118,14 +120,14 @@ pub fn spawnTvu(deps: TvuDependencies, conf: TvuConfig) !ServiceManager { // verifier (thread) try tvu_manager.spawn( .{ .name = "Shred Verifier" }, - sig.tvu.runShredSignatureVerification, + sig.shred_collector.runShredSignatureVerification, .{ deps.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, ); // processor (thread) try tvu_manager.spawn( .{ .name = "Shred Processor" }, - sig.tvu.processShreds, + sig.shred_collector.processShreds, .{ deps.allocator, verified_shreds_channel, shred_tracker }, ); diff --git a/src/tvu/shred.zig b/src/shred_collector/shred.zig similarity index 100% rename from src/tvu/shred.zig rename to src/shred_collector/shred.zig diff --git 
a/src/tvu/shred_processor.zig b/src/shred_collector/shred_processor.zig similarity index 91% rename from src/tvu/shred_processor.zig rename to src/shred_collector/shred_processor.zig index ebad33850..efcf97ba3 100644 --- a/src/tvu/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -1,15 +1,15 @@ const std = @import("std"); const sig = @import("../lib.zig"); -const layout = sig.tvu.shred_layout; +const layout = sig.shred_collector.shred_layout; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; -const BasicShredTracker = sig.tvu.BasicShredTracker; +const BasicShredTracker = sig.shred_collector.BasicShredTracker; const Channel = sig.sync.Channel; const Packet = sig.net.Packet; -const Shred = sig.tvu.Shred; +const Shred = sig.shred_collector.Shred; /// analogous to `WindowService` TODO permalink pub fn processShreds( diff --git a/src/tvu/shred_receiver.zig b/src/shred_collector/shred_receiver.zig similarity index 96% rename from src/tvu/shred_receiver.zig rename to src/shred_collector/shred_receiver.zig index a93cc1682..95ded885c 100644 --- a/src/tvu/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -3,7 +3,7 @@ const network = @import("zig-network"); const sig = @import("../lib.zig"); const bincode = sig.bincode; -const layout = sig.tvu.shred_layout; +const layout = sig.shred_collector.shred_layout; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; @@ -16,7 +16,7 @@ const Logger = sig.trace.Logger; const Packet = sig.net.Packet; const Ping = sig.gossip.Ping; const Pong = sig.gossip.Pong; -const RepairMessage = sig.tvu.RepairMessage; +const RepairMessage = sig.shred_collector.RepairMessage; const Slot = sig.core.Slot; const SocketThread = sig.net.SocketThread; @@ -167,11 +167,11 @@ fn shouldDiscardShred( if (slot > max_slot) return true; switch (variant.shred_type) { .Code => { - if (index >= sig.tvu.MAX_CODE_SHREDS_PER_SLOT) return true; + if (index >= sig.shred_collector.MAX_CODE_SHREDS_PER_SLOT) return true; if (slot <= root) return true; }, .Data => { - if (index >= sig.tvu.MAX_DATA_SHREDS_PER_SLOT) return true; + if (index >= sig.shred_collector.MAX_DATA_SHREDS_PER_SLOT) return true; const parent_offset = layout.getParentOffset(shred) orelse return true; const parent = slot -| @as(Slot, @intCast(parent_offset)); if (!verifyShredSlots(slot, parent, root)) return true; diff --git a/src/tvu/shred_tracker.zig b/src/shred_collector/shred_tracker.zig similarity index 98% rename from src/tvu/shred_tracker.zig rename to src/shred_collector/shred_tracker.zig index f1c907065..dbb721d23 100644 --- a/src/tvu/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -7,7 +7,7 @@ const Mutex = std.Thread.Mutex; const Slot = sig.core.Slot; -const MAX_SHREDS_PER_SLOT: usize = sig.tvu.MAX_SHREDS_PER_SLOT; +const MAX_SHREDS_PER_SLOT: usize = sig.shred_collector.MAX_SHREDS_PER_SLOT; pub const Range = struct { start: usize, diff --git a/src/tvu/shred_verifier.zig b/src/shred_collector/shred_verifier.zig similarity index 97% rename from src/tvu/shred_verifier.zig rename to src/shred_collector/shred_verifier.zig index 984d88040..265590764 100644 --- a/src/tvu/shred_verifier.zig +++ b/src/shred_collector/shred_verifier.zig @@ -1,7 +1,7 @@ const std = @import("std"); const sig = @import("../lib.zig"); -const shred_layout = sig.tvu.shred_layout; +const shred_layout = sig.shred_collector.shred_layout; const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; diff --git a/src/utils/service.zig b/src/utils/service.zig index 
2fedb4437..07f22714a 100644
--- a/src/utils/service.zig
+++ b/src/utils/service.zig
@@ -13,10 +13,6 @@ const Logger = sig.trace.Logger;
 ///
 /// Provides facilities to wait for the threads to complete,
 /// and to clean up their shared state.
-///
-/// Typically, this should only manage state that must be shared
-/// by multiple threads, when it would not be safe for any individual
-/// thread to own it.
 pub const ServiceManager = struct {
     allocator: Allocator,
     exit: *Atomic(bool),
@@ -36,11 +32,11 @@ pub const ServiceManager = struct {
         };
     }
 
-    /// Allocate shared state to manage with this struct.
+    /// Allocate state to manage with this struct.
     /// Use for state that should outlive the managed threads.
     /// Typically this would be state that is shared by multiple threads,
     /// or state used to orchestrate an individual thread.
-    pub fn create(
+    pub fn create( // TODO: arena instead?
         self: *Self,
         comptime T: type,
        comptime deinitFn: ?fn (*T) void,

From 2b086e7e820705d5b5cc9024b12b8bc78ccfc748 Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Sun, 5 May 2024 14:26:43 -0400
Subject: [PATCH 10/51] refactor(utils): simplify ServiceManager

use more modular and basic primitives to manage services, rather than
having an inflexible and fully baked solution stuck within a single
custom struct. this includes:

- directly use an arena allocator instead of re-implementing custom
  arena-like behavior
- generalize defer behavior and separate it out into types that can be
  used independently (such as Lazy, which is a widely applicable pattern)

this also makes some tweaks in the shred collector, primarily to
distinguish between its basic dependencies and its interface with other
components in the validator

---
 src/cmd/cmd.zig                        |  32 ++--
 src/lib.zig                            |   1 +
 src/shred_collector/repair_service.zig |  51 +-----
 src/shred_collector/service.zig        |  80 +++++----
 src/utils/lazy.zig                     |  55 ++++++
 src/utils/service.zig                  | 222 ++++++++++++-------------
 src/utils/types.zig                    |  31 ++++
 7 files changed, 268 insertions(+), 204 deletions(-)
 create mode 100644 src/utils/lazy.zig

diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig
index 9a870ba24..de4ff8dbf 100644
--- a/src/cmd/cmd.zig
+++ b/src/cmd/cmd.zig
@@ -36,7 +36,6 @@ const enumFromName = sig.utils.enumFromName;
 const getOrInitIdentity = helpers.getOrInitIdentity;
 const globalRegistry = sig.prometheus.globalRegistry;
 const getWallclockMs = sig.gossip.getWallclockMs;
-const initRepair = sig.shred_collector.initRepair;
 const parallelUnpackZstdTarBall = sig.accounts_db.parallelUnpackZstdTarBall;
 const requestIpEcho = sig.net.requestIpEcho;
 const servePrometheus = sig.prometheus.servePrometheus;
@@ -420,22 +419,21 @@ fn validator() !void {
     defer gossip_service.deinit();
     var gossip_handle = try spawnGossip(&gossip_service);
 
-    var shred_collector = try sig.shred_collector.spawnShredCollector(
-        .{
-            .allocator = gpa_allocator,
-            .logger = logger,
-            .random = rand.random(),
-            .my_keypair = &my_keypair,
-            .exit = &exit,
-            .gossip_table_rw = &gossip_service.gossip_table_rw,
-            .my_shred_version = &gossip_service.my_shred_version,
-        },
-        .{
-            .start_slot = if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null,
-            .repair_port = repair_port,
-            .tvu_port = tvu_port,
-        },
-    );
+    // shred collector
+    var shred_collector = try sig.shred_collector.start(.{
+        .start_slot = if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null,
+        .repair_port = repair_port,
+        .tvu_port = tvu_port,
+    }, .{
+        .allocator = gpa_allocator,
+        .logger = logger,
+        .random = rand.random(),
+        .my_keypair =
&my_keypair, + }, .{ + .exit = &exit, + .gossip_table_rw = &gossip_service.gossip_table_rw, + .my_shred_version = &gossip_service.my_shred_version, + }); defer shred_collector.deinit(); // accounts db diff --git a/src/lib.zig b/src/lib.zig index 228ed13e8..77cc8084d 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -63,6 +63,7 @@ pub const sync = struct { pub const utils = struct { pub usingnamespace @import("utils/arraylist.zig"); pub usingnamespace @import("utils/bitflags.zig"); + pub usingnamespace @import("utils/lazy.zig"); pub usingnamespace @import("utils/shortvec.zig"); pub usingnamespace @import("utils/service.zig"); pub usingnamespace @import("utils/thread.zig"); diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index c7090b647..239fd437c 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -27,52 +27,12 @@ const SignedGossipData = sig.gossip.SignedGossipData; const SocketAddr = sig.net.SocketAddr; const SocketThread = sig.net.SocketThread; const Slot = sig.core.Slot; -const TaskLooper = sig.utils.ServiceRunner; const RepairRequest = sig.shred_collector.RepairRequest; const RepairMessage = sig.shred_collector.RepairMessage; const serializeRepairRequest = sig.shred_collector.serializeRepairRequest; -/// TODO: redundant? -pub fn initRepair( - allocator: Allocator, - logger: Logger, - my_keypair: *const KeyPair, - exit: *Atomic(bool), - random: Random, - gossip_table_rw: *RwMux(GossipTable), - my_shred_version: *Atomic(u16), - socket: *Socket, - shred_tracker: *BasicShredTracker, - start_slot: ?Slot, -) !RepairService { - const peer_provider = try RepairPeerProvider.init( - allocator, - random, - gossip_table_rw, - Pubkey.fromPublicKey(&my_keypair.public_key), - my_shred_version, - ); - const requester = try RepairRequester.init( - allocator, - logger, - random, - my_keypair, - socket, - exit, - ); - return RepairService.init( - allocator, - logger, - exit, - requester, - peer_provider, - shred_tracker, - start_slot, - ); -} - /// Identifies which repairs are needed and sends them /// - delegates to RepairPeerProvider to identify repair peers. /// - delegates to RepairRequester to send the requests. @@ -104,11 +64,6 @@ pub const RepairService = struct { const Self = @This(); - pub const run_config = sig.utils.RunConfig{ - .name = "Repair Service", - .min_loop_duration_ns = 100 * std.time.ns_per_ms, - }; - pub fn init( allocator: Allocator, logger: Logger, @@ -136,6 +91,12 @@ pub const RepairService = struct { self.requester.deinit(); } + /// Used to run RepairService continuously. + pub const run_config = sig.utils.RunConfig{ + .name = "Repair Service", + .min_loop_duration_ns = 100 * std.time.ns_per_ms, + }; + /// Identifies which repairs are needed based on the current state, /// and sends those repairs, then returns. pub fn sendNecessaryRepairs(self: *Self) !void { diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index ac94d0ab7..f21efc226 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -27,37 +27,54 @@ const ShredReceiver = this.ShredReceiver; const SOCKET_TIMEOUT = sig.net.SOCKET_TIMEOUT; +/// Settings which tell the Shred Collector how to behave. +pub const ShredCollectorConfig = struct { + start_slot: ?Slot, + repair_port: u16, + tvu_port: u16, +}; + +/// Basic resources that are required for +/// the Shred Collector to operate. 
pub const ShredCollectorDependencies = struct { allocator: Allocator, logger: Logger, random: Random, /// This validator's keypair my_keypair: *const KeyPair, - /// Shared exit indicator, used to shutdown the TVU. +}; + +/// Interface between the Shred Collector and other components +/// that are external to the Shred Collector. +pub const ShredCollectorInterface = struct { + /// Shared exit indicator, used to shutdown the Shred Collector. exit: *Atomic(bool), - /// Shared state from gossip + /// Shared state that is read from gossip gossip_table_rw: *RwMux(GossipTable), - /// Shared state from gossip + /// Shared state that is read from gossip my_shred_version: *const Atomic(u16), }; -/// communication with non-tvu components -pub const ShredCollectorCommunication = struct {}; // TODO take from deps - -pub const ShredCollectorConfig = struct { - start_slot: ?Slot, - repair_port: u16, - tvu_port: u16, -}; - -pub fn spawnShredCollector(deps: ShredCollectorDependencies, conf: ShredCollectorConfig) !ServiceManager { - var tvu_manager = ServiceManager.init(deps.allocator, deps.logger, deps.exit); +/// Start the Shred Collector. +/// +/// Initializes all state and spawns all threads. +/// Returns as soon as all the threads are running. +/// +/// Returns a ServiceManager representing the Shred Collector. +/// This can be used to join and deinit the Shred Collector. +pub fn start( + conf: ShredCollectorConfig, + deps: ShredCollectorDependencies, + interface: ShredCollectorInterface, +) !ServiceManager { + var shred_collector = ServiceManager.init(deps.allocator, deps.logger, interface.exit); + var arena = shred_collector.arena(); var repair_socket = try bindUdpReusable(conf.repair_port); var tvu_socket = try bindUdpReusable(conf.tvu_port); - // tracker (shared state) - const shred_tracker = try tvu_manager.create(sig.shred_collector.BasicShredTracker, null); + // tracker (shared state, internal to Shred Collector) + const shred_tracker = try arena.create(sig.shred_collector.BasicShredTracker); shred_tracker.* = sig.shred_collector.BasicShredTracker.init( conf.start_slot orelse 0, // TODO deps.logger, @@ -67,9 +84,9 @@ pub fn spawnShredCollector(deps: ShredCollectorDependencies, conf: ShredCollecto const repair_peer_provider = try RepairPeerProvider.init( deps.allocator, deps.random, - deps.gossip_table_rw, + interface.gossip_table_rw, Pubkey.fromPublicKey(&deps.my_keypair.public_key), - deps.my_shred_version, + interface.my_shred_version, ); const repair_requester = try RepairRequester.init( deps.allocator, @@ -77,25 +94,26 @@ pub fn spawnShredCollector(deps: ShredCollectorDependencies, conf: ShredCollecto deps.random, deps.my_keypair, &repair_socket, - deps.exit, + interface.exit, ); - const repair_svc = try tvu_manager.create(RepairService, RepairService.deinit); + const repair_svc = try arena.create(RepairService); + try shred_collector.defers.deferCall(RepairService.deinit, .{repair_svc}); repair_svc.* = RepairService.init( deps.allocator, deps.logger, - deps.exit, + interface.exit, repair_requester, repair_peer_provider, shred_tracker, conf.start_slot, ); - try tvu_manager.spawn( + try shred_collector.spawn( RepairService.run_config, RepairService.sendNecessaryRepairs, .{repair_svc}, ); - // receiver (thread) + // receiver (threads) const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( deps.allocator, 1000, @@ -104,34 +122,34 @@ pub fn spawnShredCollector(deps: ShredCollectorDependencies, conf: ShredCollecto deps.allocator, 1000, ); - const shred_receiver = 
try tvu_manager.create(ShredReceiver, null); + const shred_receiver = try arena.create(ShredReceiver); shred_receiver.* = ShredReceiver{ .allocator = deps.allocator, .keypair = deps.my_keypair, - .exit = deps.exit, + .exit = interface.exit, .logger = deps.logger, .repair_socket = &repair_socket, .tvu_socket = &tvu_socket, .outgoing_shred_channel = unverified_shreds_channel, - .shred_version = deps.my_shred_version, + .shred_version = interface.my_shred_version, }; - try tvu_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); + try shred_collector.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); // verifier (thread) - try tvu_manager.spawn( + try shred_collector.spawn( .{ .name = "Shred Verifier" }, sig.shred_collector.runShredSignatureVerification, - .{ deps.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, + .{ interface.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, ); // processor (thread) - try tvu_manager.spawn( + try shred_collector.spawn( .{ .name = "Shred Processor" }, sig.shred_collector.processShreds, .{ deps.allocator, verified_shreds_channel, shred_tracker }, ); - return tvu_manager; + return shred_collector; } fn bindUdpReusable(port: u16) !Socket { diff --git a/src/utils/lazy.zig b/src/utils/lazy.zig new file mode 100644 index 000000000..21a2e80ec --- /dev/null +++ b/src/utils/lazy.zig @@ -0,0 +1,55 @@ +const std = @import("std"); +const sig = @import("../lib.zig"); + +const Allocator = std.mem.Allocator; +const ParamsTuple = sig.utils.ParamsTuple; + +/// A lazily evaluated instance of type T. +/// +/// Initialized with a function and its arguments. T is +/// only evaluated when `call` is called, which calls the +/// function with the previously provided arguments, +/// and returns the value returned by the function. +/// +/// Uses dynamic dispatch, so a context using a Lazy(T) +/// doesn't need to worry about how the T is created. 
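+///
+/// A minimal usage sketch (illustrative only, not code from this patch;
+/// `add` is a hypothetical function, and `allocator` is assumed in scope):
+///
+/// ```zig
+/// fn add(a: u64, b: u64) u64 {
+///     return a + b;
+/// }
+///
+/// // capture the arguments now, evaluate later
+/// const lazy_sum = try Lazy(u64).init(allocator, add, .{ 1, 2 });
+/// const sum = lazy_sum.call(); // calls add(1, 2) and frees the stored args
+/// ```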
+pub fn Lazy(comptime T: type) type {
+    return struct {
+        allocator: Allocator,
+        genericFn: *const fn (*anyopaque) T,
+        state: *anyopaque,
+        destroy: *const fn (Allocator, *anyopaque) void,
+
+        const Self = @This();
+
+        pub fn init(
+            allocator: Allocator,
+            comptime function: anytype,
+            args: ParamsTuple(function),
+        ) Allocator.Error!Self {
+            const args_ptr = try allocator.create(ParamsTuple(function));
+            args_ptr.* = args;
+            return .{
+                .allocator = allocator,
+                .genericFn = struct {
+                    fn genericFn(opaque_ptr: *anyopaque) T {
+                        const args_back: *ParamsTuple(function) = @ptrCast(@alignCast(opaque_ptr));
+                        return @call(.auto, function, args_back.*);
+                    }
+                }.genericFn,
+                .state = @as(*anyopaque, @ptrCast(@alignCast(args_ptr))),
+                .destroy = struct {
+                    fn destroy(alloc: Allocator, opaque_ptr: *anyopaque) void {
+                        const ptr: *ParamsTuple(function) = @ptrCast(@alignCast(opaque_ptr));
+                        alloc.destroy(ptr);
+                    }
+                }.destroy,
+            };
+        }
+
+        pub fn call(self: Self) T {
+            defer self.destroy(self.allocator, self.state);
+            return self.genericFn(self.state);
+        }
+    };
+}
diff --git a/src/utils/service.zig b/src/utils/service.zig
index 07f22714a..3f4e8956f 100644
--- a/src/utils/service.zig
+++ b/src/utils/service.zig
@@ -3,47 +3,50 @@ const network = @import("zig-network");
 const sig = @import("../lib.zig");
 
 const Allocator = std.mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
 const ArrayList = std.ArrayList;
 const Atomic = std.atomic.Value;
 const Logger = sig.trace.Logger;
+const Lazy = sig.utils.Lazy;
 
 /// High level manager for long-running threads and the state
 /// shared by those threads.
 ///
-/// Provides facilities to wait for the threads to complete,
-/// and to clean up their shared state.
+/// You can add threads or state, then await all threads and
+/// clean up their state.
 pub const ServiceManager = struct {
-    allocator: Allocator,
+    logger: Logger,
+    /// Signal that is expected to tell all threads to exit.
     exit: *Atomic(bool),
-    runner: ServiceRunner,
+    /// Threads to join.
     threads: std.ArrayList(std.Thread),
-    shared_state: std.ArrayList(AnonBox),
+    /// State to free after all threads join.
+    _arena: ArenaAllocator,
+    /// Logic to run after all threads join.
+    defers: DeferList,
 
     const Self = @This();
 
-    pub fn init(allocator: Allocator, logger: Logger, exit: *Atomic(bool)) Self {
+    pub fn init(allocator_: Allocator, logger: Logger, exit: *Atomic(bool)) Self {
         return .{
-            .allocator = allocator,
+            .logger = logger,
             .exit = exit,
-            .runner = .{ .logger = logger, .exit = exit },
-            .threads = std.ArrayList(std.Thread).init(allocator),
-            .shared_state = std.ArrayList(AnonBox).init(allocator),
+            .threads = std.ArrayList(std.Thread).init(allocator_),
+            ._arena = ArenaAllocator.init(allocator_),
+            .defers = DeferList.init(allocator_),
         };
     }
 
-    /// Allocate state to manage with this struct.
-    /// Use for state that should outlive the managed threads.
-    /// Typically this would be state that is shared by multiple threads,
-    /// or state used to orchestrate an individual thread.
-    pub fn create( // TODO: arena instead?
-        self: *Self,
-        comptime T: type,
-        comptime deinitFn: ?fn (*T) void,
-    ) Allocator.Error!*T {
-        const ptr, const box = try AnonBox.init(T, deinitFn, self.allocator);
-        try self.shared_state.append(box);
-        return ptr;
+    /// Allocator for state to manage with this struct.
+    ///
+    /// Use this for state that should outlive the managed threads,
+    /// but may be freed as soon as those threads are joined.
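+    ///
+    /// For example, mirroring how the shred collector uses it
+    /// (`manager` stands in for a ServiceManager instance):
+    ///
+    /// ```zig
+    /// const repair_svc = try manager.arena().create(RepairService);
+    /// try manager.defers.deferCall(RepairService.deinit, .{repair_svc});
+    /// ```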
+ /// + /// You must ensure that this is not used to allocate anything + /// that will be used after this struct is deinitialized. + pub fn arena(self: *Self) Allocator { + return self._arena.allocator(); } /// Spawn a thread to be managed. @@ -56,8 +59,8 @@ pub const ServiceManager = struct { ) !void { var thread = try std.Thread.spawn( .{}, - ServiceRunner.runService, - .{ &self.runner, config, function, args }, + runService, + .{ self.logger, self.exit, config, function, args }, ); if (config.name) |name| thread.setName(name) catch {}; try self.threads.append(thread); @@ -75,70 +78,9 @@ pub const ServiceManager = struct { pub fn deinit(self: Self) void { self.exit.store(true, .monotonic); for (self.threads.items) |t| t.join(); - for (self.shared_state.items) |s| s.deinit(); self.threads.deinit(); - self.shared_state.deinit(); - } -}; - -/// Convert a short-lived task into a long-lived service by looping it, -/// or make a service resilient by restarting it on failure. -pub const ServiceRunner = struct { - logger: Logger, - exit: *Atomic(bool), - service_counter: Atomic(usize) = .{ .raw = 0 }, - - const Self = @This(); - - pub fn runService( - self: *Self, - config: RunConfig, - function: anytype, - args: anytype, - ) !void { - var buf: [16]u8 = undefined; - const name = config.name orelse try std.fmt.bufPrint( - &buf, - "thread {d}", - .{std.Thread.getCurrentId()}, - ); - self.logger.infof("Starting {s}", .{name}); - var timer = try std.time.Timer.start(); - var last_iteration: u64 = 0; - while (!self.exit.load(.unordered)) { - if (@call(.auto, function, args)) |ok| { - switch (config.error_handler) { - .keep_looping => {}, - .just_return => { - self.logger.errf("Exiting {s} due to return", .{name}); - return ok; - }, - .set_exit_and_return => { - self.logger.errf("Signalling exit due to return from {s}", .{name}); - self.exit.store(true, .monotonic); - return ok; - }, - } - } else |err| { - switch (config.error_handler) { - .keep_looping => self.logger.errf("Unhandled error in {s}: {}", .{ name, err }), - .just_return => { - self.logger.errf("Exiting {s} due to error: {}", .{ name, err }); - return err; - }, - .set_exit_and_return => { - self.logger.errf("Signalling exit due to error in {s}: {}", .{ name, err }); - self.exit.store(true, .monotonic); - return err; - }, - } - } - last_iteration = timer.lap(); - std.time.sleep(@max( - config.min_pause_ns, - config.min_loop_duration_ns -| last_iteration, - )); - } + self.defers.deinit(); + self._arena.deinit(); } }; @@ -162,36 +104,94 @@ pub const ReturnHandler = enum { set_exit_and_return, }; -/// Create a pointer and manage its lifetime, without concern for its type. +/// Convert a short-lived task into a long-lived service by looping it, +/// or make a service resilient by restarting it on failure. 
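+///
+/// A rough usage sketch (hypothetical, for illustration; assumes `logger`
+/// and an `exit` flag are in scope, and `pollOnce` is a stand-in task):
+///
+/// ```zig
+/// fn pollOnce() !void {}
+///
+/// // runs pollOnce() in a loop, at most once per 100ms, until `exit` is set
+/// try runService(logger, &exit, .{
+///     .name = "Poller",
+///     .min_loop_duration_ns = 100 * std.time.ns_per_ms,
+/// }, pollOnce, .{});
+/// ```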
+pub fn runService( + logger: Logger, + exit: *Atomic(bool), + config: RunConfig, + function: anytype, + args: anytype, +) !void { + var buf: [16]u8 = undefined; + const name = config.name orelse try std.fmt.bufPrint( + &buf, + "thread {d}", + .{std.Thread.getCurrentId()}, + ); + logger.infof("Starting {s}", .{name}); + var timer = try std.time.Timer.start(); + var last_iteration: u64 = 0; + while (!exit.load(.unordered)) { + if (@call(.auto, function, args)) |ok| { + switch (config.error_handler) { + .keep_looping => {}, + .just_return => { + logger.errf("Exiting {s} due to return", .{name}); + return ok; + }, + .set_exit_and_return => { + logger.errf("Signalling exit due to return from {s}", .{name}); + exit.store(true, .monotonic); + return ok; + }, + } + } else |err| { + switch (config.error_handler) { + .keep_looping => logger.errf("Unhandled error in {s}: {}", .{ name, err }), + .just_return => { + logger.errf("Exiting {s} due to error: {}", .{ name, err }); + return err; + }, + .set_exit_and_return => { + logger.errf("Signalling exit due to error in {s}: {}", .{ name, err }); + exit.store(true, .monotonic); + return err; + }, + } + } + last_iteration = timer.lap(); + std.time.sleep(@max( + config.min_pause_ns, + config.min_loop_duration_ns -| last_iteration, + )); + } +} + +/// Defer actions until later. +/// +/// The `defer` keyword always defers to the end of the current +/// scope, which can sometimes be overly constraining. /// -/// Useful when you need to manage the lifetime of data in a different -/// context from where it is allocated or used. -pub const AnonBox = struct { - allocator: Allocator, - state: *anyopaque, - deinitFn: *const fn (*anyopaque) void, +/// Use `DeferList` when you need to defer actions to execute +/// in a broader scope. +/// +/// 1. Add defers using `deferCall`. +/// 2. Return this struct to the broader scope. +/// 3. Call `deinit` to run all the defers. +pub const DeferList = struct { + defers: std.ArrayList(Lazy(void)), const Self = @This(); - pub fn init( - comptime T: type, - comptime deinitFn: ?fn (*T) void, - allocator: Allocator, - ) Allocator.Error!struct { *T, Self } { - const ptr = try allocator.create(T); - const self = .{ - .allocator = allocator, - .state = @as(*anyopaque, @ptrCast(@alignCast(ptr))), - .deinitFn = struct { - fn deinit(opaque_ptr: *anyopaque) void { - if (deinitFn) |f| f(@ptrCast(@alignCast(opaque_ptr))) else {} - } - }.deinit, - }; - return .{ ptr, self }; + pub fn init(allocator: Allocator) Self { + return .{ .defers = std.ArrayList(Lazy(void)).init(allocator) }; } + pub fn deferCall( + self: *Self, + comptime function: anytype, + args: anytype, + ) !void { + const lazy = try Lazy(void).init(self.defers.allocator, function, args); + try self.defers.append(lazy); + } + + /// Runs all the defers, then deinits this struct. pub fn deinit(self: Self) void { - self.deinitFn(self.state); + for (1..self.defers.items.len + 1) |i| { + self.defers.items[self.defers.items.len - i].call(); + } + self.defers.deinit(); } }; diff --git a/src/utils/types.zig b/src/utils/types.zig index 32dc8e438..bc57b459c 100644 --- a/src/utils/types.zig +++ b/src/utils/types.zig @@ -12,3 +12,34 @@ pub fn enumFromName(comptime T: type, variant_name: []const u8) error{UnknownVar } return error.UnknownVariant; } + +/// Tuple type representing the args of a function. This is +/// the type you are required to pass into the @call builtin. +/// +/// ```zig +/// fn doThing(name: []const u8, count: usize) !u64 { ... 
} +/// +/// ParamsTuple(doThing) == struct { []const u8, usize } +/// +/// const args: ParamsTuple(doThing) = undefined; +/// const out: u64 = try @call(.auto, doThing, args); +/// ``` +pub fn ParamsTuple(comptime function: anytype) type { + const params = @typeInfo(@TypeOf(function)).Fn.params; + var fields: [params.len]std.builtin.Type.StructField = undefined; + for (params, 0..) |param, i| { + fields[i] = .{ + .name = std.fmt.comptimePrint("{}", .{i}), + .type = param.type.?, + .default_value = null, + .is_comptime = false, + .alignment = 0, + }; + } + return @Type(.{ .Struct = std.builtin.Type.Struct{ + .layout = .auto, + .fields = &fields, + .is_tuple = true, + .decls = &.{}, + } }); +} From 8d9c1aba8889ece6e3e2802f8a08a47d63339d5b Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 10:03:22 -0400 Subject: [PATCH 11/51] fix(shred-collector): socket error and tests - fix error only seen in release modes: use Socket instead of *Socket since it's just a few ints and basically behaves as a pointer already (also impacts gossip and generic socket code) - fix test compilation issues and memory bugs --- src/cmd/cmd.zig | 1 + src/common/lru.zig | 1 + src/gossip/service.zig | 4 +- src/net/socket_utils.zig | 13 ++--- src/shred_collector/repair_message.zig | 21 ++++---- src/shred_collector/repair_service.zig | 71 +++++++++++++++----------- src/shred_collector/service.zig | 10 ++-- src/shred_collector/shred_receiver.zig | 8 +-- src/shred_collector/shred_tracker.zig | 44 ++++++++++------ src/utils/thread.zig | 4 +- 10 files changed, 101 insertions(+), 76 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index de4ff8dbf..5a701b9de 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -298,6 +298,7 @@ var app = &cli.App{ &gossip_spy_node_option, &gossip_dump_option, // repair + &tvu_port_option, &repair_port_option, &test_repair_option, // accounts-db diff --git a/src/common/lru.zig b/src/common/lru.zig index f6c18a0aa..b71168e2e 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -112,6 +112,7 @@ pub fn LruCacheCustom( fn internal_recycle_or_create_node(self: *Self, key: K, value: V) error{OutOfMemory}!struct { ?LruEntry, LruEntry } { if (self.dbl_link_list.len == self.max_items) { const recycled_node = self.dbl_link_list.popFirst().?; + deinitFn(&recycled_node.data.value, self.deinit_context); assert(self.hashmap.swapRemove(recycled_node.data.key)); // after swap, this node is thrown away var node_to_swap: Node = .{ diff --git a/src/gossip/service.zig b/src/gossip/service.zig index d7df722fa..9b7067e96 100644 --- a/src/gossip/service.zig +++ b/src/gossip/service.zig @@ -282,7 +282,7 @@ pub const GossipService = struct { var receiver_handle = try Thread.spawn(.{}, socket_utils.readSocket, .{ self.allocator, - &self.gossip_socket, + self.gossip_socket, self.packet_incoming_channel, self.exit, self.logger, @@ -303,7 +303,7 @@ pub const GossipService = struct { } var responder_handle = try Thread.spawn(.{}, socket_utils.sendSocket, .{ - &self.gossip_socket, + self.gossip_socket, self.packet_outgoing_channel, self.exit, self.logger, diff --git a/src/net/socket_utils.zig b/src/net/socket_utils.zig index 0cb1c0bb1..d3b815eeb 100644 --- a/src/net/socket_utils.zig +++ b/src/net/socket_utils.zig @@ -12,7 +12,7 @@ pub const PACKETS_PER_BATCH: usize = 64; pub fn readSocket( allocator: std.mem.Allocator, - socket: *UdpSocket, + socket_: UdpSocket, incoming_channel: *Channel(std.ArrayList(Packet)), exit: *const std.atomic.Value(bool), logger: Logger, @@ -23,6 +23,7 @@ pub fn 
readSocket( // * read until it fails // * set it back to blocking before returning + var socket = socket_; const MAX_WAIT_NS = std.time.ns_per_ms; // 1ms while (!exit.load(.unordered)) { @@ -42,7 +43,7 @@ pub fn readSocket( // recv packets into batch while (true) { - const n_packets_read = recvMmsg(socket, packet_batch.items[count..capacity], exit) catch |err| { + const n_packets_read = recvMmsg(&socket, packet_batch.items[count..capacity], exit) catch |err| { if (count > 0 and err == error.WouldBlock) { if (timer.read() > MAX_WAIT_NS) { break; @@ -109,7 +110,7 @@ pub fn recvMmsg( } pub fn sendSocket( - socket: *UdpSocket, + socket: UdpSocket, outgoing_channel: *Channel(std.ArrayList(Packet)), exit: *const std.atomic.Value(bool), logger: Logger, @@ -158,7 +159,7 @@ pub const SocketThread = struct { const Self = @This(); - pub fn initSender(allocator: Allocator, logger: Logger, socket: *UdpSocket, exit: *Atomic(bool)) !Self { + pub fn initSender(allocator: Allocator, logger: Logger, socket: UdpSocket, exit: *Atomic(bool)) !Self { const channel = Channel(std.ArrayList(Packet)).init(allocator, 0); return .{ .channel = channel, @@ -167,7 +168,7 @@ pub const SocketThread = struct { }; } - pub fn initReceiver(allocator: Allocator, logger: Logger, socket: *UdpSocket, exit: *Atomic(bool)) !Self { + pub fn initReceiver(allocator: Allocator, logger: Logger, socket: UdpSocket, exit: *Atomic(bool)) !Self { const channel = Channel(std.ArrayList(Packet)).init(allocator, 0); return .{ .channel = channel, @@ -214,7 +215,7 @@ pub const BenchmarkPacketProcessing = struct { var exit = std.atomic.Value(bool).init(false); - var handle = try std.Thread.spawn(.{}, readSocket, .{ allocator, &socket, channel, &exit, .noop }); + var handle = try std.Thread.spawn(.{}, readSocket, .{ allocator, socket, channel, &exit, .noop }); var recv_handle = try std.Thread.spawn(.{}, benchmarkChannelRecv, .{ channel, n_packets }); var rand = std.rand.DefaultPrng.init(0); diff --git a/src/shred_collector/repair_message.zig b/src/shred_collector/repair_message.zig index 8d59868ae..1882dcc4a 100644 --- a/src/shred_collector/repair_message.zig +++ b/src/shred_collector/repair_message.zig @@ -53,6 +53,7 @@ pub fn serializeRepairRequest( timestamp: u64, nonce: Nonce, ) ![]u8 { + // TODO assert minimum length const header = RepairRequestHeader{ .signature = Signature.init(undefined), .sender = try Pubkey.fromBytes(&keypair.public_key.bytes), @@ -198,7 +199,7 @@ pub const RepairRequestHeader = struct { } }; -test "tvu.repair_message: signed/serialized RepairRequest is valid" { +test "signed/serialized RepairRequest is valid" { const allocator = std.testing.allocator; var rand = std.rand.DefaultPrng.init(392138); const rng = rand.random(); @@ -215,15 +216,15 @@ test "tvu.repair_message: signed/serialized RepairRequest is valid" { const timestamp = rng.int(u64); const nonce = rng.int(Nonce); + var buf: [1232]u8 = undefined; var serialized = try serializeRepairRequest( - allocator, + &buf, request, &keypair, recipient, timestamp, nonce, ); - defer allocator.free(serialized); var deserialized = try bincode.readFromSlice(allocator, RepairMessage, serialized, .{}); try deserialized.verify(serialized, recipient, timestamp); @@ -234,7 +235,7 @@ test "tvu.repair_message: signed/serialized RepairRequest is valid" { } } -test "tvu.repair_message: RepairRequestHeader serialization round trip" { +test "RepairRequestHeader serialization round trip" { var rng = std.rand.DefaultPrng.init(5224); var signature: [SIGNATURE_LENGTH]u8 = undefined; 
rng.fill(&signature); @@ -273,7 +274,7 @@ test "tvu.repair_message: RepairRequestHeader serialization round trip" { try std.testing.expect(header.eql(&roundtripped)); } -test "tvu.repair_message: RepairProtocolMessage.Pong serialization round trip" { +test "RepairProtocolMessage.Pong serialization round trip" { try testHelpers.assertMessageSerializesCorrectly(57340, .Pong, &[_]u8{ 7, 0, 0, 0, 252, 143, 181, 36, 240, 87, 69, 104, 157, 159, 242, 94, 101, 48, 187, 120, 173, 241, 68, 167, 217, 67, 141, 46, 105, 85, 179, 69, 249, 140, @@ -286,7 +287,7 @@ test "tvu.repair_message: RepairProtocolMessage.Pong serialization round trip" { }); } -test "tvu.repair_message: RepairProtocolMessage.WindowIndex serialization round trip" { +test "RepairProtocolMessage.WindowIndex serialization round trip" { try testHelpers.assertMessageSerializesCorrectly(4823794, .WindowIndex, &[_]u8{ 8, 0, 0, 0, 100, 7, 241, 74, 194, 88, 24, 128, 85, 15, 149, 108, 142, 133, 234, 217, 3, 79, 124, 171, 68, 30, 189, 219, 173, 11, 184, 159, 208, 104, @@ -301,7 +302,7 @@ test "tvu.repair_message: RepairProtocolMessage.WindowIndex serialization round }); } -test "tvu.repair_message: RepairProtocolMessage.HighestWindowIndex serialization round trip" { +test "RepairProtocolMessage.HighestWindowIndex serialization round trip" { try testHelpers.assertMessageSerializesCorrectly(636345, .HighestWindowIndex, &[_]u8{ 9, 0, 0, 0, 44, 123, 16, 108, 173, 151, 229, 132, 4, 0, 5, 215, 25, 179, 235, 166, 181, 42, 30, 231, 218, 43, 166, 238, 92, 80, 234, 87, 30, 123, @@ -316,7 +317,7 @@ test "tvu.repair_message: RepairProtocolMessage.HighestWindowIndex serialization }); } -test "tvu.repair_message: RepairProtocolMessage.Orphan serialization round trip" { +test "RepairProtocolMessage.Orphan serialization round trip" { try testHelpers.assertMessageSerializesCorrectly(734566, .Orphan, &[_]u8{ 10, 0, 0, 0, 52, 54, 182, 49, 197, 238, 253, 118, 145, 61, 198, 235, 42, 211, 229, 42, 2, 33, 5, 161, 179, 171, 26, 243, 51, 240, 82, 98, 121, 90, @@ -330,7 +331,7 @@ test "tvu.repair_message: RepairProtocolMessage.Orphan serialization round trip" }); } -test "tvu.repair_message: RepairProtocolMessage.AncestorHashes serialization round trip" { +test "RepairProtocolMessage.AncestorHashes serialization round trip" { try testHelpers.assertMessageSerializesCorrectly(6236757, .AncestorHashes, &[_]u8{ 11, 0, 0, 0, 192, 86, 218, 156, 168, 139, 216, 200, 30, 181, 244, 121, 90, 41, 177, 117, 55, 40, 199, 207, 62, 118, 56, 134, 73, 88, 74, 2, 139, 189, @@ -344,7 +345,7 @@ test "tvu.repair_message: RepairProtocolMessage.AncestorHashes serialization rou }); } -test "tvu.repair_message: RepairProtocolMessage serializes to size <= MAX_SERIALIZED_SIZE" { +test "RepairProtocolMessage serializes to size <= MAX_SERIALIZED_SIZE" { var rng = std.rand.DefaultPrng.init(184837); for (0..10) |_| { inline for (@typeInfo(RepairMessage.Tag).Enum.fields) |enum_field| { diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 239fd437c..ec8028010 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -44,6 +44,7 @@ pub const RepairService = struct { logger: Logger, exit: *Atomic(bool), start_slot: ?Slot, + last_big_request_timestamp_ms: i64 = 0, /// memory to re-use across iterations. 
initialized to empty report: MultiSlotReport, @@ -89,6 +90,8 @@ pub const RepairService = struct { pub fn deinit(self: *Self) void { self.peer_provider.deinit(); self.requester.deinit(); + self.thread_pool.deinit(); + self.report.deinit(); } /// Used to run RepairService continuously. @@ -121,12 +124,16 @@ pub const RepairService = struct { // TODO less often if (addressed_requests.items.len > 0) { - self.logger.debugf("sent {} repair requests", .{addressed_requests.items.len}); + self.logger.debugf( + "sent {} repair requests", + .{addressed_requests.items.len}, + ); } } const MAX_SHRED_REPAIRS = 1000; - const MAX_HIGHEST_REPAIRS = 100; + const MIN_HIGHEST_REPAIRS = 10; + const MAX_HIGHEST_REPAIRS = 200; fn getRepairs(self: *Self) !ArrayList(RepairRequest) { var repairs = ArrayList(RepairRequest).init(self.allocator); @@ -134,6 +141,13 @@ pub const RepairService = struct { var individual_count: usize = 0; var highest_count: usize = 0; var slot: Slot = 0; + + var num_highest_repairs: usize = MIN_HIGHEST_REPAIRS; + if (self.last_big_request_timestamp_ms + 5_000 < std.time.milliTimestamp()) { + self.last_big_request_timestamp_ms = std.time.milliTimestamp(); + num_highest_repairs = MAX_HIGHEST_REPAIRS; + } + for (self.report.items()) |*report| outer: { slot = report.slot; for (report.missing_shreds.items) |shred_window| { @@ -147,13 +161,14 @@ pub const RepairService = struct { } } } - if (highest_count < MAX_HIGHEST_REPAIRS) { + if (highest_count < num_highest_repairs) { highest_count += 1; try repairs.append(.{ .HighestShred = .{ slot, 0 } }); } } - if (highest_count < MAX_HIGHEST_REPAIRS) { - for (slot..slot + MAX_HIGHEST_REPAIRS - highest_count) |s| { + + if (highest_count < num_highest_repairs) { + for (slot..slot + num_highest_repairs - highest_count) |s| { try repairs.append(.{ .HighestShred = .{ s, 0 } }); } } @@ -194,7 +209,7 @@ pub const RepairRequester = struct { logger: Logger, rng: Random, keypair: *const KeyPair, - udp_send_socket: *Socket, + udp_send_socket: Socket, exit: *Atomic(bool), ) !Self { const sndr = try SocketThread.initSender(allocator, logger, udp_send_socket, exit); @@ -379,7 +394,7 @@ pub const RepairPeerProvider = struct { } }; -test "tvu.repair_service: RepairService sends repair request to gossip peer" { +test "RepairService sends repair request to gossip peer" { const allocator = std.testing.allocator; var rand = std.rand.DefaultPrng.init(4328095); var random = rand.random(); @@ -426,27 +441,27 @@ test "tvu.repair_service: RepairService sends repair request to gossip peer" { Pubkey.fromPublicKey(&keypair.public_key), &my_shred_version, ); - var tracker = try BasicShredTracker.init(allocator, 13579); - defer tracker.deinit(); - var service = RepairService{ - .allocator = allocator, - .requester = RepairRequester{ - .allocator = allocator, - .rng = random, - .udp_send_socket = &repair_socket, - .keypair = &keypair, - .logger = logger, - }, - .peer_provider = peers, - .logger = logger, - .exit = &exit, - .start_slot = 13579, - .shred_tracker = &tracker, - }; + var tracker = BasicShredTracker.init(13579, .noop); + var service = RepairService.init( + allocator, + logger, + &exit, + try RepairRequester.init( + allocator, + logger, + random, + &keypair, + repair_socket, + &exit, + ), + peers, + &tracker, + 13579, + ); defer service.deinit(); // run test - const handle = try std.Thread.spawn(.{}, RepairService.run, .{&service}); + try service.sendNecessaryRepairs(); var buf: [200]u8 = undefined; const size = peer_socket.receive(&buf) catch 0; @@ -456,13 +471,9 @@ test 
"tvu.repair_service: RepairService sends repair request to gossip peer" { try msg.verify(buf[0..160], Pubkey.fromPublicKey(&peer_keypair.public_key), @intCast(std.time.milliTimestamp())); try std.testing.expect(msg.HighestWindowIndex.slot == 13579); try std.testing.expect(msg.HighestWindowIndex.shred_index == 0); - - // exit - exit.store(true, .monotonic); - handle.join(); } -test "tvu.repair_service: RepairPeerProvider selects correct peers" { +test "RepairPeerProvider selects correct peers" { const allocator = std.testing.allocator; var rand = std.rand.DefaultPrng.init(4328095); var random = rand.random(); diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index f21efc226..8fc9d7ff4 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -70,8 +70,8 @@ pub fn start( var shred_collector = ServiceManager.init(deps.allocator, deps.logger, interface.exit); var arena = shred_collector.arena(); - var repair_socket = try bindUdpReusable(conf.repair_port); - var tvu_socket = try bindUdpReusable(conf.tvu_port); + const repair_socket = try bindUdpReusable(conf.repair_port); + const tvu_socket = try bindUdpReusable(conf.tvu_port); // tracker (shared state, internal to Shred Collector) const shred_tracker = try arena.create(sig.shred_collector.BasicShredTracker); @@ -93,7 +93,7 @@ pub fn start( deps.logger, deps.random, deps.my_keypair, - &repair_socket, + repair_socket, interface.exit, ); const repair_svc = try arena.create(RepairService); @@ -128,8 +128,8 @@ pub fn start( .keypair = deps.my_keypair, .exit = interface.exit, .logger = deps.logger, - .repair_socket = &repair_socket, - .tvu_socket = &tvu_socket, + .repair_socket = repair_socket, + .tvu_socket = tvu_socket, .outgoing_shred_channel = unverified_shreds_channel, .shred_version = interface.my_shred_version, }; diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 95ded885c..aa8c19dc0 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -28,8 +28,8 @@ pub const ShredReceiver = struct { keypair: *const KeyPair, exit: *Atomic(bool), logger: Logger, - repair_socket: *Socket, - tvu_socket: *Socket, + repair_socket: Socket, + tvu_socket: Socket, outgoing_shred_channel: *Channel(ArrayList(Packet)), shred_version: *const Atomic(u16), @@ -199,7 +199,3 @@ fn verifyShredSlots(slot: Slot, parent: Slot, root: Slot) bool { const REPAIR_RESPONSE_SERIALIZED_PING_BYTES = 132; const RepairPing = union(enum) { Ping: Ping }; - -test "asd quend" { - std.debug.print("{s}", .{@typeName(@TypeOf(ShredReceiver.run))}); -} diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index dbb721d23..2a2cf33cc 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -9,6 +9,8 @@ const Slot = sig.core.Slot; const MAX_SHREDS_PER_SLOT: usize = sig.shred_collector.MAX_SHREDS_PER_SLOT; +const MIN_SLOT_AGE_TO_REPORT_AS_MISSING: u64 = 200; + pub const Range = struct { start: usize, end: ?usize, @@ -22,7 +24,9 @@ pub const BasicShredTracker = struct { /// The oldest slot still being tracked, which hasn't yet been finished current_bottom_slot: Slot, /// The highest slot for which a shred has been received and processed successfully. - max_slot_seen: Slot, + max_slot_processed: Slot, + /// The highest slot that has been seen at all. 
+ max_slot_seen: Slot = 0, /// ring buffer slots: [num_slots]MonitoredSlot = .{.{}} ** num_slots, @@ -34,7 +38,7 @@ pub const BasicShredTracker = struct { return .{ .start_slot = slot, .current_bottom_slot = slot, - .max_slot_seen = slot -| 1, + .max_slot_processed = slot -| 1, .logger = logger, }; } @@ -47,10 +51,11 @@ pub const BasicShredTracker = struct { self.mux.lock(); defer self.mux.unlock(); + self.max_slot_seen = @max(self.max_slot_seen, slot); const monitored_slot = try self.getSlot(slot); const new = try monitored_slot.record(shred_index); if (new) self.logger.debugf("new slot: {}", .{slot}); - self.max_slot_seen = @max(self.max_slot_seen, slot); + self.max_slot_processed = @max(self.max_slot_processed, slot); } pub fn setLastShred(self: *Self, slot: Slot, index: usize) !void { @@ -72,10 +77,12 @@ pub const BasicShredTracker = struct { var found_an_incomplete_slot = false; slot_reports.clearRetainingCapacity(); const timestamp = std.time.milliTimestamp(); - const last_slot_to_check = @max(self.max_slot_seen, self.current_bottom_slot); + const last_slot_to_check = @max(self.max_slot_processed, self.current_bottom_slot); for (self.current_bottom_slot..last_slot_to_check + 1) |slot| { const monitored_slot = try self.getSlot(slot); - if (monitored_slot.first_received_timestamp_ms + 1000 > timestamp) { + if (monitored_slot.is_complete or + monitored_slot.first_received_timestamp_ms + MIN_SLOT_AGE_TO_REPORT_AS_MISSING > timestamp) + { continue; } var slot_report = try slot_reports.addOne(); @@ -87,7 +94,7 @@ pub const BasicShredTracker = struct { slot_reports.drop(1); } if (!found_an_incomplete_slot) { - self.logger.debugf("finished slot: {}", .{slot}); + self.logger.debugf("finished slot: {}", .{slot}); // FIXME not always logged self.current_bottom_slot = @max(self.current_bottom_slot, slot + 1); monitored_slot.* = .{}; } @@ -140,10 +147,13 @@ const MonitoredSlot = struct { max_seen: ?usize = null, last_shred: ?usize = null, first_received_timestamp_ms: i64 = 0, + is_complete: bool = false, const Self = @This(); + /// returns whether this is the first shred received for the slot pub fn record(self: *Self, shred_index: usize) !bool { + if (self.is_complete) return false; self.shreds.set(shred_index); if (self.max_seen == null) { self.max_seen = shred_index; @@ -156,6 +166,7 @@ const MonitoredSlot = struct { pub fn identifyMissing(self: *Self, missing_shreds: *ArrayList(Range)) !void { missing_shreds.clearRetainingCapacity(); + if (self.is_complete) return; const highest_shred_to_check = self.last_shred orelse self.max_seen orelse 0; var gap_start: ?usize = null; for (0..highest_shred_to_check + 1) |i| { @@ -173,47 +184,48 @@ const MonitoredSlot = struct { } else if (self.max_seen.? < self.last_shred.?) { try missing_shreds.append(.{ .start = self.max_seen.? 
+ 1, .end = self.last_shred });
         }
+        if (missing_shreds.items.len == 0) {
+            self.is_complete = true;
+        }
     }
 };
 
-test "tvu.shred_tracker2: trivial happy path" {
+test "trivial happy path" {
     const allocator = std.testing.allocator;
 
     var msr = MultiSlotReport.init(allocator);
     defer msr.deinit();
 
-    var tracker = try BasicShredTracker.init(allocator, 13579);
-    defer tracker.deinit();
+    var tracker = BasicShredTracker.init(13579, .noop);
 
     try tracker.identifyMissing(&msr);
 
-    try std.testing.expect(1 == msr.reports.items.len);
-    const report = msr.reports.items[0];
+    try std.testing.expect(1 == msr.len);
+    const report = msr.items()[0];
     try std.testing.expect(13579 == report.slot);
     try std.testing.expect(1 == report.missing_shreds.items.len);
     try std.testing.expect(0 == report.missing_shreds.items[0].start);
     try std.testing.expect(null == report.missing_shreds.items[0].end);
 }
 
-test "tvu.shred_tracker2: 1 registered shred is identified" {
+test "1 registered shred is identified" {
     const allocator = std.testing.allocator;
 
     var msr = MultiSlotReport.init(allocator);
     defer msr.deinit();
 
-    var tracker = try BasicShredTracker.init(allocator, 13579);
-    defer tracker.deinit();
+    var tracker = BasicShredTracker.init(13579, .noop);
 
     try tracker.registerShred(13579, 123);
     std.time.sleep(210 * std.time.ns_per_ms);
 
     try tracker.identifyMissing(&msr);
 
     try std.testing.expect(1 == msr.len);
-    const report = msr.reports.items[0];
+    const report = msr.items()[0];
     try std.testing.expect(13579 == report.slot);
     try std.testing.expect(2 == report.missing_shreds.items.len);
     try std.testing.expect(0 == report.missing_shreds.items[0].start);
     try std.testing.expect(123 == report.missing_shreds.items[0].end);
-    try std.testing.expect(124 == report.missing_shreds.items[1].start);
+    try std.testing.expect(0 == report.missing_shreds.items[1].start);
     try std.testing.expect(null == report.missing_shreds.items[1].end);
 }
diff --git a/src/utils/thread.zig b/src/utils/thread.zig
index 00bdc6f6f..812b5922d 100644
--- a/src/utils/thread.zig
+++ b/src/utils/thread.zig
@@ -193,7 +193,9 @@ pub fn HomogeneousThreadPool(comptime TaskType: type) type {
 
         /// Like join, but it returns an error if any tasks failed, and otherwise discards task output.
         pub fn joinFallible(self: *Self) !void {
-            for (self.join().items) |result| try result;
+            const results = self.join();
+            for (results.items) |result| try result;
+            results.deinit();
         }
     };
 }

From ac03b25868c63728bb9ef84a13eaa3d2192588d3 Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Mon, 6 May 2024 10:05:32 -0400
Subject: [PATCH 12/51] fix: zig fmt

---
 src/utils/types.zig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/types.zig b/src/utils/types.zig
index bc57b459c..a18548d89 100644
--- a/src/utils/types.zig
+++ b/src/utils/types.zig
@@ -20,7 +20,7 @@ pub fn enumFromName(comptime T: type, variant_name: []const u8) error{UnknownVar
 /// fn doThing(name: []const u8, count: usize) !u64 { ... }
 ///
 /// ParamsTuple(doThing) == struct { []const u8, usize }
-/// 
+///
 /// const args: ParamsTuple(doThing) = undefined;
 /// const out: u64 = try @call(.auto, doThing, args);
 /// ```

From 3bd773c818ef08dbf96745774f8b5672b29716c1 Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Mon, 6 May 2024 10:25:23 -0400
Subject: [PATCH 13/51] fix(remove_unused.py): was removing lines used in other imports

sometimes an import is only used as a shortcut for other imports. this
change makes sure an imported name is not referenced by any other import
line before removing it.

this also addresses a related issue.
if an import is *only* used in another import, and that other import is unused, then both should be removed. this accomplishes it by repeating the process until an iteration completes without removing any lines. i believe this is how brennan originally had the logic but it was lost when i rewrote it --- remove_unused.py | 62 ++++++++++++++++++--------------- src/shred_collector/service.zig | 6 ---- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/remove_unused.py b/remove_unused.py index c08f7e661..0178507fd 100644 --- a/remove_unused.py +++ b/remove_unused.py @@ -29,34 +29,40 @@ ) total_num_lines_removed = 0 +lines_removed_this_time = 999 # get past 1st while check -for path in zig_files: - with open(path) as f: - orig_file = f.read() - orig_lines = orig_file.split("\n") - if orig_lines[-1] == "": - orig_lines = orig_lines[0:-1] - non_import_lines = [] - imported_names = [] - for line_num, line in enumerate(orig_lines): - match = import_line_regex.match(line) - if match: - imported_names.append((match.groups()[0], line_num)) - else: - non_import_lines.append(line) - non_import_file = "\n".join(non_import_lines) - lines_to_drop = set() - num_lines_to_remove = 0 - for name in imported_names: - if re.search(f"[^a-zA-Z0-9_]{name[0]}[^a-zA-Z0-9_]", non_import_file) is None: - lines_to_drop.add(name[1]) - num_lines_to_remove += 1 - with open(path, "w") as f: - f.writelines( - f"{line}\n" for i, line in enumerate(orig_lines) if i not in lines_to_drop - ) - lines_to_drop - print(path, num_lines_to_remove) - total_num_lines_removed += num_lines_to_remove +while lines_removed_this_time > 0: + lines_removed_this_time = 0 + for path in zig_files: + with open(path) as f: + orig_file = f.read() + orig_lines = orig_file.split("\n") + if orig_lines[-1] == "": + orig_lines = orig_lines[0:-1] + imported_names = [] + for line_num, line in enumerate(orig_lines): + match = import_line_regex.match(line) + if match: + imported_names.append((match.groups()[0], line_num)) + lines_to_drop = set() + num_lines_to_remove = 0 + for name, line in imported_names: + match = re.findall(f"[^a-zA-Z0-9_.]{name}[^a-zA-Z0-9_]", orig_file) + assert len(match) > 0 + if len(match) == 1: + lines_to_drop.add(line) + num_lines_to_remove += 1 + with open(path, "w") as f: + f.writelines( + f"{line}\n" + for i, line in enumerate(orig_lines) + if i not in lines_to_drop + ) + lines_to_drop + print(path, num_lines_to_remove) + total_num_lines_removed += num_lines_to_remove + lines_removed_this_time += num_lines_to_remove + print("removed this iteration:", lines_removed_this_time) + print() print("total lines removed:", total_num_lines_removed) diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 8fc9d7ff4..16d7e3527 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -3,30 +3,24 @@ const network = @import("zig-network"); const sig = @import("../lib.zig"); const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = network.Socket; -const Channel = sig.sync.Channel; const GossipTable = sig.gossip.GossipTable; const Logger = sig.trace.Logger; -const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; const RwMux = sig.sync.RwMux; const ServiceManager = sig.utils.ServiceManager; const Slot = sig.core.Slot; const this = sig.shred_collector; -const BasicShredTracker = this.BasicShredTracker; const RepairPeerProvider = 
this.RepairPeerProvider; const RepairRequester = this.RepairRequester; const RepairService = this.RepairService; const ShredReceiver = this.ShredReceiver; -const SOCKET_TIMEOUT = sig.net.SOCKET_TIMEOUT; - /// Settings which tell the Shred Collector how to behave. pub const ShredCollectorConfig = struct { start_slot: ?Slot, From eec97ea5b3a7abb3a685861773b4bb72a797534d Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 10:53:17 -0400 Subject: [PATCH 14/51] fix(shred-collector): handle unknown start slot --- src/shred_collector/repair_service.zig | 4 ---- src/shred_collector/service.zig | 8 ++++---- src/shred_collector/shred_tracker.zig | 25 ++++++++++++++++--------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index ec8028010..ad05cf25b 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -43,7 +43,6 @@ pub const RepairService = struct { shred_tracker: *BasicShredTracker, logger: Logger, exit: *Atomic(bool), - start_slot: ?Slot, last_big_request_timestamp_ms: i64 = 0, /// memory to re-use across iterations. initialized to empty @@ -72,7 +71,6 @@ pub const RepairService = struct { requester: RepairRequester, peer_provider: RepairPeerProvider, shred_tracker: *BasicShredTracker, - start_slot: ?Slot, ) Self { return RepairService{ .allocator = allocator, @@ -81,7 +79,6 @@ pub const RepairService = struct { .shred_tracker = shred_tracker, .logger = logger, .exit = exit, - .start_slot = start_slot, // TODO: do nothing if null .report = MultiSlotReport.init(allocator), .thread_pool = RequestBatchThreadPool.init(allocator, 4), }; @@ -456,7 +453,6 @@ test "RepairService sends repair request to gossip peer" { ), peers, &tracker, - 13579, ); defer service.deinit(); diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 16d7e3527..4f63eb2c0 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -16,6 +16,7 @@ const ServiceManager = sig.utils.ServiceManager; const Slot = sig.core.Slot; const this = sig.shred_collector; +const BasicShredTracker = this.BasicShredTracker; const RepairPeerProvider = this.RepairPeerProvider; const RepairRequester = this.RepairRequester; const RepairService = this.RepairService; @@ -68,9 +69,9 @@ pub fn start( const tvu_socket = try bindUdpReusable(conf.tvu_port); // tracker (shared state, internal to Shred Collector) - const shred_tracker = try arena.create(sig.shred_collector.BasicShredTracker); - shred_tracker.* = sig.shred_collector.BasicShredTracker.init( - conf.start_slot orelse 0, // TODO + const shred_tracker = try arena.create(BasicShredTracker); + shred_tracker.* = BasicShredTracker.init( + conf.start_slot, deps.logger, ); @@ -99,7 +100,6 @@ pub fn start( repair_requester, repair_peer_provider, shred_tracker, - conf.start_slot, ); try shred_collector.spawn( RepairService.run_config, diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index 2a2cf33cc..0b0f716cf 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -20,11 +20,11 @@ pub const BasicShredTracker = struct { logger: sig.trace.Logger, mux: Mutex = .{}, /// The slot that this struct was initialized with at index 0 - start_slot: Slot, + start_slot: ?Slot, /// The oldest slot still being tracked, which hasn't yet been finished current_bottom_slot: Slot, /// The highest slot for which a shred has been received 
and processed successfully. - max_slot_processed: Slot, + max_slot_processed: Slot = 0, /// The highest slot that has been seen at all. max_slot_seen: Slot = 0, /// ring buffer @@ -34,15 +34,21 @@ pub const BasicShredTracker = struct { const Self = @This(); - pub fn init(slot: Slot, logger: sig.trace.Logger) Self { + pub fn init(slot: ?Slot, logger: sig.trace.Logger) Self { return .{ .start_slot = slot, - .current_bottom_slot = slot, - .max_slot_processed = slot -| 1, + .current_bottom_slot = slot orelse 0, .logger = logger, }; } + pub fn maybeSetStart(self: *Self, start_slot: Slot) void { + if (self.start_slot == null) { + self.start_slot = start_slot; + self.current_bottom_slot = start_slot; + } + } + pub fn registerShred( self: *Self, slot: Slot, @@ -51,6 +57,7 @@ pub const BasicShredTracker = struct { self.mux.lock(); defer self.mux.unlock(); + self.maybeSetStart(slot); self.max_slot_seen = @max(self.max_slot_seen, slot); const monitored_slot = try self.getSlot(slot); const new = try monitored_slot.record(shred_index); @@ -62,6 +69,7 @@ pub const BasicShredTracker = struct { self.mux.lock(); defer self.mux.unlock(); + self.maybeSetStart(slot); const monitored_slot = try self.getSlot(slot); if (monitored_slot.last_shred) |old_last| { monitored_slot.last_shred = @min(old_last, index); @@ -71,6 +79,7 @@ pub const BasicShredTracker = struct { } pub fn identifyMissing(self: *Self, slot_reports: *MultiSlotReport) !void { + if (self.start_slot == null) return; self.mux.lock(); defer self.mux.unlock(); @@ -80,9 +89,7 @@ pub const BasicShredTracker = struct { const last_slot_to_check = @max(self.max_slot_processed, self.current_bottom_slot); for (self.current_bottom_slot..last_slot_to_check + 1) |slot| { const monitored_slot = try self.getSlot(slot); - if (monitored_slot.is_complete or - monitored_slot.first_received_timestamp_ms + MIN_SLOT_AGE_TO_REPORT_AS_MISSING > timestamp) - { + if (monitored_slot.first_received_timestamp_ms + MIN_SLOT_AGE_TO_REPORT_AS_MISSING > timestamp) { continue; } var slot_report = try slot_reports.addOne(); @@ -108,7 +115,7 @@ pub const BasicShredTracker = struct { if (slot < self.current_bottom_slot) { return error.SlotUnderflow; } - const slot_index = (slot - self.start_slot) % num_slots; + const slot_index = (slot - self.start_slot.?) 
% num_slots; return &self.slots[slot_index]; } }; From 8dc33ed0b367e3098beecc74c3b81f3ac61cb551 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 10:57:05 -0400 Subject: [PATCH 15/51] fix(repair): don't send pointless repair requests --- src/shred_collector/repair_service.zig | 4 +++- src/shred_collector/shred_tracker.zig | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index ad05cf25b..89b8e126e 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -134,7 +134,9 @@ pub const RepairService = struct { fn getRepairs(self: *Self) !ArrayList(RepairRequest) { var repairs = ArrayList(RepairRequest).init(self.allocator); - try self.shred_tracker.identifyMissing(&self.report); + if (!try self.shred_tracker.identifyMissing(&self.report)) { + return repairs; + } var individual_count: usize = 0; var highest_count: usize = 0; var slot: Slot = 0; diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index 0b0f716cf..fa537c7fe 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -78,8 +78,9 @@ pub const BasicShredTracker = struct { } } - pub fn identifyMissing(self: *Self, slot_reports: *MultiSlotReport) !void { - if (self.start_slot == null) return; + /// returns whether it makes sense to send any repair requests + pub fn identifyMissing(self: *Self, slot_reports: *MultiSlotReport) !bool { + if (self.start_slot == null) return false; self.mux.lock(); defer self.mux.unlock(); @@ -106,6 +107,7 @@ pub const BasicShredTracker = struct { monitored_slot.* = .{}; } } + return true; } fn getSlot(self: *Self, slot: Slot) error{ SlotUnderflow, SlotOverflow }!*MonitoredSlot { From cbf74ef2efcfa5960606d9642476f0344aba1b24 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 11:17:39 -0400 Subject: [PATCH 16/51] refactor(shred): restructure Shred to reduce redundancy previously, Shred was structured the same as in agave, which throws everything in an enum despite sharing the same structure, except for a few fields. 
this puts those few fields in a union, and otherwise puts all the common data directly into shred also fixes a bug in a test --- src/shred_collector/shred.zig | 118 +++++++++----------------- src/shred_collector/shred_tracker.zig | 4 +- 2 files changed, 41 insertions(+), 81 deletions(-) diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index 496efbd13..7cdf51376 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -19,69 +19,50 @@ pub const MAX_DATA_SHREDS_PER_SLOT: usize = 32_768; pub const MAX_CODE_SHREDS_PER_SLOT: usize = MAX_DATA_SHREDS_PER_SLOT; pub const MAX_SHREDS_PER_SLOT: usize = MAX_CODE_SHREDS_PER_SLOT + MAX_DATA_SHREDS_PER_SLOT; -/// TODO this can be restructured with shared code lifted -pub const Shred = union(enum) { - code: ShredCode, - data: ShredData, - - const Self = @This(); - - pub fn fromPayload(allocator: Allocator, shred: []const u8) !Self { - const variant = shred_layout.getShredVariant(shred) orelse return error.uygugj; - return switch (variant.shred_type) { - .Code => .{ .code = try ShredCode.fromPayload(allocator, shred) }, - .Data => .{ .data = try ShredData.fromPayload(allocator, shred) }, - }; - } - - pub fn header(self: *const Self) *const ShredCommonHeader { - return switch (self.*) { - .code, .data => |s| &s.common_header, - }; - } - - pub fn isLastInSlot(self: *const Self) bool { - return switch (self.*) { - .code => false, - .data => |data| data.isLastInSlot(), - }; - } -}; - -pub const ShredData = struct { +pub const Shred = struct { common_header: ShredCommonHeader, - custom_header: DataShredHeader, - // payload: ArrayList(u8), + custom_header: CustomHeader, + payload: ArrayList(u8), - const SIZE_OF_PAYLOAD: usize = 1203; // TODO this can be calculated like solana + const CustomHeader = union(ShredType) { + Code: CodingShredHeader, + Data: DataShredHeader, + }; const Self = @This(); pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self { - return try eitherShredFromPayload(Self, DataShredHeader, SIZE_OF_PAYLOAD, allocator, payload); + const variant = shred_layout.getShredVariant(payload) orelse return error.uygugj; + const SIZE_OF_PAYLOAD = switch (variant.shred_type) { + .Code => CodingShredHeader.SIZE_OF_PAYLOAD, + .Data => DataShredHeader.SIZE_OF_PAYLOAD, + }; + if (payload.len < SIZE_OF_PAYLOAD) { + return error.InvalidPayloadSize; + } + const exact_payload = payload[0..SIZE_OF_PAYLOAD]; + var buf = std.io.fixedBufferStream(exact_payload); + const common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}); + const custom_header: CustomHeader = switch (variant.shred_type) { + .Code => .{ .Code = try bincode.read(allocator, CodingShredHeader, buf.reader(), .{}) }, + .Data => .{ .Data = try bincode.read(allocator, DataShredHeader, buf.reader(), .{}) }, + }; + var owned_payload = ArrayList(u8).init(allocator); // TODO: find a cheaper way to get the payload in here + try owned_payload.appendSlice(exact_payload); + var self = Self{ + .common_header = common_header, + .custom_header = custom_header, + .payload = owned_payload, + }; + try self.sanitize(); + return self; } pub fn isLastInSlot(self: *const Self) bool { - return self.custom_header.flags.isSet(.last_shred_in_slot); - } - - fn sanitize(self: *const Self) !void { - _ = self; - // TODO - } -}; - -pub const ShredCode = struct { - common_header: ShredCommonHeader, - custom_header: CodingShredHeader, - // payload: ArrayList(u8), - - const SIZE_OF_PAYLOAD: usize = 1228; // TODO this can be calculated like solana 
- - const Self = @This(); - - pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self { - return try eitherShredFromPayload(Self, CodingShredHeader, SIZE_OF_PAYLOAD, allocator, payload); + return switch (self.custom_header) { + .Code => false, + .Data => |data| data.flags.isSet(.last_shred_in_slot), + }; } fn sanitize(self: *const Self) !void { @@ -90,31 +71,6 @@ pub const ShredCode = struct { } }; -fn eitherShredFromPayload( - comptime Self: type, - comptime Header: type, - comptime SIZE_OF_PAYLOAD: usize, - allocator: Allocator, - payload: []const u8, -) !Self { - if (payload.len < SIZE_OF_PAYLOAD) { - return error.InvalidPayloadSize; - } - const exact_payload = payload[0..SIZE_OF_PAYLOAD]; - var buf = std.io.fixedBufferStream(exact_payload); - const common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}); - const custom_header = try bincode.read(allocator, Header, buf.reader(), .{}); - // var owned_payload = ArrayList(u8).init(allocator); // TODO: find a cheaper way to get the payload in here - // try owned_payload.appendSlice(exact_payload); - var self = Self{ - .common_header = common_header, - .custom_header = custom_header, - // .payload = owned_payload, - }; - try self.sanitize(); - return self; -} - pub const ShredCommonHeader = struct { signature: Signature, shred_variant: ShredVariant, @@ -130,12 +86,16 @@ pub const DataShredHeader = struct { parent_offset: u16, flags: ShredFlags, size: u16, // common shred header + data shred header + data + + const SIZE_OF_PAYLOAD: usize = 1203; // TODO this can be calculated like solana }; pub const CodingShredHeader = struct { num_data_shreds: u16, num_coding_shreds: u16, position: u16, // [0..num_coding_shreds) + + const SIZE_OF_PAYLOAD: usize = 1228; // TODO this can be calculated like solana }; pub const ShredType = enum(u8) { diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index fa537c7fe..0ccb8da29 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -207,7 +207,7 @@ test "trivial happy path" { var tracker = BasicShredTracker.init(13579, .noop); - try tracker.identifyMissing(&msr); + _ = try tracker.identifyMissing(&msr); try std.testing.expect(1 == msr.len); const report = msr.items()[0]; @@ -227,7 +227,7 @@ test "1 registered shred is identified" { try tracker.registerShred(13579, 123); std.time.sleep(210 * std.time.ns_per_ms); - try tracker.identifyMissing(&msr); + _ = try tracker.identifyMissing(&msr); try std.testing.expect(1 == msr.len); const report = msr.items()[0]; From 61ce2e18464332fdc424552e0b78be07aeb7de7b Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 11:30:14 -0400 Subject: [PATCH 17/51] fix(shred-collector): memory leak and refactor shreds were being leaked by shred-processor. 
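the leak, roughly (a sketch for illustration; names match the diff below):

    // shred_processor.zig, before this patch:
    const shred = try Shred.fromPayload(allocator, shred_payload);
    // fromPayload copies the packet bytes into a heap-owned ArrayList(u8),
    // and nothing ever freed that copy, so every processed shred leaked.

the fix adds Shred.deinit and defers it at the call site, as the diff below shows.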
refactor Shred to have less redundancy and indirection, making it easier to read
---
 src/shred_collector/shred.zig           | 24 ++++++++++------------
 src/shred_collector/shred_processor.zig |  3 ++-
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig
index 7cdf51376..bc47250ae 100644
--- a/src/shred_collector/shred.zig
+++ b/src/shred_collector/shred.zig
@@ -21,16 +21,18 @@ pub const MAX_SHREDS_PER_SLOT: usize = MAX_CODE_SHREDS_PER_SLOT + MAX_DATA_SHRED
 
 pub const Shred = struct {
     common_header: ShredCommonHeader,
-    custom_header: CustomHeader,
-    payload: ArrayList(u8),
-
-    const CustomHeader = union(ShredType) {
+    custom_header: union(ShredType) {
         Code: CodingShredHeader,
         Data: DataShredHeader,
-    };
+    },
+    payload: ArrayList(u8),
 
     const Self = @This();
 
+    pub fn deinit(self: *Self) void {
+        self.payload.deinit();
+    }
+
     pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self {
         const variant = shred_layout.getShredVariant(payload) orelse return error.UnknownShredVariant;
         const SIZE_OF_PAYLOAD = switch (variant.shred_type) {
@@ -42,16 +44,14 @@ pub const Shred = struct {
         }
         const exact_payload = payload[0..SIZE_OF_PAYLOAD];
         var buf = std.io.fixedBufferStream(exact_payload);
-        const common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{});
-        const custom_header: CustomHeader = switch (variant.shred_type) {
-            .Code => .{ .Code = try bincode.read(allocator, CodingShredHeader, buf.reader(), .{}) },
-            .Data => .{ .Data = try bincode.read(allocator, DataShredHeader, buf.reader(), .{}) },
-        };
         var owned_payload = ArrayList(u8).init(allocator); // TODO: find a cheaper way to get the payload in here
         try owned_payload.appendSlice(exact_payload);
         var self = Self{
-            .common_header = common_header,
-            .custom_header = custom_header,
+            .common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}),
+            .custom_header = switch (variant.shred_type) {
+                .Code => .{ .Code = try bincode.read(allocator, CodingShredHeader, buf.reader(), .{}) },
+                .Data => .{ .Data = try bincode.read(allocator, DataShredHeader, buf.reader(), .{}) },
+            },
             .payload = owned_payload,
         };
         try self.sanitize();

diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig
index efcf97ba3..99b7f4a9f 100644
--- a/src/shred_collector/shred_processor.zig
+++ b/src/shred_collector/shred_processor.zig
@@ -34,7 +34,8 @@ pub fn processShreds(
                 error.SlotUnderflow, error.SlotOverflow => continue,
                 else => return err,
             };
-            const shred = try Shred.fromPayload(allocator, shred_payload);
+            var shred = try Shred.fromPayload(allocator, shred_payload);
+            defer shred.deinit();
             if (shred.isLastInSlot()) {
                 tracker.setLastShred(slot, index) catch |err| switch (err) {
                     error.SlotUnderflow, error.SlotOverflow => continue,

From 6ea65fcd4d9402e7dc54ada1bcf2586560c39f2c Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Mon, 6 May 2024 11:41:33 -0400
Subject: [PATCH 18/51] docs(shred-collector): add agave permalinks

---
 src/shred_collector/repair_message.zig  | 10 +++++-----
 src/shred_collector/repair_service.zig  |  2 ++
 src/shred_collector/service.zig         |  2 ++
 src/shred_collector/shred.zig           |  1 +
 src/shred_collector/shred_processor.zig |  2 +-
 src/shred_collector/shred_receiver.zig  |  2 +-
 src/shred_collector/shred_verifier.zig  |  1 +
 7 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/shred_collector/repair_message.zig b/src/shred_collector/repair_message.zig
index 1882dcc4a..bfdf2477d 100644
---
a/src/shred_collector/repair_message.zig +++ b/src/shred_collector/repair_message.zig @@ -13,13 +13,13 @@ const Slot = sig.core.Slot; const SIGNATURE_LENGTH = @import("../core/signature.zig").SIGNATURE_LENGTH; -/// Analogous to `SIGNED_REPAIR_TIME_WINDOW` +/// Analogous to [SIGNED_REPAIR_TIME_WINDOW](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/serve_repair.rs#L89) const SIGNED_REPAIR_TIME_WINDOW_SECS: u64 = 600; /// Internal representation of a repair request. /// Does not contain any header or identification, only info about the desired shreds. /// -/// Analogous to `solana_core::repair::serve_repair::ShredRepairType` +/// Analogous to [ShredRepairType](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/serve_repair.rs#L95) pub const RepairRequest = union(enum) { /// Requesting `MAX_ORPHAN_REPAIR_RESPONSES` parent shreds Orphan: Slot, @@ -44,7 +44,7 @@ pub const RepairRequest = union(enum) { /// - serialize message /// - sign message /// -/// Analogous to `ServeRepair::map_repair_request` +/// Analogous to [ServeRepair::map_repair_request](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/serve_repair.rs#L1141) pub fn serializeRepairRequest( buf: []u8, request: RepairRequest, @@ -90,7 +90,7 @@ pub fn serializeRepairRequest( /// Messaging data that is directly serialized and sent over repair sockets. /// Contains any header/identification as needed. /// -/// Analogous to `solana_core::repair::serve_repair::RepairProtocol` +/// Analogous to [RepairProtocol](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/serve_repair.rs#L221) pub const RepairMessage = union(enum(u8)) { Pong: Pong = 7, WindowIndex: struct { @@ -140,7 +140,7 @@ pub const RepairMessage = union(enum(u8)) { } } - /// Analogous to `ServeRepair::verify_signed_packet` + /// Analogous to [ServeRepair::verify_signed_packet](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/serve_repair.rs#L847) pub fn verify( self: *const Self, /// bincode serialized data, from which this struct was deserialized diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 89b8e126e..8fe9440ea 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -36,6 +36,8 @@ const serializeRepairRequest = sig.shred_collector.serializeRepairRequest; /// Identifies which repairs are needed and sends them /// - delegates to RepairPeerProvider to identify repair peers. /// - delegates to RepairRequester to send the requests. +/// +/// Analogous to [RepairService](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/repair_service.rs#L245) pub const RepairService = struct { allocator: Allocator, requester: RepairRequester, diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 4f63eb2c0..c9f1afdc5 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -57,6 +57,8 @@ pub const ShredCollectorInterface = struct { /// /// Returns a ServiceManager representing the Shred Collector. /// This can be used to join and deinit the Shred Collector. 
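+///
+/// A rough usage sketch (inferred from this declaration and the doc comment
+/// above, not copied from a real call site):
+///
+///   var collector = try start(conf, deps, interface);
+///   defer collector.deinit();
+///   collector.join();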
+/// +/// Analogous to a subset of [Tvu::new](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/tvu.rs#L119) pub fn start( conf: ShredCollectorConfig, deps: ShredCollectorDependencies, diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index bc47250ae..4d78b4f98 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -19,6 +19,7 @@ pub const MAX_DATA_SHREDS_PER_SLOT: usize = 32_768; pub const MAX_CODE_SHREDS_PER_SLOT: usize = MAX_DATA_SHREDS_PER_SLOT; pub const MAX_SHREDS_PER_SLOT: usize = MAX_CODE_SHREDS_PER_SLOT + MAX_DATA_SHREDS_PER_SLOT; +/// Analogous to [Shred](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/ledger/src/shred.rs#L245) pub const Shred = struct { common_header: ShredCommonHeader, custom_header: union(ShredType) { diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig index 99b7f4a9f..c378e4a87 100644 --- a/src/shred_collector/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -11,7 +11,7 @@ const Channel = sig.sync.Channel; const Packet = sig.net.Packet; const Shred = sig.shred_collector.Shred; -/// analogous to `WindowService` TODO permalink +/// Analogous to [WindowService](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/window_service.rs#L395) pub fn processShreds( allocator: Allocator, verified_shreds: *Channel(ArrayList(Packet)), diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index aa8c19dc0..b07a1c8e8 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -22,7 +22,7 @@ const SocketThread = sig.net.SocketThread; const endpointToString = sig.net.endpointToString; -/// Analogous to `ShredFetchStage` TODO permalinks TODO deinit? 
+/// Analogous to [ShredFetchStage](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/shred_fetch_stage.rs#L34) pub const ShredReceiver = struct { allocator: Allocator, keypair: *const KeyPair, diff --git a/src/shred_collector/shred_verifier.zig b/src/shred_collector/shred_verifier.zig index 265590764..3f33f74e4 100644 --- a/src/shred_collector/shred_verifier.zig +++ b/src/shred_collector/shred_verifier.zig @@ -9,6 +9,7 @@ const Atomic = std.atomic.Value; const Channel = sig.sync.Channel; const Packet = sig.net.Packet; +/// Analogous to [run_shred_sigverify](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/turbine/src/sigverify_shreds.rs#L82) pub fn runShredSignatureVerification( exit: *Atomic(bool), incoming: *Channel(ArrayList(Packet)), From a2b1f229a4e2aeb57f0d3c9ca1e2f6f6367f4d56 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 11:56:57 -0400 Subject: [PATCH 19/51] refactor: rename param, remove unnecessary todo --- src/shred_collector/repair_message.zig | 1 - src/utils/service.zig | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/shred_collector/repair_message.zig b/src/shred_collector/repair_message.zig index bfdf2477d..e75f08279 100644 --- a/src/shred_collector/repair_message.zig +++ b/src/shred_collector/repair_message.zig @@ -53,7 +53,6 @@ pub fn serializeRepairRequest( timestamp: u64, nonce: Nonce, ) ![]u8 { - // TODO assert minimum length const header = RepairRequestHeader{ .signature = Signature.init(undefined), .sender = try Pubkey.fromBytes(&keypair.public_key.bytes), diff --git a/src/utils/service.zig b/src/utils/service.zig index 3f4e8956f..5c5290a7f 100644 --- a/src/utils/service.zig +++ b/src/utils/service.zig @@ -28,13 +28,13 @@ pub const ServiceManager = struct { const Self = @This(); - pub fn init(allocator_: Allocator, logger: Logger, exit: *Atomic(bool)) Self { + pub fn init(allocator: Allocator, logger: Logger, exit: *Atomic(bool)) Self { return .{ .logger = logger, .exit = exit, - .threads = std.ArrayList(std.Thread).init(allocator_), - ._arena = ArenaAllocator.init(allocator_), - .defers = DeferList.init(allocator_), + .threads = std.ArrayList(std.Thread).init(allocator), + ._arena = ArenaAllocator.init(allocator), + .defers = DeferList.init(allocator), }; } From 76a06fddc2fac2899729ba96ad1bcb43a9d01e77 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 12:20:02 -0400 Subject: [PATCH 20/51] refactor(shred-receiver): clarify todos --- src/shred_collector/shred_receiver.zig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index b07a1c8e8..9063e7b0e 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -116,10 +116,8 @@ pub const ShredReceiver = struct { } else { const endpoint_str = try endpointToString(self.allocator, &packet.addr); defer endpoint_str.deinit(); - // self.logger.field("from_endpoint", endpoint_str.items) - // .debugf("tvu: recv shred message: {} bytes", .{packet.size}); - // TODO figure out these values + // TODO set correct values once using snapshot + blockstore const root = 0; const max_slot = std.math.maxInt(Slot); if (shouldDiscardShred(packet, root, shred_version, max_slot)) { @@ -178,7 +176,10 @@ fn shouldDiscardShred( }, } - // TODO: should we check for enable_chained_merkle_shreds? 
+ // TODO: check for feature activation of enable_chained_merkle_shreds + // 7uZBkJXJ1HkuP6R3MJfZs7mLwymBcDbKdqbF51ZWLier + // https://github.com/solana-labs/solana/pull/34916 + // https://github.com/solana-labs/solana/pull/35076 _ = layout.getSignature(shred) orelse return true; _ = layout.getSignedData(shred) orelse return true; From 3fd3b1742f8a24c7ef3b17735d6b74be68f267ee Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 6 May 2024 14:45:45 -0400 Subject: [PATCH 21/51] feat(shred): ShredVariant.toByte + serialize + test --- src/shred_collector/shred.zig | 65 +++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index 4d78b4f98..e7e464dd7 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -110,7 +110,9 @@ pub const ShredVariant = struct { chained: bool, resigned: bool, - fn fromByte(byte: u8) error{ UnknownShredVariant, LegacyShredVariant }!@This() { + const Self = @This(); + + fn fromByte(byte: u8) error{ UnknownShredVariant, LegacyShredVariant }!Self { return switch (byte & 0xF0) { 0x40 => .{ .shred_type = .Code, @@ -153,16 +155,44 @@ pub const ShredVariant = struct { else => error.UnknownShredVariant, }; } + + fn toByte(self: Self) error{ UnknownShredVariant, LegacyShredVariant, IllegalProof }!u8 { + if (self.proof_size & 0xF0 != 0) return error.IllegalProof; + const big_end: u8 = + if (self.shred_type == .Code and + self.chained == false and + self.resigned == false) + 0x40 + else if (self.shred_type == .Code and + self.chained == true and + self.resigned == false) + 0x60 + else if (self.shred_type == .Code and + self.chained == true and + self.resigned == true) + 0x70 + else if (self.shred_type == .Data and + self.chained == false and + self.resigned == false) + 0x80 + else if (self.shred_type == .Data and + self.chained == true and + self.resigned == false) + 0x90 + else if (self.shred_type == .Data and + self.chained == true and + self.resigned == true) + 0xb0 + else + return error.UnknownShredVariant; + return big_end | self.proof_size; + } }; pub const ShredVariantConfig = blk: { const S = struct { - pub fn serialize(writer: anytype, data: anytype, params: bincode.Params) !void { - _ = writer; - _ = params; - _ = data; - @panic("todo - not implemented"); // TODO - // try writer.writeByte(0); + pub fn serialize(writer: anytype, data: anytype, _: bincode.Params) !void { + return writer.writeByte(try ShredVariant.toByte(data)); } pub fn deserialize(_: ?std.mem.Allocator, reader: anytype, _: bincode.Params) !ShredVariant { @@ -260,3 +290,24 @@ pub const shred_layout = struct { return std.mem.readInt(Int, bytes, .little); } }; + +test "basic shred variant round trip" { + try testShredVariantRoundTrip(0x4C, .{ + .shred_type = .Code, + .proof_size = 0x0C, + .chained = false, + .resigned = false, + }); +} + +fn testShredVariantRoundTrip(expected_byte: u8, start_variant: ShredVariant) !void { + const actual_byte = try start_variant.toByte(); + try std.testing.expect(actual_byte == expected_byte); + const end_variant = try ShredVariant.fromByte(actual_byte); + try std.testing.expect( + start_variant.shred_type == end_variant.shred_type and + start_variant.proof_size == end_variant.proof_size and + start_variant.chained == end_variant.chained and + start_variant.resigned == end_variant.resigned, + ); +} From f013ac07d0cb6efbbd0ab0ef570e646e4ee6f5ba Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 7 May 2024 10:30:29 -0400 Subject: [PATCH 22/51] 
feat(shred): implement sanitize

sanitize is comprehensive, and it required implementing several other
methods on the shreds as well
---
 src/lib.zig                            |   1 +
 src/shred_collector/shred.zig          | 295 +++++++++++++++++++++----
 src/shred_collector/shred_receiver.zig |   4 +-
 src/utils/bitflags.zig                 |  12 +-
 src/utils/math.zig                     |  13 +++
 5 files changed, 279 insertions(+), 46 deletions(-)
 create mode 100644 src/utils/math.zig

diff --git a/src/lib.zig b/src/lib.zig
index 77cc8084d..1ada9cb91 100644
--- a/src/lib.zig
+++ b/src/lib.zig
@@ -64,6 +64,7 @@ pub const utils = struct {
     pub usingnamespace @import("utils/arraylist.zig");
     pub usingnamespace @import("utils/bitflags.zig");
     pub usingnamespace @import("utils/lazy.zig");
+    pub usingnamespace @import("utils/math.zig");
     pub usingnamespace @import("utils/shortvec.zig");
     pub usingnamespace @import("utils/service.zig");
     pub usingnamespace @import("utils/thread.zig");

diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig
index e7e464dd7..4ae5ffd8c 100644
--- a/src/shred_collector/shred.zig
+++ b/src/shred_collector/shred.zig
@@ -13,62 +13,275 @@
 const Packet = sig.net.Packet;
 const Signature = sig.core.Signature;
 const Slot = sig.core.Slot;
 
+const checkedAdd = sig.utils.checkedAdd;
+const checkedSub = sig.utils.checkedSub;
+
 const SIGNATURE_LENGTH = sig.core.SIGNATURE_LENGTH;
 
-pub const MAX_DATA_SHREDS_PER_SLOT: usize = 32_768;
-pub const MAX_CODE_SHREDS_PER_SLOT: usize = MAX_DATA_SHREDS_PER_SLOT;
-pub const MAX_SHREDS_PER_SLOT: usize = MAX_CODE_SHREDS_PER_SLOT + MAX_DATA_SHREDS_PER_SLOT;
+pub const MAX_SHREDS_PER_SLOT: usize = coding_shred.max_per_slot + data_shred.max_per_slot;
+
+const DATA_SHREDS_PER_FEC_BLOCK: usize = 32;
+const SIZE_OF_MERKLE_ROOT: usize = sig.core.HASH_SIZE;
+
+pub const coding_shred = ShredConstants{
+    .max_per_slot = 32_768,
+    .payload_size = 1228, // TODO this can be calculated like solana
+    .headers_size = 89,
+};
+
+pub const data_shred = ShredConstants{
+    .max_per_slot = 32_768,
+    .payload_size = 1203, // TODO this can be calculated like solana
+    .headers_size = 88,
+};
 
 /// Analogous to [Shred](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/ledger/src/shred.rs#L245)
-pub const Shred = struct {
-    common_header: ShredCommonHeader,
-    custom_header: union(ShredType) {
-        Code: CodingShredHeader,
-        Data: DataShredHeader,
-    },
-    payload: ArrayList(u8),
+pub const Shred = union(ShredType) {
+    Code: CodingShred,
+    Data: DataShred,
 
     const Self = @This();
 
-    pub fn deinit(self: *Self) void {
-        self.payload.deinit();
+    pub fn deinit(self: Self) void {
+        return switch (self) {
+            inline .Code, .Data => |s| s.fields.deinit(),
+        };
     }
 
     pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self {
         const variant = shred_layout.getShredVariant(payload) orelse return error.UnknownShredVariant;
-        const SIZE_OF_PAYLOAD = switch (variant.shred_type) {
-            .Code => CodingShredHeader.SIZE_OF_PAYLOAD,
-            .Data => DataShredHeader.SIZE_OF_PAYLOAD,
-        };
-        if (payload.len < SIZE_OF_PAYLOAD) {
-            return error.InvalidPayloadSize;
-        }
-        const exact_payload = payload[0..SIZE_OF_PAYLOAD];
-        var buf = std.io.fixedBufferStream(exact_payload);
-        var owned_payload = ArrayList(u8).init(allocator); // TODO: find a cheaper way to get the payload in here
-        try owned_payload.appendSlice(exact_payload);
-        var self = Self{
-            .common_header = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}),
-            .custom_header = switch (variant.shred_type) {
-                .Code => .{ .Code = try bincode.read(allocator, CodingShredHeader, buf.reader(), .{}) },
-                .Data => .{ .Data = try bincode.read(allocator, DataShredHeader, buf.reader(), .{}) },
-            },
-            .payload = owned_payload,
+        return switch (variant.shred_type) {
+            .Code => .{ .Code = .{ .fields = try CodingShred.Fields.fromPayload(allocator, payload) } },
+            .Data => .{ .Data = .{ .fields = try DataShred.Fields.fromPayload(allocator, payload) } },
         };
-        try self.sanitize();
-        return self;
     }
 
     pub fn isLastInSlot(self: *const Self) bool {
-        return switch (self.custom_header) {
+        return switch (self.*) {
             .Code => false,
-            .Data => |data| data.flags.isSet(.last_shred_in_slot),
+            .Data => |data| data.fields.custom.flags.isSet(.last_shred_in_slot),
         };
     }
 
     fn sanitize(self: *const Self) !void {
-        _ = self;
-        // TODO
+        if (self.commonHeader().shred_variant.shred_type != self.*) {
+            return error.InconsistentShredVariant;
+        }
+        switch (self.*) {
+            inline .Code, .Data => |s| try s.sanitize(),
+        }
+    }
+
+    pub fn commonHeader(self: *const Self) *const ShredCommonHeader {
+        return switch (self.*) {
+            inline .Code, .Data => |c| &c.fields.common,
+        };
+    }
+};
+
+pub const CodingShred = struct {
+    fields: Fields,
+    const Fields = GenericShred(CodingShredHeader, coding_shred);
+
+    const Self = @This();
+    const consts = coding_shred;
+
+    fn sanitize(self: *const Self) error{InvalidNumCodingShreds}!void {
+        try self.fields.sanitize();
+        if (self.fields.custom.num_coding_shreds > 8 * DATA_SHREDS_PER_FEC_BLOCK) {
+            return error.InvalidNumCodingShreds;
+        }
+    }
+
+    pub fn erasureShardIndex(self: *const Self) !usize {
+        // Assert that the last shred index in the erasure set does not
+        // overshoot MAX_{DATA,CODE}_SHREDS_PER_SLOT.
+        if (try checkedAdd(
+            self.fields.common.fec_set_index,
+            try checkedSub(@as(u32, @intCast(self.fields.custom.num_data_shreds)), 1),
+        ) >= data_shred.max_per_slot) {
+            return error.InvalidErasureShardIndex;
+        }
+        if (try checkedAdd(
+            try self.first_coding_index(),
+            try checkedSub(@as(u32, @intCast(self.fields.custom.num_coding_shreds)), 1),
+        ) >= coding_shred.max_per_slot) {
+            return error.InvalidErasureShardIndex;
+        }
+        const num_data_shreds: usize = @intCast(self.fields.custom.num_data_shreds);
+        const num_coding_shreds: usize = @intCast(self.fields.custom.num_coding_shreds);
+        const position: usize = @intCast(self.fields.custom.position);
+        const fec_set_size = try checkedAdd(num_data_shreds, num_coding_shreds);
+        const index = try checkedAdd(position, num_data_shreds);
+        return if (index < fec_set_size) index else error.InvalidErasureShardIndex;
+    }
+
+    fn first_coding_index(self: *const Self) !u32 {
+        return checkedSub(self.fields.common.index, self.fields.custom.position);
+    }
+};
+
+pub const DataShred = struct {
+    fields: Fields,
+    const Fields = GenericShred(DataShredHeader, data_shred);
+
+    const Self = @This();
+    const consts = data_shred;
+
+    fn sanitize(self: *const Self) !void {
+        try self.fields.sanitize();
+        const flags = self.fields.custom.flags;
+        if (flags.intersects(.last_shred_in_slot) and
+            !flags.isSet(.data_complete_shred))
+        {
+            return error.InvalidShredFlags;
+        }
+        _ = try self.data();
+        _ = try self.parent();
+    }
+
+    fn data(self: *const Self) ![]const u8 {
+        const v = self.fields.common.shred_variant;
+        const data_buffer_size = try Fields.capacity(v.proof_size, v.chained, v.resigned);
+        const size = self.fields.custom.size;
+        if (size > self.fields.payload.len or
+            size < consts.headers_size or
+            size > consts.headers_size + data_buffer_size)
+        {
+            return error.InvalidDataSize;
+        }
+
+        return self.fields.payload[consts.headers_size..size];
+    }
+
+    fn parent(self: *const Self) !Slot {
+        const slot = self.fields.common.slot;
+        if (self.fields.custom.parent_offset == 0 and slot != 0) {
+            return error.InvalidParentOffset;
+        }
+        return checkedSub(slot, self.fields.custom.parent_offset) catch error.InvalidParentOffset;
+    }
+
+    pub fn erasureShardIndex(self: *const Self) error{IntegerOverflow}!usize {
+        return @intCast(try checkedSub(self.fields.common.index, self.fields.common.fec_set_index));
+    }
+};
+
+/// Analogous to [Shred trait](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/ledger/src/shred/traits.rs#L6)
+pub fn GenericShred(
+    comptime CustomHeader: type,
+    constants: ShredConstants,
+) type {
+    return struct {
+        common: ShredCommonHeader,
+        custom: CustomHeader,
+        allocator: Allocator,
+        payload: []const u8,
+
+        const Self = @This();
+
+        pub fn deinit(self: Self) void {
+            self.allocator.free(self.payload);
+        }
+
+        pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self {
+            if (payload.len < constants.payload_size) {
+                return error.InvalidPayloadSize;
+            }
+            const owned_payload = try allocator.alloc(u8, constants.payload_size);
+
+            // TODO: It would be nice to find a way to get the payload in here without copying the entire thing.
+            // The challenge is that the input payload is owned by the original packet list which was read
+            // from the socket, and that list may be cluttered with a lot of garbage data.
+            // So a copy like this may be needed somewhere, but it's worth some more thought.
+            @memcpy(owned_payload, payload[0..constants.payload_size]);
+
+            var buf = std.io.fixedBufferStream(payload[0..constants.payload_size]);
+            const self = Self{
+                .allocator = allocator,
+                .common = try bincode.read(allocator, ShredCommonHeader, buf.reader(), .{}),
+                .custom = try bincode.read(allocator, CustomHeader, buf.reader(), .{}),
+                .payload = owned_payload,
+            };
+
+            try self.sanitize();
+            return self;
+        }
+
+        fn sanitize(self: *const Self) !void {
+            _ = try self.merkleProof();
+
+            if (self.common.index >= constants.max_per_slot) {
+                return error.InvalidShredIndex;
+            }
+            if (constants.payload_size != self.payload.len) {
+                return error.InvalidPayloadSize;
+            }
+        }
+
+        /// TODO should this be memoized?
+        fn capacity(proof_size: u8, chained: bool, resigned: bool) !usize {
+            std.debug.assert(chained or !resigned);
+            return checkedSub(
+                constants.payload_size,
+                constants.headers_size +
+                    (if (chained) SIZE_OF_MERKLE_ROOT else 0) +
+                    proof_size * merkle_proof_entry_size +
+                    (if (resigned) SIGNATURE_LENGTH else 0),
+            ) catch error.InvalidProofSize;
+        }
+
+        /// The return contains a pointer to data owned by the shred.
+        fn merkleProof(self: *const Self) !MerkleProofEntryList {
+            const size = self.common.shred_variant.proof_size * merkle_proof_entry_size;
+            const offset = try self.proofOffset();
+            const end = offset + size;
+            if (self.payload.len < end) {
+                return error.InsufficientPayloadSize;
+            }
+            return .{
+                .bytes = self.payload[offset..end],
+                .len = self.common.shred_variant.proof_size,
+            };
+        }
+
+        // Where the merkle proof starts in the shred binary.
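+        // A sketch of the payload layout assumed by the offset arithmetic here,
+        // derived from capacity() above (the optional regions depend on the variant):
+        //
+        //   | headers | data capacity | merkle root (if chained) | proof entries | signature (if resigned) |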
+        fn proofOffset(self: *const Self) !usize {
+            const v = self.common.shred_variant;
+            return constants.headers_size +
+                try capacity(v.proof_size, v.chained, v.resigned) +
+                if (v.chained) SIZE_OF_MERKLE_ROOT else 0;
+        }
+
+        fn erasureShardAsSlice(self: *const Self) ![]const u8 {
+            if (self.payload.len != constants.payload_size) {
+                return error.InvalidPayloadSize;
+            }
+            const variant = self.common.shred_variant;
+            const end = constants.headers_size +
+                try capacity(variant.proof_size, variant.chained, variant.resigned) +
+                SIGNATURE_LENGTH;
+            if (self.payload.len < end) {
+                return error.InsufficientPayloadSize;
+            }
+            return self.payload[SIGNATURE_LENGTH..end];
+        }
+    };
+}
+
+const MerkleProofEntry = [merkle_proof_entry_size]u8;
+const merkle_proof_entry_size: usize = 20;
+
+/// This is a reference. It does not own the data. Be careful with its lifetime.
+const MerkleProofEntryList = struct {
+    bytes: []const u8,
+    len: usize,
+
+    pub fn get(self: *@This(), index: usize) error{IndexOutOfBounds}!MerkleProofEntry {
+        if (index >= self.len) return error.IndexOutOfBounds;
+        const start = index * merkle_proof_entry_size;
+        const end = start + merkle_proof_entry_size;
+        return self.bytes[start..end][0..merkle_proof_entry_size].*;
    }
};

@@ -87,16 +300,12 @@ pub const DataShredHeader = struct {
     parent_offset: u16,
     flags: ShredFlags,
     size: u16, // common shred header + data shred header + data
-
-    const SIZE_OF_PAYLOAD: usize = 1203; // TODO this can be calculated like solana
 };
 
 pub const CodingShredHeader = struct {
     num_data_shreds: u16,
     num_coding_shreds: u16,
     position: u16, // [0..num_coding_shreds)
-
-    const SIZE_OF_PAYLOAD: usize = 1228; // TODO this can be calculated like solana
 };
 
 pub const ShredType = enum(u8) {
@@ -215,6 +424,12 @@ pub const ShredFlags = BitFlags(enum(u8) {
     last_shred_in_slot = 0b1100_0000,
 });
 
+pub const ShredConstants = struct {
+    max_per_slot: usize,
+    payload_size: usize,
+    headers_size: usize,
+};
+
 pub const shred_layout = struct {
     const SIZE_OF_COMMON_SHRED_HEADER: usize = 83;
     const SIZE_OF_DATA_SHRED_HEADERS: usize = 88;

diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig
index 9063e7b0e..c1800998c 100644
--- a/src/shred_collector/shred_receiver.zig
+++ b/src/shred_collector/shred_receiver.zig
@@ -165,11 +165,11 @@ fn shouldDiscardShred(
     if (slot > max_slot) return true;
     switch (variant.shred_type) {
         .Code => {
-            if (index >= sig.shred_collector.MAX_CODE_SHREDS_PER_SLOT) return true;
+            if (index >= sig.shred_collector.coding_shred.max_per_slot) return true;
             if (slot <= root) return true;
         },
         .Data => {
-            if (index >= sig.shred_collector.MAX_DATA_SHREDS_PER_SLOT) return true;
+            if (index >= sig.shred_collector.data_shred.max_per_slot) return true;
             const parent_offset = layout.getParentOffset(shred) orelse return true;
             const parent = slot -| @as(Slot, @intCast(parent_offset));
             if (!verifyShredSlots(slot, parent, root)) return true;

diff --git a/src/utils/bitflags.zig b/src/utils/bitflags.zig
index 69b723894..8655c0541 100644
--- a/src/utils/bitflags.zig
+++ b/src/utils/bitflags.zig
@@ -6,10 +6,6 @@ pub fn BitFlags(comptime FlagEnum: type) type {
 
         pub const Flag = FlagEnum;
 
-        pub fn isSet(self: *const Self, flag: FlagEnum) bool {
-            return self.state & @intFromEnum(flag) == @intFromEnum(flag);
-        }
-
         pub fn set(self: *Self, flag: FlagEnum) void {
             self.state |= @intFromEnum(flag);
         }
@@ -17,5 +13,13 @@ pub fn BitFlags(comptime FlagEnum: type) type {
             self.state &= ~@intFromEnum(flag);
         }
+
+        pub fn isSet(self: *const Self, flag: FlagEnum) bool {
+            return self.state & @intFromEnum(flag) == @intFromEnum(flag);
+        }
+
+        pub fn intersects(self: *const Self, flag: FlagEnum) bool {
+            return self.state & @intFromEnum(flag) != 0;
+        }
     };
 }

diff --git a/src/utils/math.zig b/src/utils/math.zig
new file mode 100644
index 000000000..de83381a9
--- /dev/null
+++ b/src/utils/math.zig
@@ -0,0 +1,13 @@
+/// Returns a + b, or error.IntegerOverflow if the sum overflows.
+/// Uses @addWithOverflow so overflow is reported as an error instead of
+/// tripping the safety check that a plain `a + b` would hit in safe builds.
+pub fn checkedAdd(a: anytype, b: anytype) error{IntegerOverflow}!@TypeOf(a) {
+    const result = @addWithOverflow(a, b);
+    return if (result[1] == 0) result[0] else error.IntegerOverflow;
+}
+
+/// Returns a - b, or error.IntegerOverflow if the difference overflows.
+pub fn checkedSub(a: anytype, b: anytype) error{IntegerOverflow}!@TypeOf(a) {
+    const result = @subWithOverflow(a, b);
+    return if (result[1] == 0) result[0] else error.IntegerOverflow;
+}

From 4cf68e232646b9118137fa9c3313b38ab368b15f Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Thu, 30 May 2024 22:24:24 -0400
Subject: [PATCH 23/51] fix(shred-collector): remove debugging code and move up reply bytes

---
 src/shred_collector/shred_receiver.zig | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig
index c1800998c..0970d252f 100644
--- a/src/shred_collector/shred_receiver.zig
+++ b/src/shred_collector/shred_receiver.zig
@@ -114,9 +114,6 @@ pub const ShredReceiver = struct {
             try self.handlePing(packet, responses);
             packet.set(.discard);
         } else {
-            const endpoint_str = try endpointToString(self.allocator, &packet.addr);
-            defer endpoint_str.deinit();
-
             // TODO set correct values once using snapshot + blockstore
             const root = 0;
             const max_slot = std.math.maxInt(Slot);
             if (shouldDiscardShred(packet, root, shred_version, max_slot)) {
@@ -140,12 +137,9 @@ pub const ShredReceiver = struct {
         const reply = RepairMessage{ .Pong = try Pong.init(&ping, self.keypair) };
 
         const reply_packet = try responses.addOne();
-        reply_packet.addr = packet.addr;
         const reply_bytes = try bincode.writeToSlice(&reply_packet.data, reply, .{});
         reply_packet.size = reply_bytes.len;
-
-        const endpoint_str = try endpointToString(self.allocator, &packet.addr);
-        defer endpoint_str.deinit();
+        reply_packet.addr = packet.addr;
     }
 };

From f3dbc6b383c3ebbc7c0bb800ee68de58b617def3 Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Thu, 30 May 2024 22:48:01 -0400
Subject: [PATCH 24/51] refactor(RecyclingList): readability with else

---
 src/utils/arraylist.zig | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/utils/arraylist.zig b/src/utils/arraylist.zig
index 4ea73f5c2..316c3299a 100644
--- a/src/utils/arraylist.zig
+++ b/src/utils/arraylist.zig
@@ -103,11 +103,12 @@ pub fn RecyclingList(
             resetItem(item);
             self.len += 1;
             return item;
+        } else {
+            const item = try self.private.addOne();
+            item.* = initBlank(self.private.allocator);
+            self.len += 1;
+            return item;
         }
-        const item = try self.private.addOne();
-        item.* = initBlank(self.private.allocator);
-        self.len += 1;
-        return item;
     }
 
     pub fn drop(self: *Self, n: usize) void {

From e77e4b6b93775af8637746b20eb3570854c1d4c5 Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Thu, 30 May 2024 23:11:39 -0400
Subject: [PATCH 25/51] fix(shred-collector):
set repair flag and use bitflags in packet --- src/net/packet.zig | 13 ++++--------- src/net/socket_utils.zig | 2 +- src/shred_collector/repair_service.zig | 1 - src/shred_collector/shred.zig | 2 +- src/shred_collector/shred_processor.zig | 2 +- src/shred_collector/shred_receiver.zig | 10 ++++++---- src/shred_collector/shred_verifier.zig | 4 ++-- src/utils/bitflags.zig | 2 +- 8 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/net/packet.zig b/src/net/packet.zig index fa23c41c2..23988a6e3 100644 --- a/src/net/packet.zig +++ b/src/net/packet.zig @@ -1,4 +1,7 @@ const network = @import("zig-network"); +const sig = @import("../lib.zig"); + +const BitFlags = sig.utils.BitFlags; /// Maximum over-the-wire size of a Transaction /// 1280 is IPv6 minimum MTU @@ -10,7 +13,7 @@ pub const Packet = struct { data: [PACKET_DATA_SIZE]u8, size: usize, addr: network.EndPoint, - flags: u8 = 0, + flags: BitFlags(Flag) = .{}, const Self = @This(); @@ -29,14 +32,6 @@ pub const Packet = struct { .size = 0, }; } - - pub fn set(self: *Self, flag: Flag) void { - self.flags |= @intFromEnum(flag); - } - - pub fn isSet(self: *const Self, flag: Flag) bool { - return self.flags & @intFromEnum(flag) == @intFromEnum(flag); - } }; /// TODO this violates separation of concerns. it's unusual for network-specific diff --git a/src/net/socket_utils.zig b/src/net/socket_utils.zig index d3b815eeb..e439be1b5 100644 --- a/src/net/socket_utils.zig +++ b/src/net/socket_utils.zig @@ -97,7 +97,7 @@ pub fn recvMmsg( } packet.addr = recv_meta.sender; packet.size = bytes_read; - packet.flags = 0; + packet.flags = .{}; if (count == 0) { // nonblocking mode diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 8fe9440ea..9a71afc2b 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -238,7 +238,6 @@ pub const RepairRequester = struct { const packet = packet_batch.addOneAssumeCapacity(); packet.* = Packet{ .addr = request.recipient_addr.toEndpoint(), - .flags = 0, .data = undefined, .size = undefined, }; diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index 4ae5ffd8c..825831713 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -448,7 +448,7 @@ pub const shred_layout = struct { } pub fn getShredSize(packet: *const Packet) usize { - return if (packet.isSet(.repair)) + return if (packet.flags.isSet(.repair)) packet.size -| @sizeOf(Nonce) else packet.size; diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig index c378e4a87..0c85549b6 100644 --- a/src/shred_collector/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -26,7 +26,7 @@ pub fn processShreds( continue; } for (buf.items) |packet_batch| { - for (packet_batch.items) |*packet| if (!packet.isSet(.discard)) { + for (packet_batch.items) |*packet| if (!packet.flags.isSet(.discard)) { const shred_payload = layout.getShred(packet) orelse continue; const slot = layout.getSlot(shred_payload) orelse continue; const index = layout.getIndex(shred_payload) orelse continue; diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 0970d252f..590b96456 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -62,12 +62,12 @@ pub const ShredReceiver = struct { const x = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, tvu_receivers, sender.channel }, + .{ self, tvu_receivers, 
sender.channel, false }, ); const y = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, .{repair_receiver.channel}, sender.channel }, + .{ self, .{repair_receiver.channel}, sender.channel, true }, ); x.join(); y.join(); @@ -79,6 +79,7 @@ pub const ShredReceiver = struct { self: *Self, receivers: anytype, sender: *Channel(ArrayList(Packet)), + comptime is_repair: bool, ) !void { var buf = ArrayList(ArrayList(Packet)).init(self.allocator); while (!self.exit.load(.unordered)) { @@ -90,6 +91,7 @@ pub const ShredReceiver = struct { for (buf.items) |batch| { for (batch.items) |*packet| { try self.handlePacket(packet, &responses, shred_version); + if (is_repair) packet.flags.set(.repair); } try self.outgoing_shred_channel.send(batch); } @@ -112,13 +114,13 @@ pub const ShredReceiver = struct { ) !void { if (packet.size == REPAIR_RESPONSE_SERIALIZED_PING_BYTES) { try self.handlePing(packet, responses); - packet.set(.discard); + packet.flags.set(.discard); } else { // TODO set correct values once using snapshot + blockstore const root = 0; const max_slot = std.math.maxInt(Slot); if (shouldDiscardShred(packet, root, shred_version, max_slot)) { - packet.set(.discard); + packet.flags.set(.discard); } } } diff --git a/src/shred_collector/shred_verifier.zig b/src/shred_collector/shred_verifier.zig index 3f33f74e4..856f8e9e1 100644 --- a/src/shred_collector/shred_verifier.zig +++ b/src/shred_collector/shred_verifier.zig @@ -28,7 +28,7 @@ pub fn runShredSignatureVerification( // TODO parallelize this once it's actually verifying signatures for (packet_batch.items) |*packet| { if (!verifyShred(packet, &leader_schedule)) { - packet.set(.discard); + packet.flags.set(.discard); } else { verified_count += 1; } @@ -41,7 +41,7 @@ pub fn runShredSignatureVerification( /// verify_shred_cpu fn verifyShred(packet: *const Packet, leader_schedule: *const LeaderScheduleCalculator) bool { - if (packet.isSet(.discard)) return false; + if (packet.flags.isSet(.discard)) return false; const shred = shred_layout.getShred(packet) orelse return false; const slot = shred_layout.getSlot(shred) orelse return false; const signature = shred_layout.getSignature(shred) orelse return false; diff --git a/src/utils/bitflags.zig b/src/utils/bitflags.zig index 8655c0541..cdabe827a 100644 --- a/src/utils/bitflags.zig +++ b/src/utils/bitflags.zig @@ -1,6 +1,6 @@ pub fn BitFlags(comptime FlagEnum: type) type { return packed struct { - state: @typeInfo(FlagEnum).Enum.tag_type, + state: @typeInfo(FlagEnum).Enum.tag_type = 0, const Self = @This(); From 5df91284de632ea43b31b4f940ed6d17b8590b5b Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Fri, 31 May 2024 18:21:50 -0400 Subject: [PATCH 26/51] refactor(shred_collector): consolidate config --- src/cmd/cmd.zig | 38 +++++++++++++++++++------------------- src/cmd/config.zig | 11 ++++++----- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index bffbdfa10..042e0ed4c 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -82,7 +82,7 @@ var gossip_port_option = cli.Option{ var repair_port_option = cli.Option{ .long_name = "repair-port", .help = "The port to run tvu repair listener - default: 8002", - .value_ref = cli.mkRef(&config.current.tvu.repair_port), + .value_ref = cli.mkRef(&config.current.shred_collector.repair_port), .required = false, .value_name = "Repair Port", }; @@ -90,7 +90,7 @@ var repair_port_option = cli.Option{ var tvu_port_option = cli.Option{ .long_name = "tvu-port", .help = "The port to run turbine listener - default: 
8003", - .value_ref = cli.mkRef(&config.current.tvu.tvu_port), + .value_ref = cli.mkRef(&config.current.shred_collector.tvu_port), .required = false, .value_name = "TVU Port", }; @@ -98,7 +98,7 @@ var tvu_port_option = cli.Option{ var test_repair_option = cli.Option{ .long_name = "test-repair-for-slot", .help = "Set a slot here to repeatedly send repair requests for shreds from this slot. This is only intended for use during short-lived tests of the repair service. Do not set this during normal usage.", - .value_ref = cli.mkRef(&config.current.tvu.test_repair_slot), + .value_ref = cli.mkRef(&config.current.shred_collector.start_slot), .required = false, .value_name = "slot number", }; @@ -399,8 +399,8 @@ fn validator() !void { defer entrypoints.deinit(); const ip_echo_data = try getMyDataFromIpEcho(logger, entrypoints.items); - const repair_port: u16 = config.current.tvu.repair_port; - const tvu_port: u16 = config.current.tvu.repair_port; + const repair_port: u16 = config.current.shred_collector.repair_port; + const tvu_port: u16 = config.current.shred_collector.repair_port; // gossip var gossip_service = try initGossip( @@ -419,20 +419,20 @@ fn validator() !void { const gossip_handle = try std.Thread.spawn(.{}, runGossipWithConfigValues, .{&gossip_service}); // shred collector - var shred_collector = try sig.shred_collector.start(.{ - .start_slot = if (config.current.tvu.test_repair_slot) |n| @intCast(n) else null, - .repair_port = repair_port, - .tvu_port = tvu_port, - }, .{ - .allocator = gpa_allocator, - .logger = logger, - .random = rand.random(), - .my_keypair = &my_keypair, - }, .{ - .exit = &exit, - .gossip_table_rw = &gossip_service.gossip_table_rw, - .my_shred_version = &gossip_service.my_shred_version, - }); + var shred_collector = try sig.shred_collector.start( + config.current.shred_collector, + .{ + .allocator = gpa_allocator, + .logger = logger, + .random = rand.random(), + .my_keypair = &my_keypair, + }, + .{ + .exit = &exit, + .gossip_table_rw = &gossip_service.gossip_table_rw, + .my_shred_version = &gossip_service.my_shred_version, + }, + ); defer shred_collector.deinit(); // accounts db diff --git a/src/cmd/config.zig b/src/cmd/config.zig index f09dee53f..c726096e0 100644 --- a/src/cmd/config.zig +++ b/src/cmd/config.zig @@ -1,9 +1,10 @@ const ACCOUNT_INDEX_BINS = @import("../accountsdb/db.zig").ACCOUNT_INDEX_BINS; +const ShredCollectorConfig = @import("../shred_collector/service.zig").ShredCollectorConfig; pub const Config = struct { identity: IdentityConfig = .{}, gossip: GossipConfig = .{}, - tvu: TvuConfig = .{}, + shred_collector: ShredCollectorConfig = shred_collector_defaults, accounts_db: AccountsDbConfig = .{}, // general config log_level: []const u8 = "debug", @@ -24,10 +25,10 @@ const GossipConfig = struct { trusted_validators: [][]const u8 = &.{}, }; -const TvuConfig = struct { - tvu_port: u16 = 8002, - repair_port: u16 = 8003, - test_repair_slot: ?u64 = null, +const shred_collector_defaults = ShredCollectorConfig{ + .tvu_port = 8002, + .repair_port = 8003, + .start_slot = null, }; const AccountsDbConfig = struct { From 79c9b4015e6a93232762d67918b0e4fa24d78370 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Fri, 31 May 2024 18:25:06 -0400 Subject: [PATCH 27/51] refactor(shred_collector): rename service manager variable --- src/shred_collector/service.zig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index c9f1afdc5..76d2d8def 100644 --- 
a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -64,8 +64,8 @@ pub fn start( deps: ShredCollectorDependencies, interface: ShredCollectorInterface, ) !ServiceManager { - var shred_collector = ServiceManager.init(deps.allocator, deps.logger, interface.exit); - var arena = shred_collector.arena(); + var service_manager = ServiceManager.init(deps.allocator, deps.logger, interface.exit); + var arena = service_manager.arena(); const repair_socket = try bindUdpReusable(conf.repair_port); const tvu_socket = try bindUdpReusable(conf.tvu_port); @@ -94,7 +94,7 @@ pub fn start( interface.exit, ); const repair_svc = try arena.create(RepairService); - try shred_collector.defers.deferCall(RepairService.deinit, .{repair_svc}); + try service_manager.defers.deferCall(RepairService.deinit, .{repair_svc}); repair_svc.* = RepairService.init( deps.allocator, deps.logger, @@ -103,7 +103,7 @@ pub fn start( repair_peer_provider, shred_tracker, ); - try shred_collector.spawn( + try service_manager.spawn( RepairService.run_config, RepairService.sendNecessaryRepairs, .{repair_svc}, @@ -129,23 +129,23 @@ pub fn start( .outgoing_shred_channel = unverified_shreds_channel, .shred_version = interface.my_shred_version, }; - try shred_collector.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); + try service_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); // verifier (thread) - try shred_collector.spawn( + try service_manager.spawn( .{ .name = "Shred Verifier" }, sig.shred_collector.runShredSignatureVerification, .{ interface.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, ); // processor (thread) - try shred_collector.spawn( + try service_manager.spawn( .{ .name = "Shred Processor" }, sig.shred_collector.processShreds, .{ deps.allocator, verified_shreds_channel, shred_tracker }, ); - return shred_collector; + return service_manager; } fn bindUdpReusable(port: u16) !Socket { From e32c8e0490865ae7f1c9188f28724ae268b3da4a Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Fri, 31 May 2024 18:29:40 -0400 Subject: [PATCH 28/51] docs(shred_collector): BasicShredTracker --- src/shred_collector/shred_tracker.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index 0ccb8da29..0bc9cc13c 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -16,6 +16,9 @@ pub const Range = struct { end: ?usize, }; +/// This is a temporary placeholder that will be replaced by the Blockstore +/// once it is implemented. This struct tracks shreds linearly with no regard +/// for forking. The Blockstore will fix this by tracking forks. 
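+///
+/// Rough usage, mirroring the tests at the bottom of this file:
+///
+///   try tracker.registerShred(slot, index);
+///   _ = try tracker.identifyMissing(&report);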
pub const BasicShredTracker = struct { logger: sig.trace.Logger, mux: Mutex = .{}, From 73ed6b0acee76c7b9d448723c6e4753d77dfc02b Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Fri, 31 May 2024 19:00:19 -0400 Subject: [PATCH 29/51] refactor(shred_collector): reorder startup --- src/shred_collector/service.zig | 72 ++++++++++++++++----------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 76d2d8def..d21797dfc 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -70,6 +70,35 @@ pub fn start( const repair_socket = try bindUdpReusable(conf.repair_port); const tvu_socket = try bindUdpReusable(conf.tvu_port); + // receiver (threads) + const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( + deps.allocator, + 1000, + ); + const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( + deps.allocator, + 1000, + ); + const shred_receiver = try arena.create(ShredReceiver); + shred_receiver.* = ShredReceiver{ + .allocator = deps.allocator, + .keypair = deps.my_keypair, + .exit = interface.exit, + .logger = deps.logger, + .repair_socket = repair_socket, + .tvu_socket = tvu_socket, + .outgoing_shred_channel = unverified_shreds_channel, + .shred_version = interface.my_shred_version, + }; + try service_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); + + // verifier (thread) + try service_manager.spawn( + .{ .name = "Shred Verifier" }, + sig.shred_collector.runShredSignatureVerification, + .{ interface.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, + ); + // tracker (shared state, internal to Shred Collector) const shred_tracker = try arena.create(BasicShredTracker); shred_tracker.* = BasicShredTracker.init( @@ -77,6 +106,13 @@ pub fn start( deps.logger, ); + // processor (thread) + try service_manager.spawn( + .{ .name = "Shred Processor" }, + sig.shred_collector.processShreds, + .{ deps.allocator, verified_shreds_channel, shred_tracker }, + ); + // repair (thread) const repair_peer_provider = try RepairPeerProvider.init( deps.allocator, @@ -109,42 +145,6 @@ pub fn start( .{repair_svc}, ); - // receiver (threads) - const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( - deps.allocator, - 1000, - ); - const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( - deps.allocator, - 1000, - ); - const shred_receiver = try arena.create(ShredReceiver); - shred_receiver.* = ShredReceiver{ - .allocator = deps.allocator, - .keypair = deps.my_keypair, - .exit = interface.exit, - .logger = deps.logger, - .repair_socket = repair_socket, - .tvu_socket = tvu_socket, - .outgoing_shred_channel = unverified_shreds_channel, - .shred_version = interface.my_shred_version, - }; - try service_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); - - // verifier (thread) - try service_manager.spawn( - .{ .name = "Shred Verifier" }, - sig.shred_collector.runShredSignatureVerification, - .{ interface.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, - ); - - // processor (thread) - try service_manager.spawn( - .{ .name = "Shred Processor" }, - sig.shred_collector.processShreds, - .{ deps.allocator, verified_shreds_channel, shred_tracker }, - ); - return service_manager; } From 8c1c4609838e3c1e721419a197c11b7256cf415a Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 3 Jun 2024 18:02:48 
-0400 Subject: [PATCH 30/51] refactor(lru): camel case functions --- src/common/lru.zig | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/common/lru.zig b/src/common/lru.zig index a305133e8..791b2519c 100644 --- a/src/common/lru.zig +++ b/src/common/lru.zig @@ -109,7 +109,7 @@ pub fn LruCacheCustom( /// Recycles an old node if LruCache capacity is full. If replaced, first element of tuple is replaced /// Entry (otherwise null) and second element of tuple is inserted Entry. - fn internal_recycle_or_create_node(self: *Self, key: K, value: V) error{OutOfMemory}!struct { ?LruEntry, LruEntry } { + fn internalRecycleOrCreateNode(self: *Self, key: K, value: V) error{OutOfMemory}!struct { ?LruEntry, LruEntry } { if (self.dbl_link_list.len == self.max_items) { const recycled_node = self.dbl_link_list.popFirst().?; deinitFn(&recycled_node.data.value, self.deinit_context); @@ -133,15 +133,15 @@ pub fn LruCacheCustom( return .{ null, node.data }; } - fn internal_insert(self: *Self, key: K, value: V) LruEntry { + fn internalInsert(self: *Self, key: K, value: V) LruEntry { // if key exists, we update it if (self.hashmap.get(key)) |existing_node| { existing_node.data.value = value; - self.internal_reorder(existing_node); + self.internalReorder(existing_node); return existing_node.data; } - const replaced_and_created_node = self.internal_recycle_or_create_node(key, value) catch |e| { + const replaced_and_created_node = self.internalRecycleOrCreateNode(key, value) catch |e| { std.debug.panic("recycle_or_create_node returned error: {any}", .{e}); }; const new_lru_entry = replaced_and_created_node[1]; @@ -154,7 +154,7 @@ pub fn LruCacheCustom( if (kind == .locking) self.mux.lock(); defer if (kind == .locking) self.mux.unlock(); - _ = self.internal_insert(key, value); + _ = self.internalInsert(key, value); return; } @@ -190,7 +190,7 @@ pub fn LruCacheCustom( } // reorder Node to the top - fn internal_reorder(self: *Self, node: *Node) void { + fn internalReorder(self: *Self, node: *Node) void { self.dbl_link_list.remove(node); self.dbl_link_list.append(node); } @@ -242,11 +242,11 @@ pub fn LruCacheCustom( var existing_node: *Node = existing_entry.value_ptr.*; const old_value = existing_node.data.value; existing_node.data.value = value; - self.internal_reorder(existing_node); + self.internalReorder(existing_node); return old_value; } - _ = self.internal_insert(key, value); + _ = self.internalInsert(key, value); return null; } @@ -319,7 +319,7 @@ test "common.lru: locked put is thread safe" { defer cache.deinit(); var threads = std.ArrayList(std.Thread).init(testing.allocator); defer threads.deinit(); - for (0..2) |_| try threads.append(try std.Thread.spawn(.{}, test_put, .{ &cache, 1 })); + for (0..2) |_| try threads.append(try std.Thread.spawn(.{}, testPut, .{ &cache, 1 })); for (threads.items) |thread| thread.join(); } @@ -328,13 +328,13 @@ test "common.lru: locked insert is thread safe" { defer cache.deinit(); var threads = std.ArrayList(std.Thread).init(testing.allocator); defer threads.deinit(); - for (0..2) |_| try threads.append(try std.Thread.spawn(.{}, test_insert, .{ &cache, 1 })); + for (0..2) |_| try threads.append(try std.Thread.spawn(.{}, testInsert, .{ &cache, 1 })); for (threads.items) |thread| thread.join(); } -fn test_put(lru_cache: *LruCache(.locking, usize, usize), k: usize) void { +fn testPut(lru_cache: *LruCache(.locking, usize, usize), k: usize) void { _ = lru_cache.put(k, 2); } -fn test_insert(lru_cache: *LruCache(.locking, usize, usize), k: 
usize) void { +fn testInsert(lru_cache: *LruCache(.locking, usize, usize), k: usize) void { _ = lru_cache.insert(k, 2) catch unreachable; } From d8e65f73745481c2a9c03d6074c100c5e8565968 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 3 Jun 2024 20:28:36 -0400 Subject: [PATCH 31/51] refactor(shred-collector): more consistent naming and document component relationships --- src/shred_collector/service.zig | 23 ++++++++++------------- src/shred_collector/shred_processor.zig | 7 ++++--- src/shred_collector/shred_receiver.zig | 5 +++-- src/shred_collector/shred_verifier.zig | 14 ++++++++------ 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index d21797dfc..4ff1d55eb 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -3,13 +3,16 @@ const network = @import("zig-network"); const sig = @import("../lib.zig"); const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = network.Socket; +const Channel = sig.sync.Channel; const GossipTable = sig.gossip.GossipTable; const Logger = sig.trace.Logger; +const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; const RwMux = sig.sync.RwMux; const ServiceManager = sig.utils.ServiceManager; @@ -71,14 +74,8 @@ pub fn start( const tvu_socket = try bindUdpReusable(conf.tvu_port); // receiver (threads) - const unverified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( - deps.allocator, - 1000, - ); - const verified_shreds_channel = sig.sync.Channel(std.ArrayList(sig.net.Packet)).init( - deps.allocator, - 1000, - ); + const unverified_shred_channel = Channel(ArrayList(Packet)).init(deps.allocator, 1000); + const verified_shred_channel = Channel(ArrayList(Packet)).init(deps.allocator, 1000); const shred_receiver = try arena.create(ShredReceiver); shred_receiver.* = ShredReceiver{ .allocator = deps.allocator, @@ -87,7 +84,7 @@ pub fn start( .logger = deps.logger, .repair_socket = repair_socket, .tvu_socket = tvu_socket, - .outgoing_shred_channel = unverified_shreds_channel, + .unverified_shred_channel = unverified_shred_channel, .shred_version = interface.my_shred_version, }; try service_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); @@ -95,8 +92,8 @@ pub fn start( // verifier (thread) try service_manager.spawn( .{ .name = "Shred Verifier" }, - sig.shred_collector.runShredSignatureVerification, - .{ interface.exit, unverified_shreds_channel, verified_shreds_channel, .{} }, + sig.shred_collector.runShredVerifier, + .{ interface.exit, unverified_shred_channel, verified_shred_channel, .{} }, ); // tracker (shared state, internal to Shred Collector) @@ -109,8 +106,8 @@ pub fn start( // processor (thread) try service_manager.spawn( .{ .name = "Shred Processor" }, - sig.shred_collector.processShreds, - .{ deps.allocator, verified_shreds_channel, shred_tracker }, + sig.shred_collector.runShredProcessor, + .{ deps.allocator, verified_shred_channel, shred_tracker }, ); // repair (thread) diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig index 0c85549b6..d166fdba0 100644 --- a/src/shred_collector/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -12,15 +12,16 @@ const Packet = sig.net.Packet; const Shred = sig.shred_collector.Shred; /// Analogous to 
[WindowService](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/window_service.rs#L395) -pub fn processShreds( +pub fn runShredProcessor( allocator: Allocator, - verified_shreds: *Channel(ArrayList(Packet)), + // shred verifier --> me + verified_shred_channel: *Channel(ArrayList(Packet)), tracker: *BasicShredTracker, ) !void { var processed_count: usize = 0; var buf = ArrayList(ArrayList(Packet)).init(allocator); while (true) { - try verified_shreds.tryDrainRecycle(&buf); + try verified_shred_channel.tryDrainRecycle(&buf); if (buf.items.len == 0) { std.time.sleep(10 * std.time.ns_per_ms); continue; diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 590b96456..264a6ec2f 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -30,7 +30,8 @@ pub const ShredReceiver = struct { logger: Logger, repair_socket: Socket, tvu_socket: Socket, - outgoing_shred_channel: *Channel(ArrayList(Packet)), + /// me --> shred verifier + unverified_shred_channel: *Channel(ArrayList(Packet)), shred_version: *const Atomic(u16), const Self = @This(); @@ -93,7 +94,7 @@ pub const ShredReceiver = struct { try self.handlePacket(packet, &responses, shred_version); if (is_repair) packet.flags.set(.repair); } - try self.outgoing_shred_channel.send(batch); + try self.unverified_shred_channel.send(batch); } if (responses.items.len > 0) { try sender.send(responses); diff --git a/src/shred_collector/shred_verifier.zig b/src/shred_collector/shred_verifier.zig index 856f8e9e1..505071a76 100644 --- a/src/shred_collector/shred_verifier.zig +++ b/src/shred_collector/shred_verifier.zig @@ -10,16 +10,18 @@ const Channel = sig.sync.Channel; const Packet = sig.net.Packet; /// Analogous to [run_shred_sigverify](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/turbine/src/sigverify_shreds.rs#L82) -pub fn runShredSignatureVerification( +pub fn runShredVerifier( exit: *Atomic(bool), - incoming: *Channel(ArrayList(Packet)), - verified: *Channel(ArrayList(Packet)), + /// shred receiver --> me + unverified_shred_channel: *Channel(ArrayList(Packet)), + /// me --> shred processor + verified_shred_channel: *Channel(ArrayList(Packet)), leader_schedule: LeaderScheduleCalculator, ) !void { var verified_count: usize = 0; - var buf: ArrayList(ArrayList(Packet)) = ArrayList(ArrayList(Packet)).init(incoming.allocator); + var buf = ArrayList(ArrayList(Packet)).init(unverified_shred_channel.allocator); while (true) { - try incoming.tryDrainRecycle(&buf); + try unverified_shred_channel.tryDrainRecycle(&buf); if (buf.items.len == 0) { std.time.sleep(10 * std.time.ns_per_ms); continue; @@ -33,7 +35,7 @@ pub fn runShredSignatureVerification( verified_count += 1; } } - try verified.send(packet_batch); + try verified_shred_channel.send(packet_batch); if (exit.load(.monotonic)) return; } } From 2bf571c6c6c975eb64b916d9b640b5ee40ac9b34 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Mon, 3 Jun 2024 20:39:02 -0400 Subject: [PATCH 32/51] refactor(shred-collector): elevate hardcoded thread config to top level consts --- src/shred_collector/repair_service.zig | 4 +++- src/shred_collector/shred_receiver.zig | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 9a71afc2b..284890181 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -33,6 +33,8 @@ const 
RepairMessage = sig.shred_collector.RepairMessage; const serializeRepairRequest = sig.shred_collector.serializeRepairRequest; +const repair_requester_threads = 4; + /// Identifies which repairs are needed and sends them /// - delegates to RepairPeerProvider to identify repair peers. /// - delegates to RepairRequester to send the requests. @@ -82,7 +84,7 @@ pub const RepairService = struct { .logger = logger, .exit = exit, .report = MultiSlotReport.init(allocator), - .thread_pool = RequestBatchThreadPool.init(allocator, 4), + .thread_pool = RequestBatchThreadPool.init(allocator, repair_requester_threads), }; } diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 264a6ec2f..f5614ea12 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -22,6 +22,8 @@ const SocketThread = sig.net.SocketThread; const endpointToString = sig.net.endpointToString; +const num_tvu_receivers = 2; + /// Analogous to [ShredFetchStage](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/shred_fetch_stage.rs#L34) pub const ShredReceiver = struct { allocator: Allocator, @@ -49,7 +51,6 @@ pub const ShredReceiver = struct { .initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); defer repair_receiver.deinit(); - const num_tvu_receivers = 2; var tvu_receivers: [num_tvu_receivers]*Channel(ArrayList(Packet)) = undefined; for (0..num_tvu_receivers) |i| { tvu_receivers[i] = (try SocketThread.initReceiver( From a50ffa3076f465e99d5939f0d6ff2f5bf119b02d Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 4 Jun 2024 09:47:16 -0400 Subject: [PATCH 33/51] perf(shred-collector): batch repair ping responses in larger chunks also rename ambiguous "sender" to "response_sender" --- src/shred_collector/shred_receiver.zig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index f5614ea12..36c31f0d4 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -44,9 +44,9 @@ pub const ShredReceiver = struct { defer self.logger.err("exiting shred receiver"); errdefer self.logger.err("error in shred receiver"); - var sender = try SocketThread + var response_sender = try SocketThread .initSender(self.allocator, self.logger, self.repair_socket, self.exit); - defer sender.deinit(); + defer response_sender.deinit(); var repair_receiver = try SocketThread .initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); defer repair_receiver.deinit(); @@ -64,12 +64,12 @@ pub const ShredReceiver = struct { const x = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, tvu_receivers, sender.channel, false }, + .{ self, tvu_receivers, response_sender.channel, false }, ); const y = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, .{repair_receiver.channel}, sender.channel, true }, + .{ self, .{repair_receiver.channel}, response_sender.channel, true }, ); x.join(); y.join(); @@ -80,13 +80,13 @@ pub const ShredReceiver = struct { fn runPacketHandler( self: *Self, receivers: anytype, - sender: *Channel(ArrayList(Packet)), + response_sender: *Channel(ArrayList(Packet)), comptime is_repair: bool, ) !void { var buf = ArrayList(ArrayList(Packet)).init(self.allocator); while (!self.exit.load(.unordered)) { + var responses = ArrayList(Packet).init(self.allocator); inline for (receivers) |receiver| { - var responses = 
ArrayList(Packet).init(self.allocator); try receiver.tryDrainRecycle(&buf); if (buf.items.len > 0) { const shred_version = self.shred_version.load(.monotonic); @@ -97,13 +97,13 @@ pub const ShredReceiver = struct { } try self.unverified_shred_channel.send(batch); } - if (responses.items.len > 0) { - try sender.send(responses); - } } else { std.time.sleep(10 * std.time.ns_per_ms); } } + if (responses.items.len > 0) { + try response_sender.send(responses); + } } } From 66007608e872ed437b7fddcc4eee249f769f2aba Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 4 Jun 2024 09:58:46 -0400 Subject: [PATCH 34/51] refactor(shred-collector): rename getSlot to getMonitoredSlot --- src/shred_collector/shred_tracker.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index 0bc9cc13c..51ec620c6 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -62,7 +62,7 @@ pub const BasicShredTracker = struct { self.maybeSetStart(slot); self.max_slot_seen = @max(self.max_slot_seen, slot); - const monitored_slot = try self.getSlot(slot); + const monitored_slot = try self.getMonitoredSlot(slot); const new = try monitored_slot.record(shred_index); if (new) self.logger.debugf("new slot: {}", .{slot}); self.max_slot_processed = @max(self.max_slot_processed, slot); @@ -73,7 +73,7 @@ pub const BasicShredTracker = struct { defer self.mux.unlock(); self.maybeSetStart(slot); - const monitored_slot = try self.getSlot(slot); + const monitored_slot = try self.getMonitoredSlot(slot); if (monitored_slot.last_shred) |old_last| { monitored_slot.last_shred = @min(old_last, index); } else { @@ -92,7 +92,7 @@ pub const BasicShredTracker = struct { const timestamp = std.time.milliTimestamp(); const last_slot_to_check = @max(self.max_slot_processed, self.current_bottom_slot); for (self.current_bottom_slot..last_slot_to_check + 1) |slot| { - const monitored_slot = try self.getSlot(slot); + const monitored_slot = try self.getMonitoredSlot(slot); if (monitored_slot.first_received_timestamp_ms + MIN_SLOT_AGE_TO_REPORT_AS_MISSING > timestamp) { continue; } @@ -113,7 +113,7 @@ pub const BasicShredTracker = struct { return true; } - fn getSlot(self: *Self, slot: Slot) error{ SlotUnderflow, SlotOverflow }!*MonitoredSlot { + fn getMonitoredSlot(self: *Self, slot: Slot) error{ SlotUnderflow, SlotOverflow }!*MonitoredSlot { if (slot > self.current_bottom_slot + num_slots - 1) { return error.SlotOverflow; } From cf2360236cb59b67626ae1cdec03dd83942a8d98 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Tue, 4 Jun 2024 22:28:07 -0400 Subject: [PATCH 35/51] refactor(lib): remove usingnamespace, each folder gets lib.zig --- build.zig | 1 + src/accountsdb/lib.zig | 24 +++++ src/bincode/bincode.zig | 43 ++++---- src/bloom/lib.zig | 8 ++ src/cmd/cmd.zig | 5 +- src/core/lib.zig | 30 ++++++ src/gossip/data.zig | 2 +- src/gossip/lib.zig | 26 +++++ src/lib.zig | 133 +++--------------------- src/net/lib.zig | 17 +++ src/net/net.zig | 2 +- src/net/packet.zig | 2 +- src/prometheus/lib.zig | 18 ++++ src/rpc/lib.zig | 3 + src/shred_collector/lib.zig | 16 +++ src/shred_collector/repair_service.zig | 17 +-- src/shred_collector/service.zig | 18 ++-- src/shred_collector/shred.zig | 29 +++--- src/shred_collector/shred_processor.zig | 7 +- src/shred_collector/shred_receiver.zig | 9 +- src/shred_collector/shred_tracker.zig | 5 +- src/shred_collector/shred_verifier.zig | 3 +- src/sync/lib.zig | 15 +++ src/tests.zig | 20 
+++- src/trace/lib.zig | 10 ++ src/utils/arraylist.zig | 60 ----------- src/utils/collections.zig | 64 ++++++++++++ src/utils/lazy.zig | 2 +- src/utils/lib.zig | 11 ++ src/utils/service.zig | 2 +- src/utils/varint.zig | 35 ------- 31 files changed, 353 insertions(+), 284 deletions(-) create mode 100644 src/accountsdb/lib.zig create mode 100644 src/bloom/lib.zig create mode 100644 src/core/lib.zig create mode 100644 src/gossip/lib.zig create mode 100644 src/net/lib.zig create mode 100644 src/prometheus/lib.zig create mode 100644 src/rpc/lib.zig create mode 100644 src/shred_collector/lib.zig create mode 100644 src/sync/lib.zig create mode 100644 src/trace/lib.zig create mode 100644 src/utils/collections.zig create mode 100644 src/utils/lib.zig diff --git a/build.zig b/build.zig index f25bd1ed4..9cfbefbd7 100644 --- a/build.zig +++ b/build.zig @@ -79,6 +79,7 @@ pub fn build(b: *Build) void { }); b.installArtifact(unit_tests); unit_tests.root_module.addImport("base58-zig", base58_module); + unit_tests.root_module.addImport("curl", curl_mod); unit_tests.root_module.addImport("httpz", httpz_mod); unit_tests.root_module.addImport("zig-network", zig_network_module); unit_tests.root_module.addImport("zstd", zstd_mod); diff --git a/src/accountsdb/lib.zig b/src/accountsdb/lib.zig new file mode 100644 index 000000000..e5e50f3a3 --- /dev/null +++ b/src/accountsdb/lib.zig @@ -0,0 +1,24 @@ +pub const _private = struct { + pub const accounts_file = @import("accounts_file.zig"); + pub const bank = @import("bank.zig"); + pub const db = @import("db.zig"); + pub const download = @import("download.zig"); + pub const genesis_config = @import("genesis_config.zig"); + pub const index = @import("index.zig"); + pub const snapshots = @import("snapshots.zig"); + pub const sysvars = @import("sysvars.zig"); +}; + +pub const AccountsDB = _private.db.AccountsDB; +pub const AccountsDBConfig = _private.db.AccountsDBConfig; +pub const AllSnapshotFields = _private.snapshots.AllSnapshotFields; +pub const Bank = _private.bank.Bank; +pub const GenesisConfig = _private.genesis_config.GenesisConfig; +pub const SnapshotFieldsAndPaths = _private.snapshots.SnapshotFieldsAndPaths; +pub const SnapshotFiles = _private.snapshots.SnapshotFiles; +pub const StatusCache = _private.snapshots.StatusCache; + +pub const downloadSnapshotsFromGossip = _private.download.downloadSnapshotsFromGossip; +pub const parallelUnpackZstdTarBall = _private.snapshots.parallelUnpackZstdTarBall; + +pub const ACCOUNT_INDEX_BINS = _private.db.ACCOUNT_INDEX_BINS; diff --git a/src/bincode/bincode.zig b/src/bincode/bincode.zig index e07fa57e2..c353de9f3 100644 --- a/src/bincode/bincode.zig +++ b/src/bincode/bincode.zig @@ -4,6 +4,13 @@ const testing = std.testing; const bincode = @This(); +pub const config = struct { + // TODO move these files to the bincode folder + pub const arraylist = @import("../utils/arraylist.zig"); + pub const shortvec = @import("../utils/shortvec.zig"); + pub const varint = @import("../utils/varint.zig"); +}; + pub const Params = struct { pub const legacy: Params = .{ .endian = .little, @@ -110,24 +117,24 @@ pub fn read(allocator: std.mem.Allocator, comptime U: type, reader: anytype, par } else { inline for (info.fields) |field| { if (field.is_comptime) continue; - if (getFieldConfig(T, field)) |config| { - if (shouldUseDefaultValue(field, config)) |default_value| { + if (getFieldConfig(T, field)) |field_config| { + if (shouldUseDefaultValue(field, field_config)) |default_value| { @field(data, field.name) = @as(*const field.type, 
@ptrCast(@alignCast(default_value))).*; continue; } - if (config.deserializer) |deser_fcn| { + if (field_config.deserializer) |deser_fcn| { @field(data, field.name) = try deser_fcn(allocator, reader, params); continue; } - if (config.default_on_eof) { + if (field_config.default_on_eof) { const field_type = field.type; @field(data, field.name) = bincode.read(allocator, field_type, reader, params) catch |err| blk: { if (err == error.EndOfStream) { if (field.default_value) |default_value| { break :blk @as(*const field_type, @ptrCast(@alignCast(default_value))).*; - } else if (config.default_fn) |default_fcn| { + } else if (field_config.default_fn) |default_fcn| { break :blk default_fcn(allocator); } else { return error.MissingFieldDefaultValue; @@ -346,8 +353,8 @@ pub fn free(allocator: std.mem.Allocator, value: anytype) void { } val.deinit(); } else inline for (info.fields) |field| { - if (getFieldConfig(T, field)) |config| { - if (config.free) |free_fcn| { + if (getFieldConfig(T, field)) |field_config| { + if (field_config.free) |free_fcn| { var field_value = @field(value, field.name); switch (@typeInfo(field.type)) { .Pointer => |*field_info| { @@ -412,8 +419,8 @@ pub fn write(writer: anytype, data: anytype, params: bincode.Params) !void { .Type, .Void, .NoReturn, .Undefined, .Null, .Fn, .Opaque, .Frame, .AnyFrame => return, .Bool => return writer.writeByte(@intFromBool(data)), .Enum => |_| { - if (getConfig(T)) |config| { - if (config.serializer) |serialize_fcn| { + if (getConfig(T)) |type_config| { + if (type_config.serializer) |serialize_fcn| { return serialize_fcn(writer, data, params); } } @@ -449,10 +456,10 @@ pub fn write(writer: anytype, data: anytype, params: bincode.Params) !void { inline for (info.fields) |field| { if (field.is_comptime) continue; - if (getFieldConfig(T, field)) |config| { - if (config.skip) { + if (getFieldConfig(T, field)) |field_config| { + if (field_config.skip) { continue; - } else if (config.serializer) |ser_fcn| { + } else if (field_config.serializer) |ser_fcn| { try ser_fcn(writer, @field(data, field.name), params); continue; } @@ -592,8 +599,8 @@ pub fn FieldConfig(comptime T: type) type { pub fn getConfig(comptime struct_type: type) ?FieldConfig(struct_type) { const bincode_field = "!bincode-config"; if (@hasDecl(struct_type, bincode_field)) { - const config = @field(struct_type, bincode_field); - return config; + const type_config = @field(struct_type, bincode_field); + return type_config; } return null; } @@ -601,14 +608,14 @@ pub fn getConfig(comptime struct_type: type) ?FieldConfig(struct_type) { pub fn getFieldConfig(comptime struct_type: type, comptime field: std.builtin.Type.StructField) ?FieldConfig(field.type) { const bincode_field = "!bincode-config:" ++ field.name; if (@hasDecl(struct_type, bincode_field)) { - const config = @field(struct_type, bincode_field); - return config; + const field_config = @field(struct_type, bincode_field); + return field_config; } return null; } -pub inline fn shouldUseDefaultValue(comptime field: std.builtin.Type.StructField, comptime config: FieldConfig(field.type)) ?*const anyopaque { - if (config.skip) { +pub inline fn shouldUseDefaultValue(comptime field: std.builtin.Type.StructField, comptime field_config: FieldConfig(field.type)) ?*const anyopaque { + if (field_config.skip) { if (field.default_value == null) { const field_type_name = @typeName(field.type); @compileError("┓\n|\n|--> Invalid config: cannot skip field '" ++ field_type_name ++ "." 
++ field.name ++ "' deserialization if no default value set\n\n"); diff --git a/src/bloom/lib.zig b/src/bloom/lib.zig new file mode 100644 index 000000000..281cb69d0 --- /dev/null +++ b/src/bloom/lib.zig @@ -0,0 +1,8 @@ +pub const _private = struct { + pub const bit_set = @import("bit_set.zig"); + pub const bit_vec = @import("bit_vec.zig"); + pub const bitvec = @import("bitvec.zig"); + pub const bloom = @import("bloom.zig"); +}; + +pub const Bloom = _private.bloom.Bloom; diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index ed2b95162..52c27b459 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -16,7 +16,6 @@ const AccountsDB = sig.accounts_db.AccountsDB; const AccountsDBConfig = sig.accounts_db.AccountsDBConfig; const AllSnapshotFields = sig.accounts_db.AllSnapshotFields; const Bank = sig.accounts_db.Bank; -const BasicShredTracker = sig.shred_collector.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GenesisConfig = sig.accounts_db.GenesisConfig; const GossipService = sig.gossip.GossipService; @@ -24,15 +23,13 @@ const IpAddr = sig.net.IpAddr; const Level = sig.trace.Level; const Logger = sig.trace.Logger; const Pubkey = sig.core.Pubkey; -const RepairService = sig.shred_collector.RepairService; -const ShredReceiver = sig.shred_collector.ShredReceiver; const SnapshotFieldsAndPaths = sig.accounts_db.SnapshotFieldsAndPaths; const SnapshotFiles = sig.accounts_db.SnapshotFiles; const SocketAddr = sig.net.SocketAddr; const StatusCache = sig.accounts_db.StatusCache; const downloadSnapshotsFromGossip = sig.accounts_db.downloadSnapshotsFromGossip; -const enumFromName = sig.utils.enumFromName; +const enumFromName = sig.utils.types.enumFromName; const getOrInitIdentity = helpers.getOrInitIdentity; const globalRegistry = sig.prometheus.globalRegistry; const getWallclockMs = sig.gossip.getWallclockMs; diff --git a/src/core/lib.zig b/src/core/lib.zig new file mode 100644 index 000000000..4825f686c --- /dev/null +++ b/src/core/lib.zig @@ -0,0 +1,30 @@ +pub const _private = struct { + pub const account = @import("account.zig"); + pub const hard_forks = @import("hard_forks.zig"); + pub const hash = @import("hash.zig"); + pub const pubkey = @import("pubkey.zig"); + pub const shred = @import("shred.zig"); + pub const signature = @import("signature.zig"); + pub const time = @import("time.zig"); + pub const transaction = @import("transaction.zig"); +}; + +pub const Account = _private.account.Account; +pub const HardForks = _private.hard_forks.HardForks; +pub const HardFork = _private.hard_forks.HardFork; +pub const Hash = _private.hash.Hash; +pub const Nonce = _private.shred.Nonce; +pub const Pubkey = _private.pubkey.Pubkey; +pub const ShredVersion = _private.shred.ShredVersion; +pub const Signature = _private.signature.Signature; + +pub const Epoch = _private.time.Epoch; +pub const Slot = _private.time.Slot; + +pub const CompiledInstruction = _private.transaction.CompiledInstruction; +pub const Message = _private.transaction.Message; +pub const MessageHeader = _private.transaction.MessageHeader; +pub const Transaction = _private.transaction.Transaction; + +pub const SIGNATURE_LENGTH = _private.signature.SIGNATURE_LENGTH; +pub const HASH_SIZE = _private.hash.HASH_SIZE; diff --git a/src/gossip/data.zig b/src/gossip/data.zig index 62da4e25b..062607840 100644 --- a/src/gossip/data.zig +++ b/src/gossip/data.zig @@ -1013,7 +1013,7 @@ pub const ContactInfo = struct { const Self = @This(); pub fn toNodeInstance(self: *Self) NodeInstance { - return NodeInstance.init(self.Pubkey, 
@intCast(std.time.milliTimestamp())); + return NodeInstance.init(self.pubkey, @intCast(std.time.milliTimestamp())); } pub fn deinit(self: Self) void { diff --git a/src/gossip/lib.zig b/src/gossip/lib.zig new file mode 100644 index 000000000..c2fbbaaed --- /dev/null +++ b/src/gossip/lib.zig @@ -0,0 +1,26 @@ +pub const _private = struct { + pub const active_set = @import("active_set.zig"); + pub const data = @import("data.zig"); + pub const dump_service = @import("dump_service.zig"); + pub const fuzz = @import("fuzz.zig"); + pub const message = @import("message.zig"); + pub const ping_pong = @import("ping_pong.zig"); + pub const pull_request = @import("pull_request.zig"); + pub const pull_response = @import("pull_response.zig"); + pub const service = @import("service.zig"); + pub const shards = @import("shards.zig"); + pub const table = @import("table.zig"); +}; + +pub const data = _private.data; + +pub const ContactInfo = data.ContactInfo; +pub const GossipService = _private.service.GossipService; +pub const GossipTable = _private.table.GossipTable; +pub const SignedGossipData = data.SignedGossipData; +pub const LowestSlot = data.LowestSlot; +pub const Ping = _private.ping_pong.Ping; +pub const Pong = _private.ping_pong.Pong; + +pub const getWallclockMs = data.getWallclockMs; +pub const socket_tag = data.socket_tag; diff --git a/src/lib.zig b/src/lib.zig index 1ada9cb91..c489c04cf 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -1,120 +1,13 @@ -pub const rpc = struct { - // TODO: FIXME - // pub usingnamespace @import("rpc/client.zig"); - pub const types = struct { - pub usingnamespace @import("rpc/types.zig"); - }; -}; - -pub const core = struct { - pub usingnamespace @import("core/pubkey.zig"); - pub usingnamespace @import("core/account.zig"); - pub usingnamespace @import("core/transaction.zig"); - pub usingnamespace @import("core/hash.zig"); - pub usingnamespace @import("core/signature.zig"); - pub usingnamespace @import("core/time.zig"); - pub usingnamespace @import("core/hard_forks.zig"); - pub usingnamespace @import("core/shred.zig"); -}; - -pub const accounts_db = struct { - pub usingnamespace @import("accountsdb/db.zig"); - pub usingnamespace @import("accountsdb/bank.zig"); - pub usingnamespace @import("accountsdb/accounts_file.zig"); - pub usingnamespace @import("accountsdb/genesis_config.zig"); - pub usingnamespace @import("accountsdb/index.zig"); - pub usingnamespace @import("accountsdb/snapshots.zig"); - pub usingnamespace @import("accountsdb/sysvars.zig"); - pub usingnamespace @import("accountsdb/download.zig"); -}; - -pub const gossip = struct { - pub usingnamespace @import("gossip/data.zig"); - pub usingnamespace @import("gossip/table.zig"); - pub usingnamespace @import("gossip/service.zig"); - pub usingnamespace @import("gossip/message.zig"); - pub usingnamespace @import("gossip/pull_request.zig"); - pub usingnamespace @import("gossip/pull_response.zig"); - pub usingnamespace @import("gossip/shards.zig"); - pub usingnamespace @import("gossip/ping_pong.zig"); - pub usingnamespace @import("gossip/active_set.zig"); - pub usingnamespace @import("gossip/dump_service.zig"); -}; - -pub const bloom = struct { - pub usingnamespace @import("bloom/bit_vec.zig"); - pub usingnamespace @import("bloom/bit_set.zig"); - pub usingnamespace @import("bloom/bloom.zig"); -}; - -pub const version = struct { - pub usingnamespace @import("version/version.zig"); -}; - -pub const sync = struct { - pub usingnamespace @import("sync/channel.zig"); - pub usingnamespace @import("sync/mpmc.zig"); - pub 
usingnamespace @import("sync/ref.zig"); - pub usingnamespace @import("sync/mux.zig"); - pub usingnamespace @import("sync/once_cell.zig"); - pub usingnamespace @import("sync/thread_pool.zig"); -}; - -pub const utils = struct { - pub usingnamespace @import("utils/arraylist.zig"); - pub usingnamespace @import("utils/bitflags.zig"); - pub usingnamespace @import("utils/lazy.zig"); - pub usingnamespace @import("utils/math.zig"); - pub usingnamespace @import("utils/shortvec.zig"); - pub usingnamespace @import("utils/service.zig"); - pub usingnamespace @import("utils/thread.zig"); - pub usingnamespace @import("utils/types.zig"); - pub usingnamespace @import("utils/varint.zig"); -}; - -pub const trace = struct { - pub usingnamespace @import("trace/level.zig"); - pub usingnamespace @import("trace/log.zig"); - pub usingnamespace @import("trace/entry.zig"); -}; - -pub const common = struct { - pub usingnamespace @import("common/lru.zig"); - pub usingnamespace @import("common/merkle_tree.zig"); -}; - -pub const bincode = struct { - pub usingnamespace @import("bincode/bincode.zig"); -}; - -pub const cmd = struct { - pub usingnamespace @import("cmd/helpers.zig"); -}; - -pub const net = struct { - pub usingnamespace @import("net/net.zig"); - pub usingnamespace @import("net/echo.zig"); - pub usingnamespace @import("net/packet.zig"); - pub usingnamespace @import("net/socket_utils.zig"); -}; - -pub const prometheus = struct { - pub usingnamespace @import("prometheus/counter.zig"); - pub usingnamespace @import("prometheus/gauge.zig"); - pub usingnamespace @import("prometheus/gauge_fn.zig"); - pub usingnamespace @import("prometheus/http.zig"); - pub usingnamespace @import("prometheus/histogram.zig"); - pub usingnamespace @import("prometheus/metric.zig"); - pub usingnamespace @import("prometheus/registry.zig"); -}; - -pub const shred_collector = struct { - pub usingnamespace @import("shred_collector/repair_message.zig"); - pub usingnamespace @import("shred_collector/repair_service.zig"); - pub usingnamespace @import("shred_collector/shred_receiver.zig"); - pub usingnamespace @import("shred_collector/shred_verifier.zig"); - pub usingnamespace @import("shred_collector/shred.zig"); - pub usingnamespace @import("shred_collector/shred_tracker.zig"); - pub usingnamespace @import("shred_collector/shred_processor.zig"); - pub usingnamespace @import("shred_collector/service.zig"); -}; +pub const accounts_db = @import("accountsdb/lib.zig"); +pub const bincode = @import("bincode/bincode.zig"); +pub const bloom = @import("bloom/lib.zig"); +pub const core = @import("core/lib.zig"); +pub const gossip = @import("gossip/lib.zig"); +pub const net = @import("net/lib.zig"); +pub const prometheus = @import("prometheus/lib.zig"); +pub const rpc = @import("rpc/lib.zig"); +pub const shred_collector = @import("shred_collector/lib.zig"); +pub const sync = @import("sync/lib.zig"); +pub const trace = @import("trace/lib.zig"); +pub const utils = @import("utils/lib.zig"); +pub const version = @import("version/version.zig"); diff --git a/src/net/lib.zig b/src/net/lib.zig new file mode 100644 index 000000000..bba4ef988 --- /dev/null +++ b/src/net/lib.zig @@ -0,0 +1,17 @@ +pub const _private = struct { + pub const net = @import("net.zig"); + pub const echo = @import("echo.zig"); + pub const packet = @import("packet.zig"); + pub const socket_utils = @import("socket_utils.zig"); +}; + +pub const IpAddr = _private.net.IpAddr; +pub const SocketAddr = _private.net.SocketAddr; +pub const Packet = _private.packet.Packet; +pub const SocketThread = 
_private.socket_utils.SocketThread; + +pub const requestIpEcho = _private.echo.requestIpEcho; +pub const enablePortReuse = _private.net.enablePortReuse; +pub const endpointToString = _private.net.endpointToString; + +pub const SOCKET_TIMEOUT_US = _private.socket_utils.SOCKET_TIMEOUT_US; diff --git a/src/net/net.zig b/src/net/net.zig index 7bdebdb27..2106aa195 100644 --- a/src/net/net.zig +++ b/src/net/net.zig @@ -116,7 +116,7 @@ pub const SocketAddr = union(enum(u8)) { pub fn initIpv6(octets: [16]u8, portt: u16) Self { return Self{ - .V4 = .{ .ip = Ipv6Addr.init(octets), .port = portt }, + .V6 = .{ .ip = Ipv6Addr.init(octets), .port = portt, .flowinfo = 0, .scope_id = 0 }, }; } diff --git a/src/net/packet.zig b/src/net/packet.zig index 23988a6e3..dc8fe365e 100644 --- a/src/net/packet.zig +++ b/src/net/packet.zig @@ -1,7 +1,7 @@ const network = @import("zig-network"); const sig = @import("../lib.zig"); -const BitFlags = sig.utils.BitFlags; +const BitFlags = sig.utils.bitflags.BitFlags; /// Maximum over-the-wire size of a Transaction /// 1280 is IPv6 minimum MTU diff --git a/src/prometheus/lib.zig b/src/prometheus/lib.zig new file mode 100644 index 000000000..4aea291ae --- /dev/null +++ b/src/prometheus/lib.zig @@ -0,0 +1,18 @@ +pub const _private = struct { + pub const counter = @import("counter.zig"); + pub const gauge_fn = @import("gauge_fn.zig"); + pub const gauge = @import("gauge.zig"); + pub const histogram = @import("histogram.zig"); + pub const http = @import("http.zig"); + pub const metric = @import("metric.zig"); + pub const registry = @import("registry.zig"); +}; + +pub const Counter = _private.counter.Counter; +pub const GaugeFn = _private.gauge_fn.GaugeFn; +pub const Gauge = _private.gauge.Gauge; +pub const Histogram = _private.histogram.Histogram; +pub const Registry = _private.registry.Registry; + +pub const globalRegistry = _private.registry.globalRegistry; +pub const servePrometheus = _private.http.servePrometheus; diff --git a/src/rpc/lib.zig b/src/rpc/lib.zig new file mode 100644 index 000000000..13729a26b --- /dev/null +++ b/src/rpc/lib.zig @@ -0,0 +1,3 @@ +// pub const client = @import("client.zig"); // TODO fix compilation errors +pub const jsonrpc = @import("jsonrpc.zig"); +pub const types = @import("types.zig"); diff --git a/src/shred_collector/lib.zig b/src/shred_collector/lib.zig new file mode 100644 index 000000000..fe834023d --- /dev/null +++ b/src/shred_collector/lib.zig @@ -0,0 +1,16 @@ +pub const _private = struct { + pub const repair_message = @import("repair_message.zig"); + pub const repair_service = @import("repair_service.zig"); + pub const service = @import("service.zig"); + pub const shred_processor = @import("shred_processor.zig"); + pub const shred_receiver = @import("shred_receiver.zig"); + pub const shred_tracker = @import("shred_tracker.zig"); + pub const shred_verifier = @import("shred_verifier.zig"); + pub const shred = @import("shred.zig"); +}; + +pub const ShredCollectorConfig = _private.service.ShredCollectorConfig; +pub const ShredCollectorDependencies = _private.service.ShredCollectorDependencies; +pub const ShredCollectorInterface = _private.service.ShredCollectorInterface; + +pub const start = _private.service.start; diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 284890181..85d48c895 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -1,6 +1,7 @@ const std = @import("std"); const zig_network = @import("zig-network"); const sig = 
@import("../lib.zig"); +const shred_collector = @import("lib.zig")._private; const bincode = sig.bincode; const socket_tag = sig.gossip.socket_tag; @@ -12,13 +13,13 @@ const KeyPair = std.crypto.sign.Ed25519.KeyPair; const Random = std.rand.Random; const Socket = zig_network.Socket; -const BasicShredTracker = sig.shred_collector.BasicShredTracker; +const BasicShredTracker = shred_collector.shred_tracker.BasicShredTracker; const ContactInfo = sig.gossip.ContactInfo; const GossipTable = sig.gossip.GossipTable; -const HomogeneousThreadPool = sig.utils.HomogeneousThreadPool; +const HomogeneousThreadPool = sig.utils.thread.HomogeneousThreadPool; const Logger = sig.trace.Logger; -const LruCacheCustom = sig.common.LruCacheCustom; -const MultiSlotReport = sig.shred_collector.MultiSlotReport; +const LruCacheCustom = sig.utils.lru.LruCacheCustom; +const MultiSlotReport = shred_collector.shred_tracker.MultiSlotReport; const Nonce = sig.core.Nonce; const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; @@ -28,10 +29,10 @@ const SocketAddr = sig.net.SocketAddr; const SocketThread = sig.net.SocketThread; const Slot = sig.core.Slot; -const RepairRequest = sig.shred_collector.RepairRequest; -const RepairMessage = sig.shred_collector.RepairMessage; +const RepairRequest = shred_collector.repair_message.RepairRequest; +const RepairMessage = shred_collector.repair_message.RepairMessage; -const serializeRepairRequest = sig.shred_collector.serializeRepairRequest; +const serializeRepairRequest = shred_collector.repair_message.serializeRepairRequest; const repair_requester_threads = 4; @@ -96,7 +97,7 @@ pub const RepairService = struct { } /// Used to run RepairService continuously. - pub const run_config = sig.utils.RunConfig{ + pub const run_config = sig.utils.service_manager.RunConfig{ .name = "Repair Service", .min_loop_duration_ns = 100 * std.time.ns_per_ms, }; diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 58eecdefe..739d7c8bf 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -1,6 +1,7 @@ const std = @import("std"); const network = @import("zig-network"); const sig = @import("../lib.zig"); +const shred_collector = @import("lib.zig")._private; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; @@ -15,15 +16,14 @@ const Logger = sig.trace.Logger; const Packet = sig.net.Packet; const Pubkey = sig.core.Pubkey; const RwMux = sig.sync.RwMux; -const ServiceManager = sig.utils.ServiceManager; +const ServiceManager = sig.utils.service_manager.ServiceManager; const Slot = sig.core.Slot; -const this = sig.shred_collector; -const BasicShredTracker = this.BasicShredTracker; -const RepairPeerProvider = this.RepairPeerProvider; -const RepairRequester = this.RepairRequester; -const RepairService = this.RepairService; -const ShredReceiver = this.ShredReceiver; +const BasicShredTracker = shred_collector.shred_tracker.BasicShredTracker; +const RepairPeerProvider = shred_collector.repair_service.RepairPeerProvider; +const RepairRequester = shred_collector.repair_service.RepairRequester; +const RepairService = shred_collector.repair_service.RepairService; +const ShredReceiver = shred_collector.shred_receiver.ShredReceiver; /// Settings which tell the Shred Collector how to behave. 
pub const ShredCollectorConfig = struct { @@ -92,7 +92,7 @@ pub fn start( // verifier (thread) try service_manager.spawn( .{ .name = "Shred Verifier" }, - sig.shred_collector.runShredVerifier, + shred_collector.shred_verifier.runShredVerifier, .{ interface.exit, unverified_shred_channel, verified_shred_channel, .{} }, ); @@ -106,7 +106,7 @@ pub fn start( // processor (thread) try service_manager.spawn( .{ .name = "Shred Processor" }, - sig.shred_collector.runShredProcessor, + shred_collector.shred_processor.runShredProcessor, .{ deps.allocator, verified_shred_channel, shred_tracker }, ); diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index 825831713..48bbf4320 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -6,15 +6,15 @@ const bincode = sig.bincode; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; -const BitFlags = sig.utils.BitFlags; +const BitFlags = sig.utils.bitflags.BitFlags; const Hash = sig.core.Hash; const Nonce = sig.core.Nonce; const Packet = sig.net.Packet; const Signature = sig.core.Signature; const Slot = sig.core.Slot; -const checkedAdd = sig.utils.checkedAdd; -const checkedSub = sig.utils.checkedSub; +const checkedAdd = sig.utils.math.checkedAdd; +const checkedSub = sig.utils.math.checkedSub; const SIGNATURE_LENGTH = sig.core.SIGNATURE_LENGTH; @@ -49,7 +49,7 @@ pub const Shred = union(ShredType) { } pub fn fromPayload(allocator: Allocator, payload: []const u8) !Self { - const variant = shred_layout.getShredVariant(payload) orelse return error.uygugj; + const variant = layout.getShredVariant(payload) orelse return error.uygugj; return switch (variant.shred_type) { .Code => .{ .Code = .{ .fields = try CodingShred.Fields.fromPayload(allocator, payload) } }, .Data => .{ .Data = .{ .fields = try DataShred.Fields.fromPayload(allocator, payload) } }, @@ -74,7 +74,7 @@ pub const Shred = union(ShredType) { pub fn commonHeader(self: *const Self) *const ShredCommonHeader { return switch (self.*) { - inline .Code, .Data => |c| &c.common, + inline .Code, .Data => |c| &c.fields.common, }; } }; @@ -88,36 +88,37 @@ pub const CodingShred = struct { fn sanitize(self: *const Self) error{InvalidNumCodingShreds}!void { try self.fields.sanitize(); - if (self.custom.num_coding_shreds > 8 * DATA_SHREDS_PER_FEC_BLOCK) { + if (self.fields.custom.num_coding_shreds > 8 * DATA_SHREDS_PER_FEC_BLOCK) { return error.InvalidNumCodingShreds; } + try self.erasureShardIndex(); } pub fn erasureShardIndex(self: *const Self) !usize { // Assert that the last shred index in the erasure set does not // overshoot MAX_{DATA,CODE}_SHREDS_PER_SLOT. 
if (try checkedAdd( - self.common.fec_set_index, - try checkedSub(@as(u32, @intCast(self.custom.num_data_shreds)), 1), + self.fields.common.fec_set_index, + try checkedSub(@as(u32, @intCast(self.fields.custom.num_data_shreds)), 1), ) >= data_shred.max_per_slot) { return error.InvalidErasureShardIndex; } if (try checkedAdd( try self.first_coding_index(), - try checkedSub(@as(u32, @intCast(self.custom.num_coding_shreds)), 1), + try checkedSub(@as(u32, @intCast(self.fields.custom.num_coding_shreds)), 1), ) >= coding_shred.max_per_slot) { return error.InvalidErasureShardIndex; } - const num_data_shreds: usize = @intCast(self.custom.num_data_shreds); - const num_coding_shreds: usize = @intCast(self.custom.num_coding_shreds); - const position: usize = @intCast(self.custom.position); + const num_data_shreds: usize = @intCast(self.fields.custom.num_data_shreds); + const num_coding_shreds: usize = @intCast(self.fields.custom.num_coding_shreds); + const position: usize = @intCast(self.fields.custom.position); const fec_set_size = try checkedAdd(num_data_shreds, num_coding_shreds); const index = try checkedAdd(position, num_data_shreds); return if (index < fec_set_size) index else error.InvalidErasureShardIndex; } fn first_coding_index(self: *const Self) !u32 { - return checkedSub(self.common.index, self.custom.position); + return checkedSub(self.fields.common.index, self.fields.custom.position); } }; @@ -430,7 +431,7 @@ pub const ShredConstants = struct { headers_size: usize, }; -pub const shred_layout = struct { +pub const layout = struct { const SIZE_OF_COMMON_SHRED_HEADER: usize = 83; const SIZE_OF_DATA_SHRED_HEADERS: usize = 88; const SIZE_OF_CODING_SHRED_HEADERS: usize = 89; diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig index d166fdba0..454efe14a 100644 --- a/src/shred_collector/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -1,15 +1,16 @@ const std = @import("std"); const sig = @import("../lib.zig"); +const shred_collector = @import("lib.zig")._private; -const layout = sig.shred_collector.shred_layout; +const layout = shred_collector.shred.layout; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; -const BasicShredTracker = sig.shred_collector.BasicShredTracker; +const BasicShredTracker = shred_collector.shred_tracker.BasicShredTracker; const Channel = sig.sync.Channel; const Packet = sig.net.Packet; -const Shred = sig.shred_collector.Shred; +const Shred = shred_collector.shred.Shred; /// Analogous to [WindowService](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/window_service.rs#L395) pub fn runShredProcessor( diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 36c31f0d4..2829bba6a 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -1,9 +1,10 @@ const std = @import("std"); const network = @import("zig-network"); const sig = @import("../lib.zig"); +const shred_collector = @import("lib.zig")._private; const bincode = sig.bincode; -const layout = sig.shred_collector.shred_layout; +const layout = shred_collector.shred.layout; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; @@ -16,7 +17,7 @@ const Logger = sig.trace.Logger; const Packet = sig.net.Packet; const Ping = sig.gossip.Ping; const Pong = sig.gossip.Pong; -const RepairMessage = sig.shred_collector.RepairMessage; +const RepairMessage = shred_collector.repair_message.RepairMessage; const Slot = 
sig.core.Slot; const SocketThread = sig.net.SocketThread; @@ -163,11 +164,11 @@ fn shouldDiscardShred( if (slot > max_slot) return true; switch (variant.shred_type) { .Code => { - if (index >= sig.shred_collector.coding_shred.max_per_slot) return true; + if (index >= shred_collector.shred.coding_shred.max_per_slot) return true; if (slot <= root) return true; }, .Data => { - if (index >= sig.shred_collector.data_shred.max_per_slot) return true; + if (index >= shred_collector.shred.data_shred.max_per_slot) return true; const parent_offset = layout.getParentOffset(shred) orelse return true; const parent = slot -| @as(Slot, @intCast(parent_offset)); if (!verifyShredSlots(slot, parent, root)) return true; diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index 51ec620c6..67b1e95f0 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -1,5 +1,6 @@ const std = @import("std"); const sig = @import("../lib.zig"); +const shred_collector = @import("lib.zig")._private; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; @@ -7,7 +8,7 @@ const Mutex = std.Thread.Mutex; const Slot = sig.core.Slot; -const MAX_SHREDS_PER_SLOT: usize = sig.shred_collector.MAX_SHREDS_PER_SLOT; +const MAX_SHREDS_PER_SLOT: usize = shred_collector.shred.MAX_SHREDS_PER_SLOT; const MIN_SLOT_AGE_TO_REPORT_AS_MISSING: u64 = 200; @@ -125,7 +126,7 @@ pub const BasicShredTracker = struct { } }; -pub const MultiSlotReport = sig.utils.RecyclingList( +pub const MultiSlotReport = sig.utils.collections.RecyclingList( SlotReport, SlotReport.initBlank, SlotReport.reset, diff --git a/src/shred_collector/shred_verifier.zig b/src/shred_collector/shred_verifier.zig index 505071a76..d33bc7c7a 100644 --- a/src/shred_collector/shred_verifier.zig +++ b/src/shred_collector/shred_verifier.zig @@ -1,7 +1,8 @@ const std = @import("std"); const sig = @import("../lib.zig"); +const shred_collector = @import("lib.zig")._private; -const shred_layout = sig.shred_collector.shred_layout; +const shred_layout = shred_collector.shred.layout; const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; diff --git a/src/sync/lib.zig b/src/sync/lib.zig new file mode 100644 index 000000000..89e71ef13 --- /dev/null +++ b/src/sync/lib.zig @@ -0,0 +1,15 @@ +pub const _private = struct { + pub const channel = @import("channel.zig"); + pub const mpmc = @import("mpmc.zig"); + pub const ref = @import("ref.zig"); + pub const mux = @import("mux.zig"); + pub const once_cell = @import("once_cell.zig"); + pub const thread_pool = @import("thread_pool.zig"); +}; + +pub const Channel = _private.channel.Channel; +pub const Mux = _private.mux.Mux; +pub const RwMux = _private.mux.RwMux; + +pub const OnceCell = _private.once_cell.OnceCell; +pub const ThreadPool = _private.thread_pool.ThreadPool; diff --git a/src/tests.zig b/src/tests.zig index 2c268a131..830d54c51 100644 --- a/src/tests.zig +++ b/src/tests.zig @@ -8,5 +8,23 @@ test { logger.default_logger.* = logger.Logger.init(allocator, .debug); std.testing.log_level = std.log.Level.err; - std.testing.refAllDecls(lib); + refAllDeclsRecursive(lib, 3); +} + +/// Like std.testing.refAllDeclsRecursive, except: +/// - you can specify depth to avoid infinite or unnecessary recursion. +/// - runs at comptime to avoid compiler errors for hypothetical +/// code paths that would never actually run. 
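+///
+/// For example, the call above, `refAllDeclsRecursive(lib, 3)`, references
+/// declarations at most three namespace levels deep (enough to reach e.g.
+/// `lib.gossip._private.service`) without descending any further.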
+pub inline fn refAllDeclsRecursive(comptime T: type, comptime depth: usize) void { + if (!@import("builtin").is_test) return; + if (depth == 0) return; + inline for (comptime std.meta.declarations(T)) |decl| { + if (@TypeOf(@field(T, decl.name)) == type) { + switch (@typeInfo(@field(T, decl.name))) { + .Struct, .Enum, .Union, .Opaque => refAllDeclsRecursive(@field(T, decl.name), depth - 1), + else => {}, + } + } + _ = &@field(T, decl.name); + } } diff --git a/src/trace/lib.zig b/src/trace/lib.zig new file mode 100644 index 000000000..8fe17abc1 --- /dev/null +++ b/src/trace/lib.zig @@ -0,0 +1,10 @@ +pub const _private = struct { + pub const entry = @import("entry.zig"); + pub const field = @import("field.zig"); + pub const level = @import("level.zig"); + pub const log = @import("log.zig"); + pub const logfmt = @import("logfmt.zig"); +}; + +pub const Logger = _private.log.Logger; +pub const Level = _private.level.Level; diff --git a/src/utils/arraylist.zig b/src/utils/arraylist.zig index 316c3299a..b50496cb7 100644 --- a/src/utils/arraylist.zig +++ b/src/utils/arraylist.zig @@ -56,63 +56,3 @@ pub fn defaultArrayListOnEOFConfig(comptime T: type) bincode.FieldConfig(std.Arr .default_fn = S.defaultEOF, }; } - -/// A list that recycles items that were removed from the list. -/// -/// Useful for types that are expensive to instantiate, like -/// those that include allocations. -/// -/// When you call `addOne`, it returns a pointer to an item of type -/// type T, which could either be a new item created with initBlank, -/// or one that was previously removed from the list and had -/// resetItem called on it. -pub fn RecyclingList( - comptime T: type, - comptime initBlank: fn (Allocator) T, - comptime resetItem: fn (*T) void, - comptime deinitOne: fn (T) void, -) type { - return struct { - /// Contains valid items up to `len` - /// Any other items beyond len in this arraylist are not valid. - private: ArrayList(T), - len: usize = 0, - - const Self = @This(); - - pub fn init(allocator: Allocator) Self { - return .{ .private = ArrayList(T).init(allocator) }; - } - - pub fn deinit(self: Self) void { - for (self.private.items) |item| deinitOne(item); - self.private.deinit(); - } - - pub fn items(self: *const Self) []const T { - return self.private.items[0..self.len]; - } - - pub fn clearRetainingCapacity(self: *Self) void { - self.len = 0; - } - - pub fn addOne(self: *Self) !*T { - if (self.len < self.private.items.len) { - const item = &self.private.items[self.len]; - resetItem(item); - self.len += 1; - return item; - } else { - const item = try self.private.addOne(); - item.* = initBlank(self.private.allocator); - self.len += 1; - return item; - } - } - - pub fn drop(self: *Self, n: usize) void { - self.len -|= n; - } - }; -} diff --git a/src/utils/collections.zig b/src/utils/collections.zig new file mode 100644 index 000000000..7af83e02c --- /dev/null +++ b/src/utils/collections.zig @@ -0,0 +1,64 @@ +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; + +/// A list that recycles items that were removed from the list. +/// +/// Useful for types that are expensive to instantiate, like +/// those that include allocations. +/// +/// When you call `addOne`, it returns a pointer to an item of type +/// type T, which could either be a new item created with initBlank, +/// or one that was previously removed from the list and had +/// resetItem called on it. 
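+///
+/// A minimal usage sketch (`initBlank`, `resetItem`, and `deinitOne` are
+/// hypothetical functions for some element type `T`):
+///
+///     var list = RecyclingList(T, initBlank, resetItem, deinitOne).init(allocator);
+///     defer list.deinit();
+///     const item = try list.addOne(); // freshly created, or recycled and reset
+///     list.clearRetainingCapacity(); // items stay allocated for reuse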
+pub fn RecyclingList( + comptime T: type, + comptime initBlank: fn (Allocator) T, + comptime resetItem: fn (*T) void, + comptime deinitOne: fn (T) void, +) type { + return struct { + /// Contains valid items up to `len` + /// Any other items beyond len in this arraylist are not valid. + private: ArrayList(T), + len: usize = 0, + + const Self = @This(); + + pub fn init(allocator: Allocator) Self { + return .{ .private = ArrayList(T).init(allocator) }; + } + + pub fn deinit(self: Self) void { + for (self.private.items) |item| deinitOne(item); + self.private.deinit(); + } + + pub fn items(self: *const Self) []const T { + return self.private.items[0..self.len]; + } + + pub fn clearRetainingCapacity(self: *Self) void { + self.len = 0; + } + + pub fn addOne(self: *Self) !*T { + if (self.len < self.private.items.len) { + const item = &self.private.items[self.len]; + resetItem(item); + self.len += 1; + return item; + } else { + const item = try self.private.addOne(); + item.* = initBlank(self.private.allocator); + self.len += 1; + return item; + } + } + + pub fn drop(self: *Self, n: usize) void { + self.len -|= n; + } + }; +} diff --git a/src/utils/lazy.zig b/src/utils/lazy.zig index 21a2e80ec..f4ce5cdc8 100644 --- a/src/utils/lazy.zig +++ b/src/utils/lazy.zig @@ -2,7 +2,7 @@ const std = @import("std"); const sig = @import("../lib.zig"); const Allocator = std.mem.Allocator; -const ParamsTuple = sig.utils.ParamsTuple; +const ParamsTuple = sig.utils.types.ParamsTuple; /// A lazily evaluated instance of type T. /// diff --git a/src/utils/lib.zig b/src/utils/lib.zig new file mode 100644 index 000000000..18848bc64 --- /dev/null +++ b/src/utils/lib.zig @@ -0,0 +1,11 @@ +pub const collections = @import("collections.zig"); +pub const bitflags = @import("bitflags.zig"); +pub const directory = @import("directory.zig"); +pub const lazy = @import("lazy.zig"); +pub const lru = @import("../common/lru.zig"); // TODO move to utils folder +pub const math = @import("math.zig"); +pub const merkle_tree = @import("../common/merkle_tree.zig"); // TODO move to utils +pub const service_manager = @import("service.zig"); +pub const tar = @import("tar.zig"); +pub const thread = @import("thread.zig"); +pub const types = @import("types.zig"); diff --git a/src/utils/service.zig b/src/utils/service.zig index 5c5290a7f..e565cc836 100644 --- a/src/utils/service.zig +++ b/src/utils/service.zig @@ -8,7 +8,7 @@ const ArrayList = std.ArrayList; const Atomic = std.atomic.Value; const Logger = sig.trace.Logger; -const Lazy = sig.utils.Lazy; +const Lazy = sig.utils.lazy.Lazy; /// High level manager for long-running threads and the state /// shared by those threads. 
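///
/// For example, the shred collector's `start()` earlier in this series spawns
/// each of its threads through such a manager:
///
///     try service_manager.spawn(
///         .{ .name = "Shred Verifier" },
///         shred_collector.shred_verifier.runShredVerifier,
///         .{ interface.exit, unverified_shred_channel, verified_shred_channel, .{} },
///     );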
diff --git a/src/utils/varint.zig b/src/utils/varint.zig index 7cd8e6556..9615b8a5c 100644 --- a/src/utils/varint.zig +++ b/src/utils/varint.zig @@ -104,41 +104,6 @@ const DoneOrMore = union(enum) { const U32_MAX: u32 = 4_294_967_295; const MAX_ENCODING_LENGTH = 3; -pub fn visit_byte(elem: u8, val: u16, nth_byte: usize) !DoneOrMore { - if (elem == 0 and nth_byte != 0) { - return error.VisitError; - } - - var value = @as(u32, val); - const element = @as(u32, elem); - var elem_val: u8 = @as(u8, @intCast(element & 0x7f)); - const elem_done = (element & 0x80) == 0; - - if (nth_byte >= MAX_ENCODING_LENGTH) { - return error.TooLong; - } else if (nth_byte == (MAX_ENCODING_LENGTH - 1) and !elem_done) { - return error.ByteThreeContinues; - } - - const shift: u32 = (std.math.cast(u32, nth_byte) orelse U32_MAX) *| 7; - - const shift_res = @shlWithOverflow(elem_val, @as(u3, @intCast(shift))); - if (shift_res.@"1" == 1) { - elem_val = U32_MAX; - } else { - elem_val = shift_res.@"0".Int.bits; - } - - const new_val = value | elem_val; - value = std.math.cast(u16, new_val) catch return error.Overflow; - - if (elem_done) { - return .{ .Done = value }; - } else { - return .{ .More = value }; - } -} - pub fn visit_byte_2(elem: u8, val: u16, nth_byte: usize) !DoneOrMore { if (elem == 0 and nth_byte != 0) { return error.VisitError; From dbfd280371a37993c9968d759961ce9bcf637355 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 09:44:58 -0400 Subject: [PATCH 36/51] fix(shred-collector): handle skipped slots --- src/shred_collector/shred.zig | 2 +- src/shred_collector/shred_processor.zig | 6 +++++ src/shred_collector/shred_tracker.zig | 34 +++++++++++++++++++++---- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index 48bbf4320..1cc9c7f58 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -155,7 +155,7 @@ pub const DataShred = struct { return self.payload[consts.headers_size..size]; } - fn parent(self: *const Self) !Slot { + pub fn parent(self: *const Self) !Slot { const slot = self.fields.common.slot; if (self.fields.custom.parent_offset == 0 and slot != 0) { return error.InvalidParentOffset; diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig index 454efe14a..bfd3bf419 100644 --- a/src/shred_collector/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -37,6 +37,12 @@ pub fn runShredProcessor( else => return err, }; var shred = try Shred.fromPayload(allocator, shred_payload); + if (shred == Shred.Data) { + const parent = try shred.Data.parent(); + if (parent + 1 != slot) { + try tracker.skipSlots(parent, slot); + } + } defer shred.deinit(); if (shred.isLastInSlot()) { tracker.setLastShred(slot, index) catch |err| switch (err) { diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig index 67b1e95f0..ba65c56fc 100644 --- a/src/shred_collector/shred_tracker.zig +++ b/src/shred_collector/shred_tracker.zig @@ -53,6 +53,24 @@ pub const BasicShredTracker = struct { } } + pub fn skipSlots( + self: *Self, + start_inclusive: Slot, + end_exclusive: Slot, + ) !void { + self.mux.lock(); + defer self.mux.unlock(); + + for (start_inclusive..end_exclusive) |slot| { + const monitored_slot = try self.observeSlot(slot); + if (!monitored_slot.is_complete) { + monitored_slot.is_complete = true; + self.logger.infof("skipping slot: {}", .{slot}); + self.max_slot_processed = @max(self.max_slot_processed, slot); + } + } 
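+            // (A slot marked complete above is settled: no further shreds are
+            // expected for it, so it should not be reported as missing.)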
+ } + pub fn registerShred( self: *Self, slot: Slot, @@ -61,9 +79,7 @@ pub const BasicShredTracker = struct { self.mux.lock(); defer self.mux.unlock(); - self.maybeSetStart(slot); - self.max_slot_seen = @max(self.max_slot_seen, slot); - const monitored_slot = try self.getMonitoredSlot(slot); + const monitored_slot = try self.observeSlot(slot); const new = try monitored_slot.record(shred_index); if (new) self.logger.debugf("new slot: {}", .{slot}); self.max_slot_processed = @max(self.max_slot_processed, slot); @@ -73,8 +89,7 @@ pub const BasicShredTracker = struct { self.mux.lock(); defer self.mux.unlock(); - self.maybeSetStart(slot); - const monitored_slot = try self.getMonitoredSlot(slot); + const monitored_slot = try self.observeSlot(slot); if (monitored_slot.last_shred) |old_last| { monitored_slot.last_shred = @min(old_last, index); } else { @@ -114,6 +129,15 @@ pub const BasicShredTracker = struct { return true; } + /// - Record that a slot has been observed. + /// - Acquire the slot's status for mutation. + fn observeSlot(self: *Self, slot: Slot) !*MonitoredSlot { + self.maybeSetStart(slot); + self.max_slot_seen = @max(self.max_slot_seen, slot); + const monitored_slot = try self.getMonitoredSlot(slot); + return monitored_slot; + } + fn getMonitoredSlot(self: *Self, slot: Slot) error{ SlotUnderflow, SlotOverflow }!*MonitoredSlot { if (slot > self.current_bottom_slot + num_slots - 1) { return error.SlotOverflow; From 0544591663d80bf8307ac38f51d500b2e13cb408 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 09:59:36 -0400 Subject: [PATCH 37/51] refactor(shred-collector): for readability, explicitly type shred receivers as slices instead of implicit tuples --- src/shred_collector/shred_receiver.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 2829bba6a..67a90baba 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -65,12 +65,12 @@ pub const ShredReceiver = struct { const x = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, tvu_receivers, response_sender.channel, false }, + .{ self, &tvu_receivers, response_sender.channel, false }, ); const y = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, .{repair_receiver.channel}, response_sender.channel, true }, + .{ self, &.{repair_receiver.channel}, response_sender.channel, true }, ); x.join(); y.join(); @@ -80,14 +80,14 @@ pub const ShredReceiver = struct { /// Returns when exit is set to true. 
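/// With the explicit slice type below, both call sites above coerce cleanly:
/// `&tvu_receivers` turns the `[num_tvu_receivers]*Channel(ArrayList(Packet))`
/// array into a `[]const *Channel(ArrayList(Packet))`, and the repair path
/// passes the single-element literal `&.{repair_receiver.channel}`.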
fn runPacketHandler( self: *Self, - receivers: anytype, + receivers: []const *Channel(ArrayList(Packet)), response_sender: *Channel(ArrayList(Packet)), comptime is_repair: bool, ) !void { var buf = ArrayList(ArrayList(Packet)).init(self.allocator); while (!self.exit.load(.unordered)) { var responses = ArrayList(Packet).init(self.allocator); - inline for (receivers) |receiver| { + for (receivers) |receiver| { try receiver.tryDrainRecycle(&buf); if (buf.items.len > 0) { const shred_version = self.shred_version.load(.monotonic); From 5f8510064ce728f097c1c06ee63a66644df07f1e Mon Sep 17 00:00:00 2001 From: Trevor Berrange Sanchez Date: Wed, 5 Jun 2024 16:18:25 +0200 Subject: [PATCH 38/51] Take a random interface param and log bad seeds --- src/gossip/pull_response.zig | 17 +++++++++++------ src/gossip/service.zig | 3 +++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/gossip/pull_response.zig b/src/gossip/pull_response.zig index 7014b105b..e5cdea37f 100644 --- a/src/gossip/pull_response.zig +++ b/src/gossip/pull_response.zig @@ -20,6 +20,9 @@ const deinitGossipPullFilters = _pull_request.deinitGossipPullFilters; pub const GOSSIP_PULL_TIMEOUT_MS: u64 = 15000; pub fn filterSignedGossipDatas( + /// It is advised to use a PRNG, and not a true RNG, otherwise + /// the runtime of this function may be unbounded. + rand: std.Random, allocator: std.mem.Allocator, gossip_table: *const GossipTable, filter: *const GossipPullFilter, @@ -30,11 +33,7 @@ pub fn filterSignedGossipDatas( return ArrayList(SignedGossipData).init(allocator); } - const seed: u64 = @intCast(std.time.milliTimestamp()); - var rand = std.rand.DefaultPrng.init(seed); - const rng = rand.random(); - - const jitter = rng.intRangeAtMost(u64, 0, GOSSIP_PULL_TIMEOUT_MS / 4); + const jitter = rand.intRangeAtMost(u64, 0, GOSSIP_PULL_TIMEOUT_MS / 4); const caller_wallclock_with_jitter = caller_wallclock + jitter; var bloom = filter.filter; @@ -130,7 +129,10 @@ test "gossip.pull_response: test filtering values works" { try lg.mut().insert(v2, 0); } + const maybe_failing_seed: u64 = @intCast(std.time.milliTimestamp()); + var maybe_failing_prng = std.Random.Xoshiro256.init(maybe_failing_seed); var values = try filterSignedGossipDatas( + maybe_failing_prng.random(), std.testing.allocator, lg.get(), &filter, @@ -140,5 +142,8 @@ test "gossip.pull_response: test filtering values works" { defer values.deinit(); lg.unlock(); - try std.testing.expect(values.items.len > 0); + std.testing.expect(values.items.len > 0) catch |err| { std.log.err("\nThe failing seed is: '{d}'\n", .{maybe_failing_seed}); return err; + }; } diff --git a/src/gossip/service.zig b/src/gossip/service.zig index 6b8186f41..7f3434c96 100644 --- a/src/gossip/service.zig +++ b/src/gossip/service.zig @@ -1195,7 +1195,10 @@ pub const GossipService = struct { return; } + const filter_rng_seed: u64 = @intCast(std.time.milliTimestamp()); + var filter_prng = std.Random.Xoshiro256.init(filter_rng_seed); const response_gossip_values = pull_response.filterSignedGossipDatas( + filter_prng.random(), self.allocator, self.gossip_table, self.filter, From 545e0c6f14e1cb8efedfda0dc08aec2f60fd1353 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 11:04:58 -0400 Subject: [PATCH 39/51] fix(shred-collector): count invalid repair pings in a metric instead of logging them The problem with logging whenever an invalid message is received on a publicly exposed port is that it enables a DDoS attack vector. --- src/shred_collector/service.zig | 2 ++ src/shred_collector/shred_receiver.zig | 18
++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 739d7c8bf..01122da41 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -24,6 +24,7 @@ const RepairPeerProvider = shred_collector.repair_service.RepairPeerProvider; const RepairRequester = shred_collector.repair_service.RepairRequester; const RepairService = shred_collector.repair_service.RepairService; const ShredReceiver = shred_collector.shred_receiver.ShredReceiver; +const ShredReceiverMetrics = shred_collector.shred_receiver.ShredReceiverMetrics; /// Settings which tell the Shred Collector how to behave. pub const ShredCollectorConfig = struct { @@ -86,6 +87,7 @@ pub fn start( .tvu_socket = tvu_socket, .unverified_shred_channel = unverified_shred_channel, .shred_version = interface.my_shred_version, + .metrics = try ShredReceiverMetrics.init(), }; try service_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 67a90baba..6b233fcd5 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -36,6 +36,7 @@ pub const ShredReceiver = struct { /// me --> shred verifier unverified_shred_channel: *Channel(ArrayList(Packet)), shred_version: *const Atomic(u16), + metrics: ShredReceiverMetrics, const Self = @This(); @@ -130,13 +131,13 @@ pub const ShredReceiver = struct { /// Handle a ping message and return fn handlePing(self: *Self, packet: *const Packet, responses: *ArrayList(Packet)) !void { - const repair_ping = bincode.readFromSlice(self.allocator, RepairPing, &packet.data, .{}) catch |e| { - self.logger.errf("could not deserialize ping: {} - {any}", .{ e, packet.data[0..packet.size] }); + const repair_ping = bincode.readFromSlice(self.allocator, RepairPing, &packet.data, .{}) catch { + self.metrics.invalid_repair_pings.inc(); return; }; const ping = repair_ping.Ping; - ping.verify() catch |e| { - self.logger.errf("ping failed verification: {} - {any}", .{ e, packet.data[0..packet.size] }); + ping.verify() catch { + self.metrics.invalid_repair_pings.inc(); return; }; @@ -199,3 +200,12 @@ fn verifyShredSlots(slot: Slot, parent: Slot, root: Slot) bool { const REPAIR_RESPONSE_SERIALIZED_PING_BYTES = 132; const RepairPing = union(enum) { Ping: Ping }; + +pub const ShredReceiverMetrics = struct { + invalid_repair_pings: *sig.prometheus.Counter, + + pub fn init() !ShredReceiverMetrics { + const registry = sig.prometheus.globalRegistry(); + return .{ .invalid_repair_pings = try registry.getOrCreateCounter("invalid_repair_pings") }; + } +}; From 07b2f2f442e5a3a626b24c63ecaf0c5199c5f371 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 11:07:32 -0400 Subject: [PATCH 40/51] refactor(shred-collector): remove unused imports --- src/shred_collector/shred.zig | 1 - src/shred_collector/shred_receiver.zig | 1 - 2 files changed, 2 deletions(-) diff --git a/src/shred_collector/shred.zig b/src/shred_collector/shred.zig index 1cc9c7f58..7dec5faaf 100644 --- a/src/shred_collector/shred.zig +++ b/src/shred_collector/shred.zig @@ -4,7 +4,6 @@ const sig = @import("../lib.zig"); const bincode = sig.bincode; const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; const BitFlags = sig.utils.bitflags.BitFlags; const Hash = sig.core.Hash; diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig 
index 6b233fcd5..199d08442 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -21,7 +21,6 @@ const RepairMessage = shred_collector.repair_message.RepairMessage; const Slot = sig.core.Slot; const SocketThread = sig.net.SocketThread; -const endpointToString = sig.net.endpointToString; const num_tvu_receivers = 2; From 597677684d65918748040459967ef1626b4cb18a Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 11:13:21 -0400 Subject: [PATCH 41/51] refactor(shred-collector): consolidate dependencies --- src/cmd/cmd.zig | 5 ++--- src/shred_collector/lib.zig | 1 - src/shred_collector/service.zig | 27 ++++++++++---------------- src/shred_collector/shred_receiver.zig | 1 - 4 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 52c27b459..0fc6a587e 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig @@ -23,6 +23,7 @@ const IpAddr = sig.net.IpAddr; const Level = sig.trace.Level; const Logger = sig.trace.Logger; const Pubkey = sig.core.Pubkey; +const ShredCollectorDependencies = sig.shred_collector.ShredCollectorDependencies; const SnapshotFieldsAndPaths = sig.accounts_db.SnapshotFieldsAndPaths; const SnapshotFiles = sig.accounts_db.SnapshotFiles; const SocketAddr = sig.net.SocketAddr; @@ -419,13 +420,11 @@ fn validator() !void { // shred collector var shred_collector = try sig.shred_collector.start( config.current.shred_collector, - .{ + ShredCollectorDependencies{ .allocator = gpa_allocator, .logger = logger, .random = rand.random(), .my_keypair = &my_keypair, - }, - .{ .exit = &exit, .gossip_table_rw = &gossip_service.gossip_table_rw, .my_shred_version = &gossip_service.my_shred_version, diff --git a/src/shred_collector/lib.zig b/src/shred_collector/lib.zig index fe834023d..faf7797f3 100644 --- a/src/shred_collector/lib.zig +++ b/src/shred_collector/lib.zig @@ -11,6 +11,5 @@ pub const _private = struct { pub const ShredCollectorConfig = _private.service.ShredCollectorConfig; pub const ShredCollectorDependencies = _private.service.ShredCollectorDependencies; -pub const ShredCollectorInterface = _private.service.ShredCollectorInterface; pub const start = _private.service.start; diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 01122da41..41c8eeb24 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -26,26 +26,20 @@ const RepairService = shred_collector.repair_service.RepairService; const ShredReceiver = shred_collector.shred_receiver.ShredReceiver; const ShredReceiverMetrics = shred_collector.shred_receiver.ShredReceiverMetrics; -/// Settings which tell the Shred Collector how to behave. +/// Settings which instruct the Shred Collector how to behave. pub const ShredCollectorConfig = struct { start_slot: ?Slot, repair_port: u16, tvu_port: u16, }; -/// Basic resources that are required for -/// the Shred Collector to operate. +/// Resources that are required for the Shred Collector to operate. pub const ShredCollectorDependencies = struct { allocator: Allocator, logger: Logger, random: Random, /// This validator's keypair my_keypair: *const KeyPair, -}; - -/// Interface between the Shred Collector and other components -/// that are external to the Shred Collector. -pub const ShredCollectorInterface = struct { /// Shared exit indicator, used to shutdown the Shred Collector. 
exit: *Atomic(bool), /// Shared state that is read from gossip @@ -66,9 +60,8 @@ pub const ShredCollectorInterface = struct { pub fn start( conf: ShredCollectorConfig, deps: ShredCollectorDependencies, - interface: ShredCollectorInterface, ) !ServiceManager { - var service_manager = ServiceManager.init(deps.allocator, deps.logger, interface.exit); + var service_manager = ServiceManager.init(deps.allocator, deps.logger, deps.exit); var arena = service_manager.arena(); const repair_socket = try bindUdpReusable(conf.repair_port); @@ -81,12 +74,12 @@ pub fn start( shred_receiver.* = ShredReceiver{ .allocator = deps.allocator, .keypair = deps.my_keypair, - .exit = interface.exit, + .exit = deps.exit, .logger = deps.logger, .repair_socket = repair_socket, .tvu_socket = tvu_socket, .unverified_shred_channel = unverified_shred_channel, - .shred_version = interface.my_shred_version, + .shred_version = deps.my_shred_version, .metrics = try ShredReceiverMetrics.init(), }; try service_manager.spawn(.{ .name = "Shred Receiver" }, ShredReceiver.run, .{shred_receiver}); @@ -95,7 +88,7 @@ pub fn start( try service_manager.spawn( .{ .name = "Shred Verifier" }, shred_collector.shred_verifier.runShredVerifier, - .{ interface.exit, unverified_shred_channel, verified_shred_channel, .{} }, + .{ deps.exit, unverified_shred_channel, verified_shred_channel, .{} }, ); // tracker (shared state, internal to Shred Collector) @@ -116,9 +109,9 @@ pub fn start( const repair_peer_provider = try RepairPeerProvider.init( deps.allocator, deps.random, - interface.gossip_table_rw, + deps.gossip_table_rw, Pubkey.fromPublicKey(&deps.my_keypair.public_key), - interface.my_shred_version, + deps.my_shred_version, ); const repair_requester = try RepairRequester.init( deps.allocator, @@ -126,14 +119,14 @@ pub fn start( deps.random, deps.my_keypair, repair_socket, - interface.exit, + deps.exit, ); const repair_svc = try arena.create(RepairService); try service_manager.defers.deferCall(RepairService.deinit, .{repair_svc}); repair_svc.* = RepairService.init( deps.allocator, deps.logger, - interface.exit, + deps.exit, repair_requester, repair_peer_provider, shred_tracker, diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 199d08442..47ad2ac07 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -21,7 +21,6 @@ const RepairMessage = shred_collector.repair_message.RepairMessage; const Slot = sig.core.Slot; const SocketThread = sig.net.SocketThread; - const num_tvu_receivers = 2; /// Analogous to [ShredFetchStage](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/shred_fetch_stage.rs#L34) From 1a65d68468f5b9ad25da9f912fc4406885639245 Mon Sep 17 00:00:00 2001 From: Trevor Berrange Sanchez Date: Wed, 5 Jun 2024 16:57:29 +0200 Subject: [PATCH 42/51] Make `FileId` a typed alias, & enhance `validate`. 
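
The core trick is Zig's non-exhaustive enum: an `enum(u32)` containing `_` can represent every u32 value, but a raw integer no longer coerces to it implicitly, so every conversion becomes an explicit, greppable call site. A minimal standalone sketch of the pattern; the `describe` consumer is hypothetical and only illustrates the added type safety, it is not part of this change:

    const std = @import("std");

    /// Strictly-typed integer alias: a non-exhaustive enum backed by u32.
    /// The `_` makes it non-exhaustive, so any u32 value is representable,
    /// but all conversions must go through the explicit builtins.
    const FileId = enum(u32) {
        _,

        pub fn fromInt(int: u32) FileId {
            return @enumFromInt(int);
        }

        pub fn toInt(file_id: FileId) u32 {
            return @intFromEnum(file_id);
        }
    };

    // Hypothetical consumer, shown only to demonstrate the type safety.
    fn describe(id: FileId) void {
        std.debug.print("file id: {d}\n", .{id.toInt()});
    }

    pub fn main() void {
        const id = FileId.fromInt(42);
        describe(id); // ok
        // describe(42); // compile error: expected 'FileId', found 'comptime_int'
    }

The `format` override in the diff below goes one step further: it turns any attempt to print a `FileId` directly into a compile error.
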
--- src/accountsdb/accounts_file.zig | 22 +++++++++++++++++++++- src/accountsdb/db.zig | 6 +++--- src/accountsdb/index.zig | 5 +++-- src/accountsdb/snapshots.zig | 11 ++++++++++- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/accountsdb/accounts_file.zig b/src/accountsdb/accounts_file.zig index 12a413972..239d70cc2 100644 --- a/src/accountsdb/accounts_file.zig +++ b/src/accountsdb/accounts_file.zig @@ -10,7 +10,27 @@ const Pubkey = @import("../core/pubkey.zig").Pubkey; const AccountFileInfo = @import("snapshots.zig").AccountFileInfo; -pub const FileId = u32; +/// Simple strictly-typed alias for an integer, used to represent a file ID. +pub const FileId = enum(u32) { + _, + + pub inline fn fromInt(int: u32) FileId { + return @enumFromInt(int); + } + + pub inline fn toInt(file_id: FileId) u32 { + return @intFromEnum(file_id); + } + + pub fn format( + _: FileId, + comptime _: []const u8, + _: std.fmt.FormatOptions, + _: anytype, + ) !void { + @compileError("Should not print " ++ @typeName(FileId) ++ " directly"); + } +}; // an account thats stored in an AccountFile pub const AccountInFile = struct { diff --git a/src/accountsdb/db.zig b/src/accountsdb/db.zig index ce889dc54..dc86ac5ca 100644 --- a/src/accountsdb/db.zig +++ b/src/accountsdb/db.zig @@ -373,8 +373,8 @@ pub const AccountsDB = struct { std.debug.panic("failed to *sanitize* AccountsFile: {d}.{d}: {s}\n", .{ accounts_file.slot, accounts_file.id, @errorName(err) }); }; - const file_id_u32: u32 = @intCast(accounts_file_id); - file_map.putAssumeCapacityNoClobber(file_id_u32, accounts_file); + const file_id = FileId.fromInt(@intCast(accounts_file_id)); + file_map.putAssumeCapacityNoClobber(file_id, accounts_file); if (print_progress and progress_timer.read() > DB_PROGRESS_UPDATES_NS) { printTimeEstimate( @@ -984,7 +984,7 @@ pub const AccountsDB = struct { var refs = try ArrayList(AccountRef).initCapacity(reference_allocator, n_accounts); try self.account_index.validateAccountFile(account_file, bin_counts, &refs); - try self.storage.file_map.put(@as(u32, @intCast(account_file.id)), account_file.*); + try self.storage.file_map.put(FileId.fromInt(@intCast(account_file.id)), account_file.*); const refs_ptr = try self.account_index.addMemoryBlock(refs); // allocate enough memory here diff --git a/src/accountsdb/index.zig b/src/accountsdb/index.zig index 1fbfbc0d2..c0a4cb251 100644 --- a/src/accountsdb/index.zig +++ b/src/accountsdb/index.zig @@ -6,6 +6,7 @@ const Hash = @import("../core/hash.zig").Hash; const Slot = @import("../core/time.zig").Slot; const Pubkey = @import("../core/pubkey.zig").Pubkey; const AccountFile = @import("accounts_file.zig").AccountFile; +const FileId = @import("accounts_file.zig").FileId; /// reference to an account (either in a file or cache) pub const AccountRef = struct { @@ -16,7 +17,7 @@ pub const AccountRef = struct { pub const AccountLocation = union(enum(u8)) { File: struct { - file_id: u32, + file_id: FileId, offset: usize, }, Cache: struct { @@ -190,7 +191,7 @@ pub const AccountIndex = struct { .slot = accounts_file.slot, .location = .{ .File = .{ - .file_id = @as(u32, @intCast(accounts_file.id)), + .file_id = FileId.fromInt(@intCast(accounts_file.id)), .offset = offset, }, }, diff --git a/src/accountsdb/snapshots.zig b/src/accountsdb/snapshots.zig index f39b015e3..cdea044e0 100644 --- a/src/accountsdb/snapshots.zig +++ b/src/accountsdb/snapshots.zig @@ -247,7 +247,16 @@ pub const AccountFileInfo = struct { id: usize, length: usize, // amount of bytes used - pub fn validate(self: *const 
AccountFileInfo, file_size: usize) !void { + pub const ValidateError = error{ + IdOverflow, + FileSizeTooSmall, + FileSizeTooLarge, + OffsetOutOfBounds, + }; + pub fn validate(self: *const AccountFileInfo, file_size: usize) ValidateError!void { + if (self.id > std.math.maxInt(u32)) { + return error.IdOverflow; + } if (file_size == 0) { return error.FileSizeTooSmall; } else if (file_size > @as(usize, MAXIMUM_ACCOUNT_FILE_SIZE)) { From a6be81de8ee059d28d82af886ae3be5a8d11a3b6 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 14:22:31 -0400 Subject: [PATCH 43/51] refactor: unused imports --- src/utils/arraylist.zig | 3 --- src/utils/service.zig | 8 ++++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/utils/arraylist.zig b/src/utils/arraylist.zig index b50496cb7..2d28b7c41 100644 --- a/src/utils/arraylist.zig +++ b/src/utils/arraylist.zig @@ -1,9 +1,6 @@ const std = @import("std"); const bincode = @import("../bincode/bincode.zig"); -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; - pub fn ArrayListConfig(comptime Child: type) bincode.FieldConfig(std.ArrayList(Child)) { const S = struct { pub fn serialize(writer: anytype, data: anytype, params: bincode.Params) !void { diff --git a/src/utils/service.zig b/src/utils/service.zig index e565cc836..c8c2dcd03 100644 --- a/src/utils/service.zig +++ b/src/utils/service.zig @@ -20,7 +20,7 @@ pub const ServiceManager = struct { /// Signal that is expected to tell all threads to exit. exit: *Atomic(bool), /// Threads to join. - threads: std.ArrayList(std.Thread), + threads: ArrayList(std.Thread), /// State to free after all threads join. _arena: ArenaAllocator, /// Logic to run after all threads join. @@ -32,7 +32,7 @@ pub const ServiceManager = struct { return .{ .logger = logger, .exit = exit, - .threads = std.ArrayList(std.Thread).init(allocator), + .threads = ArrayList(std.Thread).init(allocator), ._arena = ArenaAllocator.init(allocator), .defers = DeferList.init(allocator), }; @@ -170,12 +170,12 @@ pub fn runService( /// 2. Return this struct to the broader scope. /// 3. Call `deinit` to run all the defers. pub const DeferList = struct { - defers: std.ArrayList(Lazy(void)), + defers: ArrayList(Lazy(void)), const Self = @This(); pub fn init(allocator: Allocator) Self { - return .{ .defers = std.ArrayList(Lazy(void)).init(allocator) }; + return .{ .defers = ArrayList(Lazy(void)).init(allocator) }; } pub fn deferCall( From d34455adf2b02379a6724c08abc57f038a6649c7 Mon Sep 17 00:00:00 2001 From: Trevor Berrange Sanchez Date: Wed, 5 Jun 2024 21:04:33 +0200 Subject: [PATCH 44/51] Guarantee the layout of some struct types by making them `extern struct`s. 
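
A plain Zig struct gives the compiler freedom to reorder and pad fields however it likes, so reinterpreting file-backed bytes as such a struct is not well defined; `extern struct` pins field order and offsets to the C ABI. A small standalone sketch of what the guarantee buys, using a simplified stand-in for `AccountInfo` (the field set is illustrative, with `owner` as raw bytes rather than a `Pubkey`):

    const std = @import("std");

    // `extern struct` lays fields out per the C ABI, so the offsets
    // asserted below are guaranteed rather than a compiler
    // implementation detail.
    const AccountInfo = extern struct {
        lamports: u64,
        rent_epoch: u64,
        owner: [32]u8,
        executable: bool,
    };

    pub fn main() void {
        std.debug.assert(@offsetOf(AccountInfo, "lamports") == 0);
        std.debug.assert(@offsetOf(AccountInfo, "rent_epoch") == 8);
        std.debug.assert(@offsetOf(AccountInfo, "owner") == 16);
        std.debug.assert(@offsetOf(AccountInfo, "executable") == 48);
        std.debug.print("sizeOf(AccountInfo) = {d}\n", .{@sizeOf(AccountInfo)});
    }

This is also why `Pubkey` becomes an `extern struct` in the same change: every field of an `extern struct` must itself have a well-defined layout.
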
--- src/accountsdb/accounts_file.zig | 4 ++-- src/core/pubkey.zig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/accountsdb/accounts_file.zig b/src/accountsdb/accounts_file.zig index 239d70cc2..bb5bde54c 100644 --- a/src/accountsdb/accounts_file.zig +++ b/src/accountsdb/accounts_file.zig @@ -45,14 +45,14 @@ pub const AccountInFile = struct { len: usize = 0, /// info about the account stored - pub const StorageInfo = struct { + pub const StorageInfo = extern struct { write_version_obsolete: u64, data_len: u64, pubkey: Pubkey, }; /// on-chain account info about the account - pub const AccountInfo = struct { + pub const AccountInfo = extern struct { lamports: u64, rent_epoch: Epoch, owner: Pubkey, diff --git a/src/core/pubkey.zig b/src/core/pubkey.zig index 1df17963d..7c9310cc3 100644 --- a/src/core/pubkey.zig +++ b/src/core/pubkey.zig @@ -4,7 +4,7 @@ const Ed25519 = std.crypto.sign.Ed25519; const encoder = base58.Encoder.init(.{}); const decoder = base58.Decoder.init(.{}); -pub const Pubkey = struct { +pub const Pubkey = extern struct { data: [32]u8, const Self = @This(); From 89759aefac786e84090a923520631eeb6b2f5e06 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 16:36:59 -0400 Subject: [PATCH 45/51] refactor(shred-collector): uppercase file-scoped consts --- src/shred_collector/repair_service.zig | 4 ++-- src/shred_collector/shred_receiver.zig | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 85d48c895..51ba6e9b0 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -34,7 +34,7 @@ const RepairMessage = shred_collector.repair_message.RepairMessage; const serializeRepairRequest = shred_collector.repair_message.serializeRepairRequest; -const repair_requester_threads = 4; +const NUM_REQUESTER_THREADS = 4; /// Identifies which repairs are needed and sends them /// - delegates to RepairPeerProvider to identify repair peers. 
@@ -85,7 +85,7 @@ pub const RepairService = struct { .logger = logger, .exit = exit, .report = MultiSlotReport.init(allocator), - .thread_pool = RequestBatchThreadPool.init(allocator, repair_requester_threads), + .thread_pool = RequestBatchThreadPool.init(allocator, NUM_REQUESTER_THREADS), }; } diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index 47ad2ac07..df77589a9 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -21,7 +21,7 @@ const RepairMessage = shred_collector.repair_message.RepairMessage; const Slot = sig.core.Slot; const SocketThread = sig.net.SocketThread; -const num_tvu_receivers = 2; +const NUM_TVU_RECEIVERS = 2; /// Analogous to [ShredFetchStage](https://github.com/anza-xyz/agave/blob/aa2f078836434965e1a5a03af7f95c6640fe6e1e/core/src/shred_fetch_stage.rs#L34) pub const ShredReceiver = struct { @@ -51,8 +51,8 @@ pub const ShredReceiver = struct { .initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); defer repair_receiver.deinit(); - var tvu_receivers: [num_tvu_receivers]*Channel(ArrayList(Packet)) = undefined; - for (0..num_tvu_receivers) |i| { + var tvu_receivers: [NUM_TVU_RECEIVERS]*Channel(ArrayList(Packet)) = undefined; + for (0..NUM_TVU_RECEIVERS) |i| { tvu_receivers[i] = (try SocketThread.initReceiver( self.allocator, self.logger, From 888747577181a4e2f78143752ae850bb86b0ed3f Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 17:03:16 -0400 Subject: [PATCH 46/51] refactor(shred-collector): rename channels to sender/receiver --- src/shred_collector/service.zig | 2 +- src/shred_collector/shred_processor.zig | 4 ++-- src/shred_collector/shred_receiver.zig | 4 ++-- src/shred_collector/shred_verifier.zig | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 41c8eeb24..6fc7dcfd8 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -78,7 +78,7 @@ pub fn start( .logger = deps.logger, .repair_socket = repair_socket, .tvu_socket = tvu_socket, - .unverified_shred_channel = unverified_shred_channel, + .unverified_shred_sender = unverified_shred_channel, .shred_version = deps.my_shred_version, .metrics = try ShredReceiverMetrics.init(), }; diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig index bfd3bf419..7afdc8973 100644 --- a/src/shred_collector/shred_processor.zig +++ b/src/shred_collector/shred_processor.zig @@ -16,13 +16,13 @@ const Shred = shred_collector.shred.Shred; pub fn runShredProcessor( allocator: Allocator, // shred verifier --> me - verified_shred_channel: *Channel(ArrayList(Packet)), + verified_shred_receiver: *Channel(ArrayList(Packet)), tracker: *BasicShredTracker, ) !void { var processed_count: usize = 0; var buf = ArrayList(ArrayList(Packet)).init(allocator); while (true) { - try verified_shred_channel.tryDrainRecycle(&buf); + try verified_shred_receiver.tryDrainRecycle(&buf); if (buf.items.len == 0) { std.time.sleep(10 * std.time.ns_per_ms); continue; diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index df77589a9..a4bf92421 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -32,7 +32,7 @@ pub const ShredReceiver = struct { repair_socket: Socket, tvu_socket: Socket, /// me --> shred verifier - unverified_shred_channel: *Channel(ArrayList(Packet)), + 
unverified_shred_sender: *Channel(ArrayList(Packet)), shred_version: *const Atomic(u16), metrics: ShredReceiverMetrics, @@ -95,7 +95,7 @@ pub const ShredReceiver = struct { try self.handlePacket(packet, &responses, shred_version); if (is_repair) packet.flags.set(.repair); } - try self.unverified_shred_channel.send(batch); + try self.unverified_shred_sender.send(batch); } } else { std.time.sleep(10 * std.time.ns_per_ms); diff --git a/src/shred_collector/shred_verifier.zig b/src/shred_collector/shred_verifier.zig index d33bc7c7a..fef2099e3 100644 --- a/src/shred_collector/shred_verifier.zig +++ b/src/shred_collector/shred_verifier.zig @@ -14,15 +14,15 @@ const Packet = sig.net.Packet; pub fn runShredVerifier( exit: *Atomic(bool), /// shred receiver --> me - unverified_shred_channel: *Channel(ArrayList(Packet)), + unverified_shred_receiver: *Channel(ArrayList(Packet)), /// me --> shred processor - verified_shred_channel: *Channel(ArrayList(Packet)), + verified_shred_sender: *Channel(ArrayList(Packet)), leader_schedule: LeaderScheduleCalculator, ) !void { var verified_count: usize = 0; - var buf = ArrayList(ArrayList(Packet)).init(unverified_shred_channel.allocator); + var buf = ArrayList(ArrayList(Packet)).init(unverified_shred_receiver.allocator); while (true) { - try unverified_shred_channel.tryDrainRecycle(&buf); + try unverified_shred_receiver.tryDrainRecycle(&buf); if (buf.items.len == 0) { std.time.sleep(10 * std.time.ns_per_ms); continue; @@ -36,7 +36,7 @@ pub fn runShredVerifier( verified_count += 1; } } - try verified_shred_channel.send(packet_batch); + try verified_shred_sender.send(packet_batch); if (exit.load(.monotonic)) return; } } From e2e3716d403a68b20f4e2ebf2eabec973032b9d7 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Wed, 5 Jun 2024 17:46:59 -0400 Subject: [PATCH 47/51] refactor(shred-collector): import style consistency in repair_message.zig --- src/shred_collector/repair_message.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shred_collector/repair_message.zig b/src/shred_collector/repair_message.zig index e75f08279..5876cb367 100644 --- a/src/shred_collector/repair_message.zig +++ b/src/shred_collector/repair_message.zig @@ -1,7 +1,7 @@ const std = @import("std"); const sig = @import("../lib.zig"); -const bincode = @import("../bincode/bincode.zig"); +const bincode = sig.bincode; const KeyPair = std.crypto.sign.Ed25519.KeyPair; @@ -11,7 +11,7 @@ const Pubkey = sig.core.Pubkey; const Signature = sig.core.Signature; const Slot = sig.core.Slot; -const SIGNATURE_LENGTH = @import("../core/signature.zig").SIGNATURE_LENGTH; +const SIGNATURE_LENGTH = sig.core.SIGNATURE_LENGTH; /// Analogous to [SIGNED_REPAIR_TIME_WINDOW](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/repair/serve_repair.rs#L89) const SIGNED_REPAIR_TIME_WINDOW_SECS: u64 = 600; From a930874c7ab3cab8ac60730da9994f5ec847ab64 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Fri, 7 Jun 2024 09:57:39 -0400 Subject: [PATCH 48/51] refactor(gossip, shred-collector): rename tvu_port to turbine_port --- src/cmd/cmd.zig | 18 ++++---- src/cmd/config.zig | 2 +- src/gossip/data.zig | 61 +++++++++++--------------- src/gossip/message.zig | 8 ++-- src/shred_collector/repair_service.zig | 8 ++-- src/shred_collector/service.zig | 9 ++-- src/shred_collector/shred_receiver.zig | 12 ++--- 7 files changed, 54 insertions(+), 64 deletions(-) diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig index 4f81f21ae..8a455dbc2 100644 --- a/src/cmd/cmd.zig +++ b/src/cmd/cmd.zig 
@@ -85,18 +85,18 @@ var gossip_port_option = cli.Option{ var repair_port_option = cli.Option{ .long_name = "repair-port", - .help = "The port to run tvu repair listener - default: 8002", + .help = "The port to run shred repair listener - default: 8002", .value_ref = cli.mkRef(&config.current.shred_collector.repair_port), .required = false, .value_name = "Repair Port", }; -var tvu_port_option = cli.Option{ - .long_name = "tvu-port", - .help = "The port to run turbine listener - default: 8003", - .value_ref = cli.mkRef(&config.current.shred_collector.tvu_port), +var turbine_port_option = cli.Option{ + .long_name = "turbine-port", + .help = "The port to run turbine shred listener (aka TVU port) - default: 8003", + .value_ref = cli.mkRef(&config.current.shred_collector.turbine_port), .required = false, - .value_name = "TVU Port", + .value_name = "Turbine Port", }; var test_repair_option = cli.Option{ @@ -302,7 +302,7 @@ var app = &cli.App{ &gossip_spy_node_option, &gossip_dump_option, // repair - &tvu_port_option, + &turbine_port_option, &repair_port_option, &test_repair_option, // accounts-db @@ -405,7 +405,7 @@ fn validator() !void { const ip_echo_data = try getMyDataFromIpEcho(logger, entrypoints.items); const repair_port: u16 = config.current.shred_collector.repair_port; - const tvu_port: u16 = config.current.shred_collector.repair_port; + const turbine_port: u16 = config.current.shred_collector.repair_port; // gossip var gossip_service = try initGossip( @@ -417,7 +417,7 @@ fn validator() !void { ip_echo_data.ip, &.{ .{ .tag = socket_tag.REPAIR, .port = repair_port }, - .{ .tag = socket_tag.TVU, .port = tvu_port }, + .{ .tag = socket_tag.TURBINE, .port = turbine_port }, }, ); defer gossip_service.deinit(); diff --git a/src/cmd/config.zig b/src/cmd/config.zig index c726096e0..a5725d8c3 100644 --- a/src/cmd/config.zig +++ b/src/cmd/config.zig @@ -26,7 +26,7 @@ const GossipConfig = struct { }; const shred_collector_defaults = ShredCollectorConfig{ - .tvu_port = 8002, + .turbine_port = 8002, .repair_port = 8003, .start_slot = null, }; diff --git a/src/gossip/data.zig b/src/gossip/data.zig index 062607840..7c3de51a8 100644 --- a/src/gossip/data.zig +++ b/src/gossip/data.zig @@ -463,14 +463,17 @@ pub const GossipData = union(enum(u32)) { } }; +/// analogous to [LegactContactInfo](https://github.com/anza-xyz/agave/blob/0d34a1a160129c4293dac248e14231e9e773b4ce/gossip/src/legacy_contact_info.rs#L26) pub const LegacyContactInfo = struct { id: Pubkey, /// gossip address gossip: SocketAddr, /// address to connect to for replication - tvu: SocketAddr, + /// analogous to `tvu` in agave + turbine: SocketAddr, /// address to forward shreds to - tvu_forwards: SocketAddr, + /// analogous to `tvu_quic` in agave + turbine_quic: SocketAddr, /// address to send repair responses to repair: SocketAddr, /// transactions address @@ -501,8 +504,8 @@ pub const LegacyContactInfo = struct { return LegacyContactInfo{ .id = id, .gossip = unspecified_addr, - .tvu = unspecified_addr, - .tvu_forwards = unspecified_addr, + .turbine = unspecified_addr, + .turbine_quic = unspecified_addr, .repair = unspecified_addr, .tpu = unspecified_addr, .tpu_forwards = unspecified_addr, @@ -519,8 +522,8 @@ pub const LegacyContactInfo = struct { return LegacyContactInfo{ .id = Pubkey.random(rng), .gossip = SocketAddr.random(rng), - .tvu = SocketAddr.random(rng), - .tvu_forwards = SocketAddr.random(rng), + .turbine = SocketAddr.random(rng), + .turbine_quic = SocketAddr.random(rng), .repair = SocketAddr.random(rng), .tpu = 
SocketAddr.random(rng), .tpu_forwards = SocketAddr.random(rng), @@ -537,8 +540,8 @@ pub const LegacyContactInfo = struct { pub fn toContactInfo(self: *const LegacyContactInfo, allocator: std.mem.Allocator) !ContactInfo { var ci = ContactInfo.init(allocator, self.id, self.wallclock, self.shred_version); try ci.setSocket(socket_tag.GOSSIP, self.gossip); - try ci.setSocket(socket_tag.TVU, self.tvu); - try ci.setSocket(socket_tag.TVU_FORWARDS, self.tvu_forwards); + try ci.setSocket(socket_tag.TURBINE, self.turbine); + try ci.setSocket(socket_tag.TURBINE_QUIC, self.turbine_quic); try ci.setSocket(socket_tag.REPAIR, self.repair); try ci.setSocket(socket_tag.TPU, self.tpu); try ci.setSocket(socket_tag.TPU_FORWARDS, self.tpu_forwards); @@ -553,8 +556,8 @@ pub const LegacyContactInfo = struct { return .{ .id = ci.pubkey, .gossip = ci.getSocket(socket_tag.GOSSIP) orelse SocketAddr.UNSPECIFIED, - .tvu = ci.getSocket(socket_tag.TVU) orelse SocketAddr.UNSPECIFIED, - .tvu_forwards = ci.getSocket(socket_tag.TVU_FORWARDS) orelse SocketAddr.UNSPECIFIED, + .turbine = ci.getSocket(socket_tag.TURBINE) orelse SocketAddr.UNSPECIFIED, + .turbine_quic = ci.getSocket(socket_tag.TURBINE_QUIC) orelse SocketAddr.UNSPECIFIED, .repair = ci.getSocket(socket_tag.REPAIR) orelse SocketAddr.UNSPECIFIED, .tpu = ci.getSocket(socket_tag.TPU) orelse SocketAddr.UNSPECIFIED, .tpu_forwards = ci.getSocket(socket_tag.TPU_FORWARDS) orelse SocketAddr.UNSPECIFIED, @@ -987,11 +990,12 @@ pub const socket_tag = struct { pub const TPU_FORWARDS_QUIC: u8 = 7; pub const TPU_QUIC: u8 = 8; pub const TPU_VOTE: u8 = 9; - pub const TVU: u8 = 10; - pub const TVU_FORWARDS: u8 = 11; - pub const TVU_QUIC: u8 = 12; + /// Analogous to [SOCKET_TAG_TVU](https://github.com/anza-xyz/agave/blob/0d34a1a160129c4293dac248e14231e9e773b4ce/gossip/src/contact_info.rs#L36) + pub const TURBINE: u8 = 10; + /// Analogous to [SOCKET_TAG_TVU_QUIC](https://github.com/anza-xyz/agave/blob/0d34a1a160129c4293dac248e14231e9e773b4ce/gossip/src/contact_info.rs#L37) + pub const TURBINE_QUIC: u8 = 11; }; -pub const SOCKET_CACHE_SIZE: usize = socket_tag.TVU_QUIC + 1; +pub const SOCKET_CACHE_SIZE: usize = socket_tag.TURBINE_QUIC + 1; pub const ContactInfo = struct { pubkey: Pubkey, @@ -1194,16 +1198,15 @@ const NodePort = union(enum) { tpu_forwards_quic: network.EndPoint, tpu_quic: network.EndPoint, tpu_vote: network.EndPoint, - tvu: network.EndPoint, - tvu_forwards: network.EndPoint, - tvu_quic: network.EndPoint, + turbine: network.EndPoint, + turbine_quic: network.EndPoint, }; const Sockets = struct { gossip: UdpSocket, ip_echo: ?TcpListener, - tvu: ArrayList(UdpSocket), - tvu_forwards: ArrayList(UdpSocket), + turbine: ArrayList(UdpSocket), + turbine_quic: ArrayList(UdpSocket), tpu: ArrayList(UdpSocket), tpu_forwards: ArrayList(UdpSocket), tpu_vote: ArrayList(UdpSocket), @@ -1217,7 +1220,7 @@ const Sockets = struct { }; pub const SocketEntry = struct { - key: u8, // GossipMessageidentifier, e.g. tvu, tpu, etc + key: u8, // GossipMessageidentifier, e.g. turbine, tpu, etc index: u8, // IpAddr index in the accompanying addrs vector. offset: u16, // Port offset with respect to the previous entry. 
@@ -1240,22 +1243,8 @@ pub const SocketEntry = struct { } }; -fn socket_addrs_unspecified() [13]SocketAddr { - return .{ - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - SocketAddr.unspecified(), - }; +fn socket_addrs_unspecified() [SOCKET_CACHE_SIZE]SocketAddr { + return .{SocketAddr.unspecified()} ** SOCKET_CACHE_SIZE; } pub const RestartHeaviestFork = struct { diff --git a/src/gossip/message.zig b/src/gossip/message.zig index 79ba63fc3..5cc8aac17 100644 --- a/src/gossip/message.zig +++ b/src/gossip/message.zig @@ -307,8 +307,8 @@ test "gossip.message: pull request serializes and deserializes" { const legacy_contact_info = LegacyContactInfo{ .id = pubkey, .gossip = gossip_addr, - .tvu = unspecified_addr, - .tvu_forwards = unspecified_addr, + .turbine = unspecified_addr, + .turbine_quic = unspecified_addr, .repair = unspecified_addr, .tpu = unspecified_addr, .tpu_forwards = unspecified_addr, @@ -354,8 +354,8 @@ test "gossip.message: push message serializes and deserializes correctly" { const legacy_contact_info = LegacyContactInfo{ .id = id, .gossip = gossip_addr, - .tvu = unspecified_addr, - .tvu_forwards = unspecified_addr, + .turbine = unspecified_addr, + .turbine_quic = unspecified_addr, .repair = unspecified_addr, .tpu = unspecified_addr, .tpu_forwards = unspecified_addr, diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 51ba6e9b0..64ee697ce 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -378,7 +378,7 @@ pub const RepairPeerProvider = struct { if (!info.pubkey.equals(&self.my_pubkey) and // don't request from self info.shred_version == self.my_shred_version.load(.monotonic) and // need compatible shreds serve_repair_socket != null and // node must be able to receive repair requests - info.getSocket(socket_tag.TVU) != null) // node needs access to shreds + info.getSocket(socket_tag.TURBINE) != null) // node needs access to shreds { // exclude nodes that are known to be missing this slot if (gossip_table.get(.{ .LowestSlot = info.pubkey })) |lsv| { @@ -431,7 +431,7 @@ test "RepairService sends repair request to gossip peer" { try peer_socket.setReadTimeout(100_000); var peer_contact_info = ContactInfo.init(allocator, Pubkey.fromPublicKey(&peer_keypair.public_key), wallclock, my_shred_version.load(.unordered)); try peer_contact_info.setSocket(socket_tag.SERVE_REPAIR, SocketAddr.fromEndpoint(&peer_endpoint)); - try peer_contact_info.setSocket(socket_tag.TVU, SocketAddr.fromEndpoint(&peer_endpoint)); + try peer_contact_info.setSocket(socket_tag.TURBINE, SocketAddr.fromEndpoint(&peer_endpoint)); try gossip.insert(try SignedGossipData.initSigned(.{ .ContactInfo = peer_contact_info }, &peer_keypair), wallclock); // init service @@ -558,7 +558,7 @@ const TestPeerGenerator = struct { MissingSlot, /// There is no serve repair port specified in the peer's contact info MissingServeRepairPort, - /// There is no tvu port specified in the peer's contact info + /// There is no turbine port specified in the peer's contact info MissingTvuPort, /// The peer has a different shred version WrongShredVersion, @@ -575,7 +575,7 @@ const TestPeerGenerator = struct { try contact_info.setSocket(socket_tag.SERVE_REPAIR, 
serve_repair_addr); } if (peer_type != .MissingTvuPort) { - try contact_info.setSocket(socket_tag.TVU, SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 8004)); + try contact_info.setSocket(socket_tag.TURBINE, SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 8004)); } try self.gossip.insert(try SignedGossipData.initSigned(.{ .ContactInfo = contact_info }, &keypair), wallclock); switch (peer_type) { diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 6fc7dcfd8..6d3e137fe 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -30,7 +30,8 @@ const ShredReceiverMetrics = shred_collector.shred_receiver.ShredReceiverMetrics pub const ShredCollectorConfig = struct { start_slot: ?Slot, repair_port: u16, - tvu_port: u16, + /// tvu port in agave + turbine_port: u16, }; /// Resources that are required for the Shred Collector to operate. @@ -56,7 +57,7 @@ pub const ShredCollectorDependencies = struct { /// Returns a ServiceManager representing the Shred Collector. /// This can be used to join and deinit the Shred Collector. /// -/// Analogous to a subset of [Tvu::new](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/tvu.rs#L119) +/// Analogous to a subset of [Tvu::new](https://github.com/anza-xyz/agave/blob/8c5a33a81a0504fd25d0465bed35d153ff84819f/core/src/turbine.rs#L119) pub fn start( conf: ShredCollectorConfig, deps: ShredCollectorDependencies, @@ -65,7 +66,7 @@ pub fn start( var arena = service_manager.arena(); const repair_socket = try bindUdpReusable(conf.repair_port); - const tvu_socket = try bindUdpReusable(conf.tvu_port); + const turbine_socket = try bindUdpReusable(conf.turbine_port); // receiver (threads) const unverified_shred_channel = Channel(ArrayList(Packet)).init(deps.allocator, 1000); @@ -77,7 +78,7 @@ pub fn start( .exit = deps.exit, .logger = deps.logger, .repair_socket = repair_socket, - .tvu_socket = tvu_socket, + .turbine_socket = turbine_socket, .unverified_shred_sender = unverified_shred_channel, .shred_version = deps.my_shred_version, .metrics = try ShredReceiverMetrics.init(), diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig index a4bf92421..6fcebe900 100644 --- a/src/shred_collector/shred_receiver.zig +++ b/src/shred_collector/shred_receiver.zig @@ -30,7 +30,7 @@ pub const ShredReceiver = struct { exit: *Atomic(bool), logger: Logger, repair_socket: Socket, - tvu_socket: Socket, + turbine_socket: Socket, /// me --> shred verifier unverified_shred_sender: *Channel(ArrayList(Packet)), shred_version: *const Atomic(u16), @@ -51,20 +51,20 @@ pub const ShredReceiver = struct { .initReceiver(self.allocator, self.logger, self.repair_socket, self.exit); defer repair_receiver.deinit(); - var tvu_receivers: [NUM_TVU_RECEIVERS]*Channel(ArrayList(Packet)) = undefined; + var turbine_receivers: [NUM_TVU_RECEIVERS]*Channel(ArrayList(Packet)) = undefined; for (0..NUM_TVU_RECEIVERS) |i| { - tvu_receivers[i] = (try SocketThread.initReceiver( + turbine_receivers[i] = (try SocketThread.initReceiver( self.allocator, self.logger, - self.tvu_socket, + self.turbine_socket, self.exit, )).channel; } - defer for (tvu_receivers) |r| r.deinit(); + defer for (turbine_receivers) |r| r.deinit(); const x = try std.Thread.spawn( .{}, Self.runPacketHandler, - .{ self, &tvu_receivers, response_sender.channel, false }, + .{ self, &turbine_receivers, response_sender.channel, false }, ); const y = try std.Thread.spawn( .{}, From a1c2f1597f7636e197621b093275373a9b7735fa Mon Sep 17 00:00:00 
2001 From: Drew Nutter Date: Fri, 7 Jun 2024 11:26:18 -0400 Subject: [PATCH 49/51] feat(repair): short-circuit repair loop and log if there are no repair peers at all --- src/shred_collector/repair_service.zig | 53 ++++++++++++++++++++------ src/shred_collector/service.zig | 4 +- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 64ee697ce..3773b5c78 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -96,11 +96,29 @@ pub const RepairService = struct { self.report.deinit(); } - /// Used to run RepairService continuously. - pub const run_config = sig.utils.service_manager.RunConfig{ - .name = "Repair Service", - .min_loop_duration_ns = 100 * std.time.ns_per_ms, - }; + const min_loop_duration_ns = 100 * std.time.ns_per_ms; + + pub fn run(self: *Self) !void { + var waiting_for_peers = false; + var timer = try std.time.Timer.start(); + var last_iteration: u64 = 0; + while (!self.exit.load(.unordered)) { + if (self.sendNecessaryRepairs()) |_| { + if (waiting_for_peers) { + waiting_for_peers = false; + self.logger.infof("Acquired some repair peers.", .{}); + } + } else |e| switch (e) { + error.NoRepairPeers => if (!waiting_for_peers) { + self.logger.infof("Waiting for repair peers...", .{}); + waiting_for_peers = true; + }, + else => return e, + } + last_iteration = timer.lap(); + std.time.sleep(min_loop_duration_ns -| last_iteration); + } + } /// Identifies which repairs are needed based on the current state, /// and sends those repairs, then returns. @@ -331,9 +349,15 @@ pub const RepairPeerProvider = struct { self.cache.deinit(); } + pub const Error = error{ + /// There are no known peers at all that could handle any repair + /// request for any slot (not just the current desired slot). + NoRepairPeers, + } || Allocator.Error; + /// Selects a peer at random from gossip or cache that is expected /// to be able to handle a repair request for the specified slot. - pub fn getRandomPeer(self: *Self, slot: Slot) !?RepairPeer { + pub fn getRandomPeer(self: *Self, slot: Slot) Error!?RepairPeer { const peers = try self.getPeers(slot); if (peers.len == 0) return null; const index = self.rng.intRangeLessThan(usize, 0, peers.len); @@ -341,7 +365,7 @@ pub const RepairPeerProvider = struct { } /// Tries to get peers that could have the slot. Checks cache, falling back to gossip. - fn getPeers(self: *Self, slot: Slot) ![]RepairPeer { + fn getPeers(self: *Self, slot: Slot) Error![]RepairPeer { const now: u64 = @intCast(std.time.timestamp()); if (self.cache.get(slot)) |peers| { @@ -365,13 +389,14 @@ pub const RepairPeerProvider = struct { self: *Self, allocator: Allocator, slot: Slot, - ) error{OutOfMemory}![]RepairPeer { + ) Error![]RepairPeer { var gossip_table_lock = self.gossip_table_rw.read(); defer gossip_table_lock.unlock(); const gossip_table: *const GossipTable = gossip_table_lock.get(); const buf = try allocator.alloc(RepairPeer, gossip_table.contact_infos.count()); errdefer allocator.free(buf); - var i: usize = 0; + var potential_peers: usize = 0; // total count of all repair peers, not just the ones for this slot. + var compatible_peers: usize = 0; // number of peers who can handle this slot. 
var infos = gossip_table.contactInfoIterator(0); while (infos.next()) |info| { const serve_repair_socket = info.getSocket(socket_tag.SERVE_REPAIR); @@ -380,20 +405,24 @@ pub const RepairPeerProvider = struct { serve_repair_socket != null and // node must be able to receive repair requests info.getSocket(socket_tag.TURBINE) != null) // node needs access to shreds { + potential_peers += 1; // exclude nodes that are known to be missing this slot if (gossip_table.get(.{ .LowestSlot = info.pubkey })) |lsv| { if (lsv.value.data.LowestSlot[1].lowest > slot) { continue; } } - buf[i] = .{ + buf[compatible_peers] = .{ .pubkey = info.pubkey, .serve_repair_socket = serve_repair_socket.?, }; - i += 1; + compatible_peers += 1; } } - return try allocator.realloc(buf, i); + if (potential_peers == 0) { + return error.NoRepairPeers; + } + return try allocator.realloc(buf, compatible_peers); } }; diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig index 6d3e137fe..9229c1528 100644 --- a/src/shred_collector/service.zig +++ b/src/shred_collector/service.zig @@ -133,8 +133,8 @@ pub fn start( shred_tracker, ); try service_manager.spawn( - RepairService.run_config, - RepairService.sendNecessaryRepairs, + .{ .name = "Repair Service" }, + RepairService.run, .{repair_svc}, ); From 32c08136dbc729e34c92234bf7209d773c1c56a4 Mon Sep 17 00:00:00 2001 From: Drew Nutter Date: Fri, 7 Jun 2024 16:12:46 -0400 Subject: [PATCH 50/51] refactor: remove concept of package-privacy --- src/accountsdb/lib.zig | 40 ++++++++++---------- src/bloom/lib.zig | 12 +++--- src/core/lib.zig | 50 ++++++++++++------------- src/gossip/lib.zig | 34 ++++++++--------- src/net/lib.zig | 26 ++++++------- src/prometheus/lib.zig | 30 +++++++-------- src/shred_collector/lib.zig | 24 ++++++------ src/shred_collector/repair_service.zig | 2 +- src/shred_collector/service.zig | 2 +- src/shred_collector/shred_processor.zig | 2 +- src/shred_collector/shred_receiver.zig | 2 +- src/shred_collector/shred_tracker.zig | 2 +- src/shred_collector/shred_verifier.zig | 2 +- src/sync/lib.zig | 24 ++++++------ src/tests.zig | 2 +- src/trace/lib.zig | 16 ++++---- 16 files changed, 125 insertions(+), 145 deletions(-) diff --git a/src/accountsdb/lib.zig b/src/accountsdb/lib.zig index e5e50f3a3..7f74f182f 100644 --- a/src/accountsdb/lib.zig +++ b/src/accountsdb/lib.zig @@ -1,24 +1,22 @@ -pub const _private = struct { - pub const accounts_file = @import("accounts_file.zig"); - pub const bank = @import("bank.zig"); - pub const db = @import("db.zig"); - pub const download = @import("download.zig"); - pub const genesis_config = @import("genesis_config.zig"); - pub const index = @import("index.zig"); - pub const snapshots = @import("snapshots.zig"); - pub const sysvars = @import("sysvars.zig"); -}; +pub const accounts_file = @import("accounts_file.zig"); +pub const bank = @import("bank.zig"); +pub const db = @import("db.zig"); +pub const download = @import("download.zig"); +pub const genesis_config = @import("genesis_config.zig"); +pub const index = @import("index.zig"); +pub const snapshots = @import("snapshots.zig"); +pub const sysvars = @import("sysvars.zig"); -pub const AccountsDB = _private.db.AccountsDB; -pub const AccountsDBConfig = _private.db.AccountsDBConfig; -pub const AllSnapshotFields = _private.snapshots.AllSnapshotFields; -pub const Bank = _private.bank.Bank; -pub const GenesisConfig = _private.genesis_config.GenesisConfig; -pub const SnapshotFieldsAndPaths = _private.snapshots.SnapshotFieldsAndPaths; -pub const SnapshotFiles = 
_private.snapshots.SnapshotFiles; -pub const StatusCache = _private.snapshots.StatusCache; +pub const AccountsDB = db.AccountsDB; +pub const AccountsDBConfig = db.AccountsDBConfig; +pub const AllSnapshotFields = snapshots.AllSnapshotFields; +pub const Bank = bank.Bank; +pub const GenesisConfig = genesis_config.GenesisConfig; +pub const SnapshotFieldsAndPaths = snapshots.SnapshotFieldsAndPaths; +pub const SnapshotFiles = snapshots.SnapshotFiles; +pub const StatusCache = snapshots.StatusCache; -pub const downloadSnapshotsFromGossip = _private.download.downloadSnapshotsFromGossip; -pub const parallelUnpackZstdTarBall = _private.snapshots.parallelUnpackZstdTarBall; +pub const downloadSnapshotsFromGossip = download.downloadSnapshotsFromGossip; +pub const parallelUnpackZstdTarBall = snapshots.parallelUnpackZstdTarBall; -pub const ACCOUNT_INDEX_BINS = _private.db.ACCOUNT_INDEX_BINS; +pub const ACCOUNT_INDEX_BINS = db.ACCOUNT_INDEX_BINS; diff --git a/src/bloom/lib.zig b/src/bloom/lib.zig index 281cb69d0..2c900a387 100644 --- a/src/bloom/lib.zig +++ b/src/bloom/lib.zig @@ -1,8 +1,6 @@ -pub const _private = struct { - pub const bit_set = @import("bit_set.zig"); - pub const bit_vec = @import("bit_vec.zig"); - pub const bitvec = @import("bitvec.zig"); - pub const bloom = @import("bloom.zig"); -}; +pub const bit_set = @import("bit_set.zig"); +pub const bit_vec = @import("bit_vec.zig"); +pub const bitvec = @import("bitvec.zig"); +pub const bloom = @import("bloom.zig"); -pub const Bloom = _private.bloom.Bloom; +pub const Bloom = bloom.Bloom; diff --git a/src/core/lib.zig b/src/core/lib.zig index 4825f686c..c2d078d21 100644 --- a/src/core/lib.zig +++ b/src/core/lib.zig @@ -1,30 +1,28 @@ -pub const _private = struct { - pub const account = @import("account.zig"); - pub const hard_forks = @import("hard_forks.zig"); - pub const hash = @import("hash.zig"); - pub const pubkey = @import("pubkey.zig"); - pub const shred = @import("shred.zig"); - pub const signature = @import("signature.zig"); - pub const time = @import("time.zig"); - pub const transaction = @import("transaction.zig"); -}; +pub const account = @import("account.zig"); +pub const hard_forks = @import("hard_forks.zig"); +pub const hash = @import("hash.zig"); +pub const pubkey = @import("pubkey.zig"); +pub const shred = @import("shred.zig"); +pub const signature = @import("signature.zig"); +pub const time = @import("time.zig"); +pub const transaction = @import("transaction.zig"); -pub const Account = _private.account.Account; -pub const HardForks = _private.hard_forks.HardForks; -pub const HardFork = _private.hard_forks.HardFork; -pub const Hash = _private.hash.Hash; -pub const Nonce = _private.shred.Nonce; -pub const Pubkey = _private.pubkey.Pubkey; -pub const ShredVersion = _private.shred.ShredVersion; -pub const Signature = _private.signature.Signature; +pub const Account = account.Account; +pub const HardForks = hard_forks.HardForks; +pub const HardFork = hard_forks.HardFork; +pub const Hash = hash.Hash; +pub const Nonce = shred.Nonce; +pub const Pubkey = pubkey.Pubkey; +pub const ShredVersion = shred.ShredVersion; +pub const Signature = signature.Signature; -pub const Epoch = _private.time.Epoch; -pub const Slot = _private.time.Slot; +pub const Epoch = time.Epoch; +pub const Slot = time.Slot; -pub const CompiledInstruction = _private.transaction.CompiledInstruction; -pub const Message = _private.transaction.Message; -pub const MessageHeader = _private.transaction.MessageHeader; -pub const Transaction = _private.transaction.Transaction; +pub 
const CompiledInstruction = transaction.CompiledInstruction; +pub const Message = transaction.Message; +pub const MessageHeader = transaction.MessageHeader; +pub const Transaction = transaction.Transaction; -pub const SIGNATURE_LENGTH = _private.signature.SIGNATURE_LENGTH; -pub const HASH_SIZE = _private.hash.HASH_SIZE; +pub const SIGNATURE_LENGTH = signature.SIGNATURE_LENGTH; +pub const HASH_SIZE = hash.HASH_SIZE; diff --git a/src/gossip/lib.zig b/src/gossip/lib.zig index c2fbbaaed..8496893a3 100644 --- a/src/gossip/lib.zig +++ b/src/gossip/lib.zig @@ -1,26 +1,22 @@ -pub const _private = struct { - pub const active_set = @import("active_set.zig"); - pub const data = @import("data.zig"); - pub const dump_service = @import("dump_service.zig"); - pub const fuzz = @import("fuzz.zig"); - pub const message = @import("message.zig"); - pub const ping_pong = @import("ping_pong.zig"); - pub const pull_request = @import("pull_request.zig"); - pub const pull_response = @import("pull_response.zig"); - pub const service = @import("service.zig"); - pub const shards = @import("shards.zig"); - pub const table = @import("table.zig"); -}; - -pub const data = _private.data; +pub const active_set = @import("active_set.zig"); +pub const data = @import("data.zig"); +pub const dump_service = @import("dump_service.zig"); +pub const fuzz = @import("fuzz.zig"); +pub const message = @import("message.zig"); +pub const ping_pong = @import("ping_pong.zig"); +pub const pull_request = @import("pull_request.zig"); +pub const pull_response = @import("pull_response.zig"); +pub const service = @import("service.zig"); +pub const shards = @import("shards.zig"); +pub const table = @import("table.zig"); pub const ContactInfo = data.ContactInfo; -pub const GossipService = _private.service.GossipService; -pub const GossipTable = _private.table.GossipTable; +pub const GossipService = service.GossipService; +pub const GossipTable = table.GossipTable; pub const SignedGossipData = data.SignedGossipData; pub const LowestSlot = data.LowestSlot; -pub const Ping = _private.ping_pong.Ping; -pub const Pong = _private.ping_pong.Pong; +pub const Ping = ping_pong.Ping; +pub const Pong = ping_pong.Pong; pub const getWallclockMs = data.getWallclockMs; pub const socket_tag = data.socket_tag; diff --git a/src/net/lib.zig b/src/net/lib.zig index bba4ef988..8510fb754 100644 --- a/src/net/lib.zig +++ b/src/net/lib.zig @@ -1,17 +1,15 @@ -pub const _private = struct { - pub const net = @import("net.zig"); - pub const echo = @import("echo.zig"); - pub const packet = @import("packet.zig"); - pub const socket_utils = @import("socket_utils.zig"); -}; +pub const net = @import("net.zig"); +pub const echo = @import("echo.zig"); +pub const packet = @import("packet.zig"); +pub const socket_utils = @import("socket_utils.zig"); -pub const IpAddr = _private.net.IpAddr; -pub const SocketAddr = _private.net.SocketAddr; -pub const Packet = _private.packet.Packet; -pub const SocketThread = _private.socket_utils.SocketThread; +pub const IpAddr = net.IpAddr; +pub const SocketAddr = net.SocketAddr; +pub const Packet = packet.Packet; +pub const SocketThread = socket_utils.SocketThread; -pub const requestIpEcho = _private.echo.requestIpEcho; -pub const enablePortReuse = _private.net.enablePortReuse; -pub const endpointToString = _private.net.endpointToString; +pub const requestIpEcho = echo.requestIpEcho; +pub const enablePortReuse = net.enablePortReuse; +pub const endpointToString = net.endpointToString; -pub const SOCKET_TIMEOUT_US = 
_private.socket_utils.SOCKET_TIMEOUT_US; +pub const SOCKET_TIMEOUT_US = socket_utils.SOCKET_TIMEOUT_US; diff --git a/src/prometheus/lib.zig b/src/prometheus/lib.zig index 4aea291ae..2bee57b2f 100644 --- a/src/prometheus/lib.zig +++ b/src/prometheus/lib.zig @@ -1,18 +1,16 @@ -pub const _private = struct { - pub const counter = @import("counter.zig"); - pub const gauge_fn = @import("gauge_fn.zig"); - pub const gauge = @import("gauge.zig"); - pub const histogram = @import("histogram.zig"); - pub const http = @import("http.zig"); - pub const metric = @import("metric.zig"); - pub const registry = @import("registry.zig"); -}; +pub const counter = @import("counter.zig"); +pub const gauge_fn = @import("gauge_fn.zig"); +pub const gauge = @import("gauge.zig"); +pub const histogram = @import("histogram.zig"); +pub const http = @import("http.zig"); +pub const metric = @import("metric.zig"); +pub const registry = @import("registry.zig"); -pub const Counter = _private.counter.Counter; -pub const GaugeFn = _private.gauge_fn.GaugeFn; -pub const Gauge = _private.gauge.Gauge; -pub const Histogram = _private.histogram.Histogram; -pub const Registry = _private.registry.Registry; +pub const Counter = counter.Counter; +pub const GaugeFn = gauge_fn.GaugeFn; +pub const Gauge = gauge.Gauge; +pub const Histogram = histogram.Histogram; +pub const Registry = registry.Registry; -pub const globalRegistry = _private.registry.globalRegistry; -pub const servePrometheus = _private.http.servePrometheus; +pub const globalRegistry = registry.globalRegistry; +pub const servePrometheus = http.servePrometheus; diff --git a/src/shred_collector/lib.zig b/src/shred_collector/lib.zig index faf7797f3..e423268c1 100644 --- a/src/shred_collector/lib.zig +++ b/src/shred_collector/lib.zig @@ -1,15 +1,13 @@ -pub const _private = struct { - pub const repair_message = @import("repair_message.zig"); - pub const repair_service = @import("repair_service.zig"); - pub const service = @import("service.zig"); - pub const shred_processor = @import("shred_processor.zig"); - pub const shred_receiver = @import("shred_receiver.zig"); - pub const shred_tracker = @import("shred_tracker.zig"); - pub const shred_verifier = @import("shred_verifier.zig"); - pub const shred = @import("shred.zig"); -}; +pub const repair_message = @import("repair_message.zig"); +pub const repair_service = @import("repair_service.zig"); +pub const service = @import("service.zig"); +pub const shred_processor = @import("shred_processor.zig"); +pub const shred_receiver = @import("shred_receiver.zig"); +pub const shred_tracker = @import("shred_tracker.zig"); +pub const shred_verifier = @import("shred_verifier.zig"); +pub const shred = @import("shred.zig"); -pub const ShredCollectorConfig = _private.service.ShredCollectorConfig; -pub const ShredCollectorDependencies = _private.service.ShredCollectorDependencies; +pub const ShredCollectorConfig = service.ShredCollectorConfig; +pub const ShredCollectorDependencies = service.ShredCollectorDependencies; -pub const start = _private.service.start; +pub const start = service.start; diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig index 3773b5c78..4498f9de8 100644 --- a/src/shred_collector/repair_service.zig +++ b/src/shred_collector/repair_service.zig @@ -1,7 +1,7 @@ const std = @import("std"); const zig_network = @import("zig-network"); const sig = @import("../lib.zig"); -const shred_collector = @import("lib.zig")._private; +const shred_collector = @import("lib.zig"); const bincode = sig.bincode; 
 const socket_tag = sig.gossip.socket_tag;
diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig
index 9229c1528..14ed3557f 100644
--- a/src/shred_collector/service.zig
+++ b/src/shred_collector/service.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
 const network = @import("zig-network");
 const sig = @import("../lib.zig");
-const shred_collector = @import("lib.zig")._private;
+const shred_collector = @import("lib.zig");
 
 const Allocator = std.mem.Allocator;
 const ArrayList = std.ArrayList;
diff --git a/src/shred_collector/shred_processor.zig b/src/shred_collector/shred_processor.zig
index 7afdc8973..5c1445da6 100644
--- a/src/shred_collector/shred_processor.zig
+++ b/src/shred_collector/shred_processor.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const sig = @import("../lib.zig");
-const shred_collector = @import("lib.zig")._private;
+const shred_collector = @import("lib.zig");
 
 const layout = shred_collector.shred.layout;
 
diff --git a/src/shred_collector/shred_receiver.zig b/src/shred_collector/shred_receiver.zig
index 6fcebe900..8db46fcce 100644
--- a/src/shred_collector/shred_receiver.zig
+++ b/src/shred_collector/shred_receiver.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
 const network = @import("zig-network");
 const sig = @import("../lib.zig");
-const shred_collector = @import("lib.zig")._private;
+const shred_collector = @import("lib.zig");
 
 const bincode = sig.bincode;
 const layout = shred_collector.shred.layout;
diff --git a/src/shred_collector/shred_tracker.zig b/src/shred_collector/shred_tracker.zig
index ba65c56fc..159508263 100644
--- a/src/shred_collector/shred_tracker.zig
+++ b/src/shred_collector/shred_tracker.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const sig = @import("../lib.zig");
-const shred_collector = @import("lib.zig")._private;
+const shred_collector = @import("lib.zig");
 
 const Allocator = std.mem.Allocator;
 const ArrayList = std.ArrayList;
diff --git a/src/shred_collector/shred_verifier.zig b/src/shred_collector/shred_verifier.zig
index fef2099e3..c1e32f0dd 100644
--- a/src/shred_collector/shred_verifier.zig
+++ b/src/shred_collector/shred_verifier.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const sig = @import("../lib.zig");
-const shred_collector = @import("lib.zig")._private;
+const shred_collector = @import("lib.zig");
 
 const shred_layout = shred_collector.shred.layout;
 
diff --git a/src/sync/lib.zig b/src/sync/lib.zig
index 89e71ef13..a08160772 100644
--- a/src/sync/lib.zig
+++ b/src/sync/lib.zig
@@ -1,15 +1,13 @@
-pub const _private = struct {
-    pub const channel = @import("channel.zig");
-    pub const mpmc = @import("mpmc.zig");
-    pub const ref = @import("ref.zig");
-    pub const mux = @import("mux.zig");
-    pub const once_cell = @import("once_cell.zig");
-    pub const thread_pool = @import("thread_pool.zig");
-};
+pub const channel = @import("channel.zig");
+pub const mpmc = @import("mpmc.zig");
+pub const ref = @import("ref.zig");
+pub const mux = @import("mux.zig");
+pub const once_cell = @import("once_cell.zig");
+pub const thread_pool = @import("thread_pool.zig");
 
-pub const Channel = _private.channel.Channel;
-pub const Mux = _private.mux.Mux;
-pub const RwMux = _private.mux.RwMux;
+pub const Channel = channel.Channel;
+pub const Mux = mux.Mux;
+pub const RwMux = mux.RwMux;
 
-pub const OnceCell = _private.once_cell.OnceCell;
-pub const ThreadPool = _private.thread_pool.ThreadPool;
+pub const OnceCell = once_cell.OnceCell;
+pub const ThreadPool = thread_pool.ThreadPool;
diff --git a/src/tests.zig b/src/tests.zig
index 830d54c51..45190ffdc 100644
--- a/src/tests.zig
+++ b/src/tests.zig
@@ -8,7 +8,7 @@ test {
     logger.default_logger.* = logger.Logger.init(allocator, .debug);
     std.testing.log_level = std.log.Level.err;
 
-    refAllDeclsRecursive(lib, 3);
+    refAllDeclsRecursive(lib, 2);
 }
 
 /// Like std.testing.refAllDeclsRecursive, except:
diff --git a/src/trace/lib.zig b/src/trace/lib.zig
index 8fe17abc1..f7bd708c5 100644
--- a/src/trace/lib.zig
+++ b/src/trace/lib.zig
@@ -1,10 +1,8 @@
-pub const _private = struct {
-    pub const entry = @import("entry.zig");
-    pub const field = @import("field.zig");
-    pub const level = @import("level.zig");
-    pub const log = @import("log.zig");
-    pub const logfmt = @import("logfmt.zig");
-};
+pub const entry = @import("entry.zig");
+pub const field = @import("field.zig");
+pub const level = @import("level.zig");
+pub const log = @import("log.zig");
+pub const logfmt = @import("logfmt.zig");
 
-pub const Logger = _private.log.Logger;
-pub const Level = _private.level.Level;
+pub const Logger = log.Logger;
+pub const Level = level.Level;

From 99ff6cda5103d2a279977630f06e7cba2c0932ea Mon Sep 17 00:00:00 2001
From: Drew Nutter
Date: Fri, 7 Jun 2024 16:37:01 -0400
Subject: [PATCH 51/51] refactor(gossip, shred-collector): rename turbine_port to turbine_recv_port

---
 src/cmd/cmd.zig                        | 10 +++----
 src/cmd/config.zig                     |  2 +-
 src/gossip/data.zig                    | 36 +++++++++++++-------------
 src/gossip/message.zig                 |  8 +++---
 src/shred_collector/repair_service.zig |  6 ++---
 src/shred_collector/service.zig        |  4 +--
 6 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/src/cmd/cmd.zig b/src/cmd/cmd.zig
index 8a455dbc2..c86197bf7 100644
--- a/src/cmd/cmd.zig
+++ b/src/cmd/cmd.zig
@@ -91,10 +91,10 @@ var repair_port_option = cli.Option{
     .value_name = "Repair Port",
 };
 
-var turbine_port_option = cli.Option{
+var turbine_recv_port_option = cli.Option{
     .long_name = "turbine-port",
     .help = "The port to run turbine shred listener (aka TVU port) - default: 8003",
-    .value_ref = cli.mkRef(&config.current.shred_collector.turbine_port),
+    .value_ref = cli.mkRef(&config.current.shred_collector.turbine_recv_port),
     .required = false,
     .value_name = "Turbine Port",
 };
@@ -302,7 +302,7 @@ var app = &cli.App{
         &gossip_spy_node_option,
         &gossip_dump_option,
         // repair
-        &turbine_port_option,
+        &turbine_recv_port_option,
         &repair_port_option,
         &test_repair_option,
         // accounts-db
@@ -405,7 +405,7 @@ fn validator() !void {
     const ip_echo_data = try getMyDataFromIpEcho(logger, entrypoints.items);
 
     const repair_port: u16 = config.current.shred_collector.repair_port;
-    const turbine_port: u16 = config.current.shred_collector.repair_port;
+    const turbine_recv_port: u16 = config.current.shred_collector.turbine_recv_port;
 
     // gossip
     var gossip_service = try initGossip(
@@ -417,7 +417,7 @@
         ip_echo_data.ip,
         &.{
             .{ .tag = socket_tag.REPAIR, .port = repair_port },
-            .{ .tag = socket_tag.TURBINE, .port = turbine_port },
+            .{ .tag = socket_tag.TURBINE_RECV, .port = turbine_recv_port },
         },
     );
     defer gossip_service.deinit();
diff --git a/src/cmd/config.zig b/src/cmd/config.zig
index a5725d8c3..67f06c6ea 100644
--- a/src/cmd/config.zig
+++ b/src/cmd/config.zig
@@ -26,7 +26,7 @@ const GossipConfig = struct {
 };
 
 const shred_collector_defaults = ShredCollectorConfig{
-    .turbine_port = 8002,
+    .turbine_recv_port = 8002,
     .repair_port = 8003,
     .start_slot = null,
 };
diff --git a/src/gossip/data.zig b/src/gossip/data.zig
index 7c3de51a8..db9980185 100644
--- a/src/gossip/data.zig
+++ b/src/gossip/data.zig
@@ -470,10 +470,10 @@ pub const LegacyContactInfo = struct {
     gossip: SocketAddr,
     /// address to connect to for replication
     /// analogous to `tvu` in agave
-    turbine: SocketAddr,
+    turbine_recv: SocketAddr,
     /// address to forward shreds to
    /// analogous to `tvu_quic` in agave
-    turbine_quic: SocketAddr,
+    turbine_recv_quic: SocketAddr,
     /// address to send repair responses to
     repair: SocketAddr,
     /// transactions address
@@ -504,8 +504,8 @@ pub const LegacyContactInfo = struct {
         return LegacyContactInfo{
             .id = id,
             .gossip = unspecified_addr,
-            .turbine = unspecified_addr,
-            .turbine_quic = unspecified_addr,
+            .turbine_recv = unspecified_addr,
+            .turbine_recv_quic = unspecified_addr,
             .repair = unspecified_addr,
             .tpu = unspecified_addr,
             .tpu_forwards = unspecified_addr,
@@ -522,8 +522,8 @@ pub const LegacyContactInfo = struct {
         return LegacyContactInfo{
             .id = Pubkey.random(rng),
             .gossip = SocketAddr.random(rng),
-            .turbine = SocketAddr.random(rng),
-            .turbine_quic = SocketAddr.random(rng),
+            .turbine_recv = SocketAddr.random(rng),
+            .turbine_recv_quic = SocketAddr.random(rng),
             .repair = SocketAddr.random(rng),
             .tpu = SocketAddr.random(rng),
             .tpu_forwards = SocketAddr.random(rng),
@@ -540,8 +540,8 @@ pub const LegacyContactInfo = struct {
     pub fn toContactInfo(self: *const LegacyContactInfo, allocator: std.mem.Allocator) !ContactInfo {
         var ci = ContactInfo.init(allocator, self.id, self.wallclock, self.shred_version);
         try ci.setSocket(socket_tag.GOSSIP, self.gossip);
-        try ci.setSocket(socket_tag.TURBINE, self.turbine);
-        try ci.setSocket(socket_tag.TURBINE_QUIC, self.turbine_quic);
+        try ci.setSocket(socket_tag.TURBINE_RECV, self.turbine_recv);
+        try ci.setSocket(socket_tag.TURBINE_RECV_QUIC, self.turbine_recv_quic);
         try ci.setSocket(socket_tag.REPAIR, self.repair);
         try ci.setSocket(socket_tag.TPU, self.tpu);
         try ci.setSocket(socket_tag.TPU_FORWARDS, self.tpu_forwards);
@@ -556,8 +556,8 @@ pub const LegacyContactInfo = struct {
         return .{
             .id = ci.pubkey,
             .gossip = ci.getSocket(socket_tag.GOSSIP) orelse SocketAddr.UNSPECIFIED,
-            .turbine = ci.getSocket(socket_tag.TURBINE) orelse SocketAddr.UNSPECIFIED,
-            .turbine_quic = ci.getSocket(socket_tag.TURBINE_QUIC) orelse SocketAddr.UNSPECIFIED,
+            .turbine_recv = ci.getSocket(socket_tag.TURBINE_RECV) orelse SocketAddr.UNSPECIFIED,
+            .turbine_recv_quic = ci.getSocket(socket_tag.TURBINE_RECV_QUIC) orelse SocketAddr.UNSPECIFIED,
             .repair = ci.getSocket(socket_tag.REPAIR) orelse SocketAddr.UNSPECIFIED,
             .tpu = ci.getSocket(socket_tag.TPU) orelse SocketAddr.UNSPECIFIED,
             .tpu_forwards = ci.getSocket(socket_tag.TPU_FORWARDS) orelse SocketAddr.UNSPECIFIED,
@@ -991,11 +991,11 @@ pub const socket_tag = struct {
     pub const TPU_QUIC: u8 = 8;
     pub const TPU_VOTE: u8 = 9;
     /// Analogous to [SOCKET_TAG_TVU](https://github.com/anza-xyz/agave/blob/0d34a1a160129c4293dac248e14231e9e773b4ce/gossip/src/contact_info.rs#L36)
-    pub const TURBINE: u8 = 10;
+    pub const TURBINE_RECV: u8 = 10;
     /// Analogous to [SOCKET_TAG_TVU_QUIC](https://github.com/anza-xyz/agave/blob/0d34a1a160129c4293dac248e14231e9e773b4ce/gossip/src/contact_info.rs#L37)
-    pub const TURBINE_QUIC: u8 = 11;
+    pub const TURBINE_RECV_QUIC: u8 = 11;
 };
-pub const SOCKET_CACHE_SIZE: usize = socket_tag.TURBINE_QUIC + 1;
+pub const SOCKET_CACHE_SIZE: usize = socket_tag.TURBINE_RECV_QUIC + 1;
 
 pub const ContactInfo = struct {
     pubkey: Pubkey,
@@ -1198,15 +1198,15 @@ const NodePort = union(enum) {
     tpu_forwards_quic: network.EndPoint,
     tpu_quic: network.EndPoint,
     tpu_vote: network.EndPoint,
-    turbine: network.EndPoint,
-    turbine_quic: network.EndPoint,
+    turbine_recv: network.EndPoint,
+    turbine_recv_quic: network.EndPoint,
 };
 
 const Sockets = struct {
     gossip: UdpSocket,
     ip_echo: ?TcpListener,
-    turbine: ArrayList(UdpSocket),
-    turbine_quic: ArrayList(UdpSocket),
+    turbine_recv: ArrayList(UdpSocket),
+    turbine_recv_quic: ArrayList(UdpSocket),
     tpu: ArrayList(UdpSocket),
     tpu_forwards: ArrayList(UdpSocket),
     tpu_vote: ArrayList(UdpSocket),
@@ -1220,7 +1220,7 @@ const Sockets = struct {
 };
 
 pub const SocketEntry = struct {
-    key: u8, // GossipMessageidentifier, e.g. turbine, tpu, etc
+    key: u8, // GossipMessage identifier, e.g. turbine_recv, tpu, etc
     index: u8, // IpAddr index in the accompanying addrs vector.
     offset: u16, // Port offset with respect to the previous entry.
 
diff --git a/src/gossip/message.zig b/src/gossip/message.zig
index 5cc8aac17..47d8490f5 100644
--- a/src/gossip/message.zig
+++ b/src/gossip/message.zig
@@ -307,8 +307,8 @@ test "gossip.message: pull request serializes and deserializes" {
     const legacy_contact_info = LegacyContactInfo{
         .id = pubkey,
         .gossip = gossip_addr,
-        .turbine = unspecified_addr,
-        .turbine_quic = unspecified_addr,
+        .turbine_recv = unspecified_addr,
+        .turbine_recv_quic = unspecified_addr,
         .repair = unspecified_addr,
         .tpu = unspecified_addr,
         .tpu_forwards = unspecified_addr,
@@ -354,8 +354,8 @@ test "gossip.message: push message serializes and deserializes correctly" {
     const legacy_contact_info = LegacyContactInfo{
         .id = id,
         .gossip = gossip_addr,
-        .turbine = unspecified_addr,
-        .turbine_quic = unspecified_addr,
+        .turbine_recv = unspecified_addr,
+        .turbine_recv_quic = unspecified_addr,
         .repair = unspecified_addr,
         .tpu = unspecified_addr,
         .tpu_forwards = unspecified_addr,
diff --git a/src/shred_collector/repair_service.zig b/src/shred_collector/repair_service.zig
index 4498f9de8..3af14a305 100644
--- a/src/shred_collector/repair_service.zig
+++ b/src/shred_collector/repair_service.zig
@@ -403,7 +403,7 @@ pub const RepairPeerProvider = struct {
             if (!info.pubkey.equals(&self.my_pubkey) and // don't request from self
                 info.shred_version == self.my_shred_version.load(.monotonic) and // need compatible shreds
                 serve_repair_socket != null and // node must be able to receive repair requests
-                info.getSocket(socket_tag.TURBINE) != null) // node needs access to shreds
+                info.getSocket(socket_tag.TURBINE_RECV) != null) // node needs access to shreds
             {
                 potential_peers += 1;
                 // exclude nodes that are known to be missing this slot
@@ -460,7 +460,7 @@ test "RepairService sends repair request to gossip peer" {
     try peer_socket.setReadTimeout(100_000);
     var peer_contact_info = ContactInfo.init(allocator, Pubkey.fromPublicKey(&peer_keypair.public_key), wallclock, my_shred_version.load(.unordered));
     try peer_contact_info.setSocket(socket_tag.SERVE_REPAIR, SocketAddr.fromEndpoint(&peer_endpoint));
-    try peer_contact_info.setSocket(socket_tag.TURBINE, SocketAddr.fromEndpoint(&peer_endpoint));
+    try peer_contact_info.setSocket(socket_tag.TURBINE_RECV, SocketAddr.fromEndpoint(&peer_endpoint));
     try gossip.insert(try SignedGossipData.initSigned(.{ .ContactInfo = peer_contact_info }, &peer_keypair), wallclock);
 
     // init service
@@ -604,7 +604,7 @@ const TestPeerGenerator = struct {
             try contact_info.setSocket(socket_tag.SERVE_REPAIR, serve_repair_addr);
         }
         if (peer_type != .MissingTvuPort) {
-            try contact_info.setSocket(socket_tag.TURBINE, SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 8004));
+            try contact_info.setSocket(socket_tag.TURBINE_RECV, SocketAddr.initIpv4(.{ 127, 0, 0, 1 }, 8004));
         }
         try self.gossip.insert(try SignedGossipData.initSigned(.{ .ContactInfo = contact_info }, &keypair), wallclock);
         switch (peer_type) {
diff --git a/src/shred_collector/service.zig b/src/shred_collector/service.zig
index 14ed3557f..3838f5506 100644
--- a/src/shred_collector/service.zig
+++ b/src/shred_collector/service.zig
@@ -31,7 +31,7 @@ pub const ShredCollectorConfig = struct {
     start_slot: ?Slot,
     repair_port: u16,
     /// tvu port in agave
-    turbine_port: u16,
+    turbine_recv_port: u16,
 };
 
 /// Resources that are required for the Shred Collector to operate.
@@ -66,7 +66,7 @@ pub fn start(
     var arena = service_manager.arena();
 
     const repair_socket = try bindUdpReusable(conf.repair_port);
-    const turbine_socket = try bindUdpReusable(conf.turbine_port);
+    const turbine_socket = try bindUdpReusable(conf.turbine_recv_port);
 
     // receiver (threads)
     const unverified_shred_channel = Channel(ArrayList(Packet)).init(deps.allocator, 1000);
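
Note on the `_private` removal in the lib.zig patches above: each module root previously wrapped its file imports in a `pub const _private = struct { ... }` namespace and re-exported selected names from it; the patches flatten this to direct `pub const` declarations. That removes one level of nesting from every public declaration path, which is presumably why the test harness depth drops from `refAllDeclsRecursive(lib, 3)` to `refAllDeclsRecursive(lib, 2)` in src/tests.zig. A minimal sketch of the flattened pattern, using a hypothetical `widget.zig` that is not a file in this repo:

// src/example/lib.zig -- hypothetical module root
pub const widget = @import("widget.zig"); // file namespace, exported directly
pub const Widget = widget.Widget; // convenience re-export of the main type

// Callers can reach the type through either path:
// const Widget = @import("example/lib.zig").Widget;
// const Widget = @import("example/lib.zig").widget.Widget;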
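
The socket tags renamed in src/gossip/data.zig are consecutive u8 values, and `SOCKET_CACHE_SIZE` is defined as the highest tag plus one (TURBINE_RECV_QUIC = 11, so 12), which suggests the tags index a fixed-size per-tag cache. A self-contained sketch of that indexing relationship; the array layout and the `Addr` stand-in type are illustrative, not the actual ContactInfo internals:

const std = @import("std");

// Stand-in for sig.net.SocketAddr, just to keep the sketch self-contained.
const Addr = struct { port: u16 };

const TURBINE_RECV: u8 = 10;
const TURBINE_RECV_QUIC: u8 = 11;
const SOCKET_CACHE_SIZE: usize = TURBINE_RECV_QUIC + 1;

test "highest tag plus one covers every tag" {
    // One optional slot per tag, indexed directly by the tag value.
    var cache = [_]?Addr{null} ** SOCKET_CACHE_SIZE;
    cache[TURBINE_RECV] = Addr{ .port = 8002 };
    try std.testing.expect(cache[TURBINE_RECV] != null);
    try std.testing.expect(cache.len == 12);
}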
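
The RepairPeerProvider hunk above also shows why the TURBINE_RECV rename matters for repair: a gossip peer only qualifies as a repair target when it is not the local node, advertises a matching shred version, exposes a serve-repair socket, and advertises a TURBINE_RECV socket (so it can itself receive shreds). A self-contained sketch of that predicate with simplified stand-in types; `Peer` and `isEligibleRepairPeer` are illustrative names, not sig's API:

const std = @import("std");

// Simplified stand-ins; the real code inspects a gossip ContactInfo.
const Peer = struct {
    is_self: bool,
    shred_version: u16,
    has_serve_repair_socket: bool,
    has_turbine_recv_socket: bool,
};

// Mirrors the four conditions checked in RepairPeerProvider
// (src/shred_collector/repair_service.zig): skip self, require a matching
// shred version, and require both the serve-repair and TURBINE_RECV sockets.
fn isEligibleRepairPeer(peer: Peer, my_shred_version: u16) bool {
    return !peer.is_self and
        peer.shred_version == my_shred_version and
        peer.has_serve_repair_socket and
        peer.has_turbine_recv_socket;
}

test "eligibility requires all four conditions" {
    const good = Peer{
        .is_self = false,
        .shred_version = 42,
        .has_serve_repair_socket = true,
        .has_turbine_recv_socket = true,
    };
    try std.testing.expect(isEligibleRepairPeer(good, 42));
    try std.testing.expect(!isEligibleRepairPeer(good, 43)); // version mismatch
}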