diff --git a/lib/std/compress/deflate/huffman_code.zig b/lib/std/compress/deflate/huffman_code.zig index 4827feb24562..689ac1441a95 100644 --- a/lib/std/compress/deflate/huffman_code.zig +++ b/lib/std/compress/deflate/huffman_code.zig @@ -93,7 +93,7 @@ pub const HuffmanEncoder = struct { return; } self.lfs = list; - sort.sort(LiteralNode, self.lfs, {}, byFreq); + mem.sort(LiteralNode, self.lfs, {}, byFreq); // Get the number of literals for each bit count var bit_count = self.bitCounts(list, max_bits); @@ -270,7 +270,7 @@ pub const HuffmanEncoder = struct { var chunk = list[list.len - @intCast(u32, bits) ..]; self.lns = chunk; - sort.sort(LiteralNode, self.lns, {}, byLiteral); + mem.sort(LiteralNode, self.lns, {}, byLiteral); for (chunk) |node| { self.codes[node.literal] = HuffCode{ diff --git a/lib/std/compress/zstandard/decode/fse.zig b/lib/std/compress/zstandard/decode/fse.zig index 741fd81cccd8..232af39ccfe5 100644 --- a/lib/std/compress/zstandard/decode/fse.zig +++ b/lib/std/compress/zstandard/decode/fse.zig @@ -107,7 +107,7 @@ fn buildFseTable(values: []const u16, entries: []Table.Fse) !void { position &= entries.len - 1; } } - std.sort.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16)); + std.mem.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16)); for (0..probability) |i| { entries[temp_states[i]] = if (i < double_state_count) Table.Fse{ .symbol = @intCast(u8, symbol), diff --git a/lib/std/compress/zstandard/decode/huffman.zig b/lib/std/compress/zstandard/decode/huffman.zig index 291419826831..f5e977d0dadd 100644 --- a/lib/std/compress/zstandard/decode/huffman.zig +++ b/lib/std/compress/zstandard/decode/huffman.zig @@ -124,7 +124,7 @@ fn assignSymbols(weight_sorted_prefixed_symbols: []LiteralsSection.HuffmanTree.P }; } - std.sort.sort( + std.mem.sort( LiteralsSection.HuffmanTree.PrefixedSymbol, weight_sorted_prefixed_symbols, weights, diff --git a/lib/std/comptime_string_map.zig b/lib/std/comptime_string_map.zig index 7620ec7af8af..e6859c32c15d 100644 --- a/lib/std/comptime_string_map.zig +++ b/lib/std/comptime_string_map.zig @@ -28,7 +28,7 @@ pub fn ComptimeStringMap(comptime V: type, comptime kvs_list: anytype) type { sorted_kvs[i] = .{ .key = kv.@"0", .value = {} }; } } - std.sort.sort(KV, &sorted_kvs, {}, lenAsc); + mem.sort(KV, &sorted_kvs, {}, lenAsc); const min_len = sorted_kvs[0].key.len; const max_len = sorted_kvs[sorted_kvs.len - 1].key.len; var len_indexes: [max_len + 1]usize = undefined; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index ecc1a9f0cf39..005c2b540428 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1211,7 +1211,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn // Even though lld emits symbols in ascending order, this debug code // should work for programs linked in any valid way. // This sort is so that we can binary search later. 
- std.sort.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); + mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); return ModuleDebugInfo{ .base_address = undefined, diff --git a/lib/std/enums.zig b/lib/std/enums.zig index aa6edd60b175..757c616b9bcb 100644 --- a/lib/std/enums.zig +++ b/lib/std/enums.zig @@ -1314,7 +1314,7 @@ pub fn EnumIndexer(comptime E: type) type { } }; } - std.sort.sort(EnumField, &fields, {}, ascByValue); + std.mem.sort(EnumField, &fields, {}, ascByValue); const min = fields[0].value; const max = fields[fields.len - 1].value; const fields_len = fields.len; diff --git a/lib/std/http/Headers.zig b/lib/std/http/Headers.zig index 429df9368a15..fb7a9360d823 100644 --- a/lib/std/http/Headers.zig +++ b/lib/std/http/Headers.zig @@ -191,7 +191,7 @@ pub const Headers = struct { /// Sorts the headers in lexicographical order. pub fn sort(headers: *Headers) void { - std.sort.sort(Field, headers.list.items, {}, Field.lessThan); + std.mem.sort(Field, headers.list.items, {}, Field.lessThan); headers.rebuildIndex(); } diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 8cb2c00a3af2..1840a86819eb 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -566,6 +566,34 @@ test "zeroInit" { }, nested_baz); } +pub fn sort( + comptime T: type, + items: []T, + context: anytype, + comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) void { + std.sort.block(T, items, context, lessThanFn); +} + +pub fn sortUnstable( + comptime T: type, + items: []T, + context: anytype, + comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) void { + std.sort.pdq(T, items, context, lessThanFn); +} + +/// TODO: currently this just calls `insertionContext`. The block sort implementation +/// in `std.sort` needs to be adapted to use the sort context. +pub fn sortContext(length: usize, context: anytype) void { + std.sort.insertionContext(0, length, context); +} + +pub fn sortUnstableContext(length: usize, context: anytype) void { + std.sort.pdqContext(0, length, context); +} + /// Compares two slices of numbers lexicographically. O(n). pub fn order(comptime T: type, lhs: []const T, rhs: []const T) math.Order { const n = math.min(lhs.len, rhs.len); diff --git a/lib/std/meta.zig b/lib/std/meta.zig index 8adba2439aa5..d0b07b934fd6 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -985,7 +985,7 @@ pub fn declList(comptime Namespace: type, comptime Decl: type) []const *const De for (decls, 0..) |decl, i| { array[i] = &@field(Namespace, decl.name); } - std.sort.sort(*const Decl, &array, {}, S.declNameLessThan); + mem.sort(*const Decl, &array, {}, S.declNameLessThan); return &array; } } diff --git a/lib/std/multi_array_list.zig b/lib/std/multi_array_list.zig index 322471bedffe..5cd5f573d965 100644 --- a/lib/std/multi_array_list.zig +++ b/lib/std/multi_array_list.zig @@ -160,7 +160,7 @@ pub fn MultiArrayList(comptime T: type) type { return lhs.alignment > rhs.alignment; } }; - std.sort.sort(Data, &data, {}, Sort.lessThan); + mem.sort(Data, &data, {}, Sort.lessThan); var sizes_bytes: [fields.len]usize = undefined; var field_indexes: [fields.len]usize = undefined; for (data, 0..)
|elem, i| { @@ -469,29 +469,19 @@ pub fn MultiArrayList(comptime T: type) type { /// `ctx` has the following method: /// `fn lessThan(ctx: @TypeOf(ctx), a_index: usize, b_index: usize) bool` pub fn sort(self: Self, ctx: anytype) void { - const SortContext = struct { - sub_ctx: @TypeOf(ctx), - slice: Slice, - - pub fn swap(sc: @This(), a_index: usize, b_index: usize) void { - inline for (fields, 0..) |field_info, i| { + var i: usize = 1; + while (i < self.len) : (i += 1) { + var j: usize = i; + while (j > 0 and ctx.lessThan(j, j - 1)) : (j -= 1) { + inline for (fields, 0..) |field_info, field_index| { if (@sizeOf(field_info.type) != 0) { - const field = @intToEnum(Field, i); - const ptr = sc.slice.items(field); - mem.swap(field_info.type, &ptr[a_index], &ptr[b_index]); + const field = @intToEnum(Field, field_index); + const ptr = self.items(field); + mem.swap(field_info.type, &ptr[j], &ptr[j - 1]); } } } - - pub fn lessThan(sc: @This(), a_index: usize, b_index: usize) bool { - return sc.sub_ctx.lessThan(a_index, b_index); - } - }; - - std.sort.sortContext(self.len, SortContext{ - .sub_ctx = ctx, - .slice = self.slice(), - }); + } } fn capacityInBytes(capacity: usize) usize { diff --git a/lib/std/net.zig b/lib/std/net.zig index 57e50a7349f7..7629ecc8f793 100644 --- a/lib/std/net.zig +++ b/lib/std/net.zig @@ -1082,7 +1082,7 @@ fn linuxLookupName( key |= (MAXADDRS - @intCast(i32, i)) << DAS_ORDER_SHIFT; addr.sortkey = key; } - std.sort.sort(LookupAddr, addrs.items, {}, addrCmpLessThan); + mem.sort(LookupAddr, addrs.items, {}, addrCmpLessThan); } const Policy = struct { diff --git a/lib/std/sort.zig b/lib/std/sort.zig index 3e219b8566b4..bf2bf40f89ed 100644 --- a/lib/std/sort.zig +++ b/lib/std/sort.zig @@ -4,1241 +4,152 @@ const testing = std.testing; const mem = std.mem; const math = std.math; -pub fn binarySearch( - comptime T: type, - key: anytype, - items: []const T, - context: anytype, - comptime compareFn: fn (context: @TypeOf(context), key: @TypeOf(key), mid_item: T) math.Order, -) ?usize { - var left: usize = 0; - var right: usize = items.len; - - while (left < right) { - // Avoid overflowing in the midpoint calculation - const mid = left + (right - left) / 2; - // Compare the key with the midpoint element - switch (compareFn(context, key, items[mid])) { - .eq => return mid, - .gt => left = mid + 1, - .lt => right = mid, - } - } - - return null; -} - -test "binarySearch" { - const S = struct { - fn order_u32(context: void, lhs: u32, rhs: u32) math.Order { - _ = context; - return math.order(lhs, rhs); - } - fn order_i32(context: void, lhs: i32, rhs: i32) math.Order { - _ = context; - return math.order(lhs, rhs); - } - }; - try testing.expectEqual( - @as(?usize, null), - binarySearch(u32, @as(u32, 1), &[_]u32{}, {}, S.order_u32), - ); - try testing.expectEqual( - @as(?usize, 0), - binarySearch(u32, @as(u32, 1), &[_]u32{1}, {}, S.order_u32), - ); - try testing.expectEqual( - @as(?usize, null), - binarySearch(u32, @as(u32, 1), &[_]u32{0}, {}, S.order_u32), - ); - try testing.expectEqual( - @as(?usize, null), - binarySearch(u32, @as(u32, 0), &[_]u32{1}, {}, S.order_u32), - ); - try testing.expectEqual( - @as(?usize, 4), - binarySearch(u32, @as(u32, 5), &[_]u32{ 1, 2, 3, 4, 5 }, {}, S.order_u32), - ); - try testing.expectEqual( - @as(?usize, 0), - binarySearch(u32, @as(u32, 2), &[_]u32{ 2, 4, 8, 16, 32, 64 }, {}, S.order_u32), - ); - try testing.expectEqual( - @as(?usize, 1), - binarySearch(i32, @as(i32, -4), &[_]i32{ -7, -4, 0, 9, 10 }, {}, S.order_i32), - ); - try testing.expectEqual( - 
@as(?usize, 3), - binarySearch(i32, @as(i32, 98), &[_]i32{ -100, -25, 2, 98, 99, 100 }, {}, S.order_i32), - ); - const R = struct { - b: i32, - e: i32, - - fn r(b: i32, e: i32) @This() { - return @This(){ .b = b, .e = e }; - } - - fn order(context: void, key: i32, mid_item: @This()) math.Order { - _ = context; - - if (key < mid_item.b) { - return .lt; - } - - if (key > mid_item.e) { - return .gt; - } - - return .eq; - } - }; - try testing.expectEqual( - @as(?usize, null), - binarySearch(R, @as(i32, -45), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), - ); - try testing.expectEqual( - @as(?usize, 2), - binarySearch(R, @as(i32, 10), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), - ); - try testing.expectEqual( - @as(?usize, 1), - binarySearch(R, @as(i32, -20), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), - ); -} +pub const block = @import("sort/block.zig").block; +pub const pdq = @import("sort/pdq.zig").pdq; +pub const pdqContext = @import("sort/pdq.zig").pdqContext; /// Stable in-place sort. O(n) best case, O(pow(n, 2)) worst case. /// O(1) memory (no allocator required). /// Sorts in ascending order with respect to the given `lessThan` function. -/// This can be expressed in terms of `insertionSortContext` but the glue -/// code is slightly longer than the direct implementation. -pub fn insertionSort( +pub fn insertion( comptime T: type, items: []T, context: anytype, - comptime lessThan: fn (context: @TypeOf(context), lhs: T, rhs: T) bool, + comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) void { - var i: usize = 1; - while (i < items.len) : (i += 1) { - const x = items[i]; - var j: usize = i; - while (j > 0 and lessThan(context, x, items[j - 1])) : (j -= 1) { - items[j] = items[j - 1]; + const Context = struct { + items: []T, + sub_ctx: @TypeOf(context), + + pub fn lessThan(ctx: @This(), a: usize, b: usize) bool { + return lessThanFn(ctx.sub_ctx, ctx.items[a], ctx.items[b]); } - items[j] = x; - } + + pub fn swap(ctx: @This(), a: usize, b: usize) void { + return mem.swap(T, &ctx.items[a], &ctx.items[b]); + } + }; + insertionContext(0, items.len, Context{ .items = items, .sub_ctx = context }); } /// Stable in-place sort. O(n) best case, O(pow(n, 2)) worst case. /// O(1) memory (no allocator required). -/// Sorts in ascending order with respect to the given `context.lessThan` function. -pub fn insertionSortContext(len: usize, context: anytype) void { - var i: usize = 1; - while (i < len) : (i += 1) { - var j: usize = i; - while (j > 0 and context.lessThan(j, j - 1)) : (j -= 1) { +/// Sorts in ascending order with respect to the given `lessThan` function. 
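+/// The half-open range `[a, b)` selects the indices to sort; elements are accessed only through `context.lessThan` and `context.swap`.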
+pub fn insertionContext(a: usize, b: usize, context: anytype) void { + var i = a + 1; + while (i < b) : (i += 1) { + var j = i; + while (j > a and context.lessThan(j, j - 1)) : (j -= 1) { context.swap(j, j - 1); } } } -const Range = struct { - start: usize, - end: usize, - - fn init(start: usize, end: usize) Range { - return Range{ - .start = start, - .end = end, - }; - } - - fn length(self: Range) usize { - return self.end - self.start; - } -}; - -const Iterator = struct { - size: usize, - power_of_two: usize, - numerator: usize, - decimal: usize, - denominator: usize, - decimal_step: usize, - numerator_step: usize, - - fn init(size2: usize, min_level: usize) Iterator { - const power_of_two = math.floorPowerOfTwo(usize, size2); - const denominator = power_of_two / min_level; - return Iterator{ - .numerator = 0, - .decimal = 0, - .size = size2, - .power_of_two = power_of_two, - .denominator = denominator, - .decimal_step = size2 / denominator, - .numerator_step = size2 % denominator, - }; - } - - fn begin(self: *Iterator) void { - self.numerator = 0; - self.decimal = 0; - } - - fn nextRange(self: *Iterator) Range { - const start = self.decimal; - - self.decimal += self.decimal_step; - self.numerator += self.numerator_step; - if (self.numerator >= self.denominator) { - self.numerator -= self.denominator; - self.decimal += 1; - } - - return Range{ - .start = start, - .end = self.decimal, - }; - } - - fn finished(self: *Iterator) bool { - return self.decimal >= self.size; - } - - fn nextLevel(self: *Iterator) bool { - self.decimal_step += self.decimal_step; - self.numerator_step += self.numerator_step; - if (self.numerator_step >= self.denominator) { - self.numerator_step -= self.denominator; - self.decimal_step += 1; - } - - return (self.decimal_step < self.size); - } - - fn length(self: *Iterator) usize { - return self.decimal_step; - } -}; - -const Pull = struct { - from: usize, - to: usize, - count: usize, - range: Range, -}; - -/// Stable in-place sort. O(n) best case, O(n*log(n)) worst case and average case. +/// Unstable in-place sort. O(n*log(n)) best case, worst case and average case. /// O(1) memory (no allocator required). /// Sorts in ascending order with respect to the given `lessThan` function. -/// Currently implemented as block sort. 
-pub fn sort( +pub fn heap( comptime T: type, items: []T, context: anytype, - comptime lessThan: fn (context: @TypeOf(context), lhs: T, rhs: T) bool, + comptime lessThanFn: fn (@TypeOf(context), lhs: T, rhs: T) bool, ) void { + const Context = struct { + items: []T, + sub_ctx: @TypeOf(context), - // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c - var cache: [512]T = undefined; - - if (items.len < 4) { - if (items.len == 3) { - // hard coded insertion sort - if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); - if (lessThan(context, items[2], items[1])) { - mem.swap(T, &items[1], &items[2]); - if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); - } - } else if (items.len == 2) { - if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]); + pub fn lessThan(ctx: @This(), a: usize, b: usize) bool { + return lessThanFn(ctx.sub_ctx, ctx.items[a], ctx.items[b]); } - return; - } - - // sort groups of 4-8 items at a time using an unstable sorting network, - // but keep track of the original item orders to force it to be stable - // http://pages.ripco.net/~jgamble/nw.html - var iterator = Iterator.init(items.len, 4); - while (!iterator.finished()) { - var order = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7 }; - const range = iterator.nextRange(); - - const sliced_items = items[range.start..]; - switch (range.length()) { - 8 => { - swap(T, sliced_items, context, lessThan, &order, 0, 1); - swap(T, sliced_items, context, lessThan, &order, 2, 3); - swap(T, sliced_items, context, lessThan, &order, 4, 5); - swap(T, sliced_items, context, lessThan, &order, 6, 7); - swap(T, sliced_items, context, lessThan, &order, 0, 2); - swap(T, sliced_items, context, lessThan, &order, 1, 3); - swap(T, sliced_items, context, lessThan, &order, 4, 6); - swap(T, sliced_items, context, lessThan, &order, 5, 7); - swap(T, sliced_items, context, lessThan, &order, 1, 2); - swap(T, sliced_items, context, lessThan, &order, 5, 6); - swap(T, sliced_items, context, lessThan, &order, 0, 4); - swap(T, sliced_items, context, lessThan, &order, 3, 7); - swap(T, sliced_items, context, lessThan, &order, 1, 5); - swap(T, sliced_items, context, lessThan, &order, 2, 6); - swap(T, sliced_items, context, lessThan, &order, 1, 4); - swap(T, sliced_items, context, lessThan, &order, 3, 6); - swap(T, sliced_items, context, lessThan, &order, 2, 4); - swap(T, sliced_items, context, lessThan, &order, 3, 5); - swap(T, sliced_items, context, lessThan, &order, 3, 4); - }, - 7 => { - swap(T, sliced_items, context, lessThan, &order, 1, 2); - swap(T, sliced_items, context, lessThan, &order, 3, 4); - swap(T, sliced_items, context, lessThan, &order, 5, 6); - swap(T, sliced_items, context, lessThan, &order, 0, 2); - swap(T, sliced_items, context, lessThan, &order, 3, 5); - swap(T, sliced_items, context, lessThan, &order, 4, 6); - swap(T, sliced_items, context, lessThan, &order, 0, 1); - swap(T, sliced_items, context, lessThan, &order, 4, 5); - swap(T, sliced_items, context, lessThan, &order, 2, 6); - swap(T, sliced_items, context, lessThan, &order, 0, 4); - swap(T, sliced_items, context, lessThan, &order, 1, 5); - swap(T, sliced_items, context, lessThan, &order, 0, 3); - swap(T, sliced_items, context, lessThan, &order, 2, 5); - swap(T, sliced_items, context, lessThan, &order, 1, 3); - swap(T, sliced_items, context, lessThan, &order, 2, 4); - swap(T, sliced_items, context, lessThan, &order, 2, 3); - }, - 6 => { - swap(T, sliced_items, context, lessThan, 
&order, 1, 2); - swap(T, sliced_items, context, lessThan, &order, 4, 5); - swap(T, sliced_items, context, lessThan, &order, 0, 2); - swap(T, sliced_items, context, lessThan, &order, 3, 5); - swap(T, sliced_items, context, lessThan, &order, 0, 1); - swap(T, sliced_items, context, lessThan, &order, 3, 4); - swap(T, sliced_items, context, lessThan, &order, 2, 5); - swap(T, sliced_items, context, lessThan, &order, 0, 3); - swap(T, sliced_items, context, lessThan, &order, 1, 4); - swap(T, sliced_items, context, lessThan, &order, 2, 4); - swap(T, sliced_items, context, lessThan, &order, 1, 3); - swap(T, sliced_items, context, lessThan, &order, 2, 3); - }, - 5 => { - swap(T, sliced_items, context, lessThan, &order, 0, 1); - swap(T, sliced_items, context, lessThan, &order, 3, 4); - swap(T, sliced_items, context, lessThan, &order, 2, 4); - swap(T, sliced_items, context, lessThan, &order, 2, 3); - swap(T, sliced_items, context, lessThan, &order, 1, 4); - swap(T, sliced_items, context, lessThan, &order, 0, 3); - swap(T, sliced_items, context, lessThan, &order, 0, 2); - swap(T, sliced_items, context, lessThan, &order, 1, 3); - swap(T, sliced_items, context, lessThan, &order, 1, 2); - }, - 4 => { - swap(T, sliced_items, context, lessThan, &order, 0, 1); - swap(T, sliced_items, context, lessThan, &order, 2, 3); - swap(T, sliced_items, context, lessThan, &order, 0, 2); - swap(T, sliced_items, context, lessThan, &order, 1, 3); - swap(T, sliced_items, context, lessThan, &order, 1, 2); - }, - else => {}, - } - } - if (items.len < 8) return; - - // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc. - while (true) { - // if every A and B block will fit into the cache, use a special branch - // specifically for merging with the cache - // (we use < rather than <= since the block size might be one more than - // iterator.length()) - if (iterator.length() < cache.len) { - // if four subarrays fit into the cache, it's faster to merge both - // pairs of subarrays into the cache, - // then merge the two merged subarrays from the cache back into the original array - if ((iterator.length() + 1) * 4 <= cache.len and iterator.length() * 4 <= items.len) { - iterator.begin(); - while (!iterator.finished()) { - // merge A1 and B1 into the cache - var A1 = iterator.nextRange(); - var B1 = iterator.nextRange(); - var A2 = iterator.nextRange(); - var B2 = iterator.nextRange(); - - if (lessThan(context, items[B1.end - 1], items[A1.start])) { - // the two ranges are in reverse order, so copy them in reverse order into the cache - const a1_items = items[A1.start..A1.end]; - @memcpy(cache[B1.length()..][0..a1_items.len], a1_items); - const b1_items = items[B1.start..B1.end]; - @memcpy(cache[0..b1_items.len], b1_items); - } else if (lessThan(context, items[B1.start], items[A1.end - 1])) { - // these two ranges weren't already in order, so merge them into the cache - mergeInto(T, items, A1, B1, context, lessThan, cache[0..]); - } else { - // if A1, B1, A2, and B2 are all in order, skip doing anything else - if (!lessThan(context, items[B2.start], items[A2.end - 1]) and !lessThan(context, items[A2.start], items[B1.end - 1])) continue; - - // copy A1 and B1 into the cache in the same order - const a1_items = items[A1.start..A1.end]; - @memcpy(cache[0..a1_items.len], a1_items); - const b1_items = items[B1.start..B1.end]; - @memcpy(cache[A1.length()..][0..b1_items.len], b1_items); - } - A1 = Range.init(A1.start, B1.end); - - // merge A2 and B2 into the cache - if (lessThan(context, items[B2.end - 1], 
items[A2.start])) { - // the two ranges are in reverse order, so copy them in reverse order into the cache - const a2_items = items[A2.start..A2.end]; - @memcpy(cache[A1.length() + B2.length() ..][0..a2_items.len], a2_items); - const b2_items = items[B2.start..B2.end]; - @memcpy(cache[A1.length()..][0..b2_items.len], b2_items); - } else if (lessThan(context, items[B2.start], items[A2.end - 1])) { - // these two ranges weren't already in order, so merge them into the cache - mergeInto(T, items, A2, B2, context, lessThan, cache[A1.length()..]); - } else { - // copy A2 and B2 into the cache in the same order - const a2_items = items[A2.start..A2.end]; - @memcpy(cache[A1.length()..][0..a2_items.len], a2_items); - const b2_items = items[B2.start..B2.end]; - @memcpy(cache[A1.length() + A2.length() ..][0..b2_items.len], b2_items); - } - A2 = Range.init(A2.start, B2.end); - - // merge A1 and A2 from the cache into the items - const A3 = Range.init(0, A1.length()); - const B3 = Range.init(A1.length(), A1.length() + A2.length()); - - if (lessThan(context, cache[B3.end - 1], cache[A3.start])) { - // the two ranges are in reverse order, so copy them in reverse order into the items - const a3_items = cache[A3.start..A3.end]; - @memcpy(items[A1.start + A2.length() ..][0..a3_items.len], a3_items); - const b3_items = cache[B3.start..B3.end]; - @memcpy(items[A1.start..][0..b3_items.len], b3_items); - } else if (lessThan(context, cache[B3.start], cache[A3.end - 1])) { - // these two ranges weren't already in order, so merge them back into the items - mergeInto(T, cache[0..], A3, B3, context, lessThan, items[A1.start..]); - } else { - // copy A3 and B3 into the items in the same order - const a3_items = cache[A3.start..A3.end]; - @memcpy(items[A1.start..][0..a3_items.len], a3_items); - const b3_items = cache[B3.start..B3.end]; - @memcpy(items[A1.start + A1.length() ..][0..b3_items.len], b3_items); - } - } - - // we merged two levels at the same time, so we're done with this level already - // (iterator.nextLevel() is called again at the bottom of this outer merge loop) - _ = iterator.nextLevel(); - } else { - iterator.begin(); - while (!iterator.finished()) { - var A = iterator.nextRange(); - var B = iterator.nextRange(); - - if (lessThan(context, items[B.end - 1], items[A.start])) { - // the two ranges are in reverse order, so a simple rotation should fix it - mem.rotate(T, items[A.start..B.end], A.length()); - } else if (lessThan(context, items[B.start], items[A.end - 1])) { - // these two ranges weren't already in order, so we'll need to merge them! - const a_items = items[A.start..A.end]; - @memcpy(cache[0..a_items.len], a_items); - mergeExternal(T, items, A, B, context, lessThan, cache[0..]); - } - } - } - } else { - // this is where the in-place merge logic starts! - // 1. pull out two internal buffers each containing √A unique values - // 1a. adjust block_size and buffer_size if we couldn't find enough unique values - // 2. loop over the A and B subarrays within this level of the merge sort - // 3. break A and B into blocks of size 'block_size' - // 4. "tag" each of the A blocks with values from the first internal buffer - // 5. roll the A blocks through the B blocks and drop/rotate them where they belong - // 6. merge each A block with any B values that follow, using the cache or the second internal buffer - // 7. sort the second internal buffer if it exists - // 8. 
redistribute the two internal buffers back into the items - var block_size: usize = math.sqrt(iterator.length()); - var buffer_size = iterator.length() / block_size + 1; - - // as an optimization, we really only need to pull out the internal buffers once for each level of merges - // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level - var A: Range = undefined; - var B: Range = undefined; - var index: usize = 0; - var last: usize = 0; - var count: usize = 0; - var find: usize = 0; - var start: usize = 0; - var pull_index: usize = 0; - var pull = [_]Pull{ - Pull{ - .from = 0, - .to = 0, - .count = 0, - .range = Range.init(0, 0), - }, - Pull{ - .from = 0, - .to = 0, - .count = 0, - .range = Range.init(0, 0), - }, - }; - - var buffer1 = Range.init(0, 0); - var buffer2 = Range.init(0, 0); - - // find two internal buffers of size 'buffer_size' each - find = buffer_size + buffer_size; - var find_separately = false; - - if (block_size <= cache.len) { - // if every A block fits into the cache then we won't need the second internal buffer, - // so we really only need to find 'buffer_size' unique values - find = buffer_size; - } else if (find > iterator.length()) { - // we can't fit both buffers into the same A or B subarray, so find two buffers separately - find = buffer_size; - find_separately = true; - } - - // we need to find either a single contiguous space containing 2√A unique values (which will be split up into two buffers of size √A each), - // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values, - // OR if we couldn't find that many unique values, we need the largest possible buffer we can get - - // in the case where it couldn't find a single buffer of at least √A unique values, - // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace) - iterator.begin(); - while (!iterator.finished()) { - A = iterator.nextRange(); - B = iterator.nextRange(); - - // just store information about where the values will be pulled from and to, - // as well as how many values there are, to create the two internal buffers - - // check A for the number of unique values we need to fill an internal buffer - // these values will be pulled out to the start of A - last = A.start; - count = 1; - while (count < find) : ({ - last = index; - count += 1; - }) { - index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), context, lessThan, find - count); - if (index == A.end) break; - } - index = last; - - if (count >= buffer_size) { - // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = A.start, - }; - pull_index = 1; - - if (count == buffer_size + buffer_size) { - // we were able to find a single contiguous section containing 2√A unique values, - // so this section can be used to contain both of the internal buffers we'll need - buffer1 = Range.init(A.start, A.start + buffer_size); - buffer2 = Range.init(A.start + buffer_size, A.start + count); - break; - } else if (find == buffer_size + buffer_size) { - // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, - // so we still need to find a second separate buffer of at least √A unique values - buffer1 = Range.init(A.start, A.start + count); - find = buffer_size; - } else if (block_size <= 
cache.len) { - // we found the first and only internal buffer that we need, so we're done! - buffer1 = Range.init(A.start, A.start + count); - break; - } else if (find_separately) { - // found one buffer, but now find the other one - buffer1 = Range.init(A.start, A.start + count); - find_separately = false; - } else { - // we found a second buffer in an 'A' subarray containing √A unique values, so we're done! - buffer2 = Range.init(A.start, A.start + count); - break; - } - } else if (pull_index == 0 and count > buffer1.length()) { - // keep track of the largest buffer we were able to find - buffer1 = Range.init(A.start, A.start + count); - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = A.start, - }; - } - - // check B for the number of unique values we need to fill an internal buffer - // these values will be pulled out to the end of B - last = B.end - 1; - count = 1; - while (count < find) : ({ - last = index - 1; - count += 1; - }) { - index = findFirstBackward(T, items, items[last], Range.init(B.start, last), context, lessThan, find - count); - if (index == B.start) break; - } - index = last; - if (count >= buffer_size) { - // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffe - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = B.end, - }; - pull_index = 1; - - if (count == buffer_size + buffer_size) { - // we were able to find a single contiguous section containing 2√A unique values, - // so this section can be used to contain both of the internal buffers we'll need - buffer1 = Range.init(B.end - count, B.end - buffer_size); - buffer2 = Range.init(B.end - buffer_size, B.end); - break; - } else if (find == buffer_size + buffer_size) { - // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, - // so we still need to find a second separate buffer of at least √A unique values - buffer1 = Range.init(B.end - count, B.end); - find = buffer_size; - } else if (block_size <= cache.len) { - // we found the first and only internal buffer that we need, so we're done! - buffer1 = Range.init(B.end - count, B.end); - break; - } else if (find_separately) { - // found one buffer, but now find the other one - buffer1 = Range.init(B.end - count, B.end); - find_separately = false; - } else { - // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray, - // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2 - if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count; - - // we found a second buffer in an 'B' subarray containing √A unique values, so we're done! 
- buffer2 = Range.init(B.end - count, B.end); - break; - } - } else if (pull_index == 0 and count > buffer1.length()) { - // keep track of the largest buffer we were able to find - buffer1 = Range.init(B.end - count, B.end); - pull[pull_index] = Pull{ - .range = Range.init(A.start, B.end), - .count = count, - .from = index, - .to = B.end, - }; - } - } - - // pull out the two ranges so we can use them as internal buffers - pull_index = 0; - while (pull_index < 2) : (pull_index += 1) { - const length = pull[pull_index].count; - - if (pull[pull_index].to < pull[pull_index].from) { - // we're pulling the values out to the left, which means the start of an A subarray - index = pull[pull_index].from; - count = 1; - while (count < length) : (count += 1) { - index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), context, lessThan, length - count); - const range = Range.init(index + 1, pull[pull_index].from + 1); - mem.rotate(T, items[range.start..range.end], range.length() - count); - pull[pull_index].from = index + count; - } - } else if (pull[pull_index].to > pull[pull_index].from) { - // we're pulling values out to the right, which means the end of a B subarray - index = pull[pull_index].from + 1; - count = 1; - while (count < length) : (count += 1) { - index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), context, lessThan, length - count); - const range = Range.init(pull[pull_index].from, index - 1); - mem.rotate(T, items[range.start..range.end], count); - pull[pull_index].from = index - 1 - count; - } - } - } - - // adjust block_size and buffer_size based on the values we were able to pull out - buffer_size = buffer1.length(); - block_size = iterator.length() / buffer_size + 1; - - // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks, - // so this was originally here to test the math for adjusting block_size above - // assert((iterator.length() + 1)/block_size <= buffer_size); - - // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort! - iterator.begin(); - while (!iterator.finished()) { - A = iterator.nextRange(); - B = iterator.nextRange(); - - // remove any parts of A or B that are being used by the internal buffers - start = A.start; - if (start == pull[0].range.start) { - if (pull[0].from > pull[0].to) { - A.start += pull[0].count; - - // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge - // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4, - // which also only happens when cache.len is small or 0 since it'd otherwise use MergeExternal - if (A.length() == 0) continue; - } else if (pull[0].from < pull[0].to) { - B.end -= pull[0].count; - if (B.length() == 0) continue; - } - } - if (start == pull[1].range.start) { - if (pull[1].from > pull[1].to) { - A.start += pull[1].count; - if (A.length() == 0) continue; - } else if (pull[1].from < pull[1].to) { - B.end -= pull[1].count; - if (B.length() == 0) continue; - } - } - - if (lessThan(context, items[B.end - 1], items[A.start])) { - // the two ranges are in reverse order, so a simple rotation should fix it - mem.rotate(T, items[A.start..B.end], A.length()); - } else if (lessThan(context, items[A.end], items[A.end - 1])) { - // these two ranges weren't already in order, so we'll need to merge them! 
- var findA: usize = undefined; - - // break the remainder of A into blocks. firstA is the uneven-sized first A block - var blockA = Range.init(A.start, A.end); - var firstA = Range.init(A.start, A.start + blockA.length() % block_size); - - // swap the first value of each A block with the value in buffer1 - var indexA = buffer1.start; - index = firstA.end; - while (index < blockA.end) : ({ - indexA += 1; - index += block_size; - }) { - mem.swap(T, &items[indexA], &items[index]); - } - - // start rolling the A blocks through the B blocks! - // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well - var lastA = firstA; - var lastB = Range.init(0, 0); - var blockB = Range.init(B.start, B.start + math.min(block_size, B.length())); - blockA.start += firstA.length(); - indexA = buffer1.start; - - // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it - // otherwise, if the second buffer is available, block swap the contents into that - if (lastA.length() <= cache.len) { - const last_a_items = items[lastA.start..lastA.end]; - @memcpy(cache[0..last_a_items.len], last_a_items); - } else if (buffer2.length() > 0) { - blockSwap(T, items, lastA.start, buffer2.start, lastA.length()); - } - - if (blockA.length() > 0) { - while (true) { - // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, - // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. - if ((lastB.length() > 0 and !lessThan(context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { - // figure out where to split the previous B block, and rotate it at the split - const B_split = binaryFirst(T, items, items[indexA], lastB, context, lessThan); - const B_remaining = lastB.end - B_split; - - // swap the minimum A block to the beginning of the rolling A blocks - var minA = blockA.start; - findA = minA + block_size; - while (findA < blockA.end) : (findA += block_size) { - if (lessThan(context, items[findA], items[minA])) { - minA = findA; - } - } - blockSwap(T, items, blockA.start, minA, block_size); - - // swap the first item of the previous A block back with its original value, which is stored in buffer1 - mem.swap(T, &items[blockA.start], &items[indexA]); - indexA += 1; - - // locally merge the previous A block with the B values that follow it - // if lastA fits into the external cache we'll use that (with MergeExternal), - // or if the second internal buffer exists we'll use that (with MergeInternal), - // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) - - if (lastA.length() <= cache.len) { - mergeExternal(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan, cache[0..]); - } else if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan, buffer2); - } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan); - } - - if (buffer2.length() > 0 or block_size <= cache.len) { - // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway - if (block_size <= cache.len) { - @memcpy(cache[0..block_size], items[blockA.start..][0..block_size]); - } else { - blockSwap(T, items, blockA.start, buffer2.start, block_size); - } - - // this is equivalent to rotating, but faster - // the area 
normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it - // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs - blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining); - } else { - // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation - mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); - } - - // update the range for the remaining A blocks, and the range remaining from the B block after it was split - lastA = Range.init(blockA.start - B_remaining, blockA.start - B_remaining + block_size); - lastB = Range.init(lastA.end, lastA.end + B_remaining); - - // if there are no more A blocks remaining, this step is finished! - blockA.start += block_size; - if (blockA.length() == 0) break; - } else if (blockB.length() < block_size) { - // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation - // the cache is disabled here since it might contain the contents of the previous A block - mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start); - - lastB = Range.init(blockA.start, blockA.start + blockB.length()); - blockA.start += blockB.length(); - blockA.end += blockB.length(); - blockB.end = blockB.start; - } else { - // roll the leftmost A block to the end by swapping it with the next B block - blockSwap(T, items, blockA.start, blockB.start, block_size); - lastB = Range.init(blockA.start, blockA.start + block_size); - - blockA.start += block_size; - blockA.end += block_size; - blockB.start += block_size; - - if (blockB.end > B.end - block_size) { - blockB.end = B.end; - } else { - blockB.end += block_size; - } - } - } - } - - // merge the last A block with the remaining B values - if (lastA.length() <= cache.len) { - mergeExternal(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan, cache[0..]); - } else if (buffer2.length() > 0) { - mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan, buffer2); - } else { - mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan); - } - } - } - - // when we're finished with this merge step we should have the one - // or two internal buffers left over, where the second buffer is all jumbled up - // insertion sort the second buffer, then redistribute the buffers - // back into the items using the opposite process used for creating the buffer - - // while an unstable sort like quicksort could be applied here, in benchmarks - // it was consistently slightly slower than a simple insertion sort, - // even for tens of millions of items. 
this may be because insertion - // sort is quite fast when the data is already somewhat sorted, like it is here - insertionSort(T, items[buffer2.start..buffer2.end], context, lessThan); - - pull_index = 0; - while (pull_index < 2) : (pull_index += 1) { - var unique = pull[pull_index].count * 2; - if (pull[pull_index].from > pull[pull_index].to) { - // the values were pulled out to the left, so redistribute them back to the right - var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); - while (buffer.length() > 0) { - index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), context, lessThan, unique); - const amount = index - buffer.end; - mem.rotate(T, items[buffer.start..index], buffer.length()); - buffer.start += (amount + 1); - buffer.end += amount; - unique -= 2; - } - } else if (pull[pull_index].from < pull[pull_index].to) { - // the values were pulled out to the right, so redistribute them back to the left - var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); - while (buffer.length() > 0) { - index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), context, lessThan, unique); - const amount = buffer.start - index; - mem.rotate(T, items[index..buffer.end], amount); - buffer.start -= amount; - buffer.end -= (amount + 1); - unique -= 2; - } - } - } + pub fn swap(ctx: @This(), a: usize, b: usize) void { + return mem.swap(T, &ctx.items[a], &ctx.items[b]); } - - // double the size of each A and B subarray that will be merged in the next level - if (!iterator.nextLevel()) break; - } -} - -/// TODO currently this just calls `insertionSortContext`. The block sort implementation -/// in this file needs to be adapted to use the sort context. -pub fn sortContext(len: usize, context: anytype) void { - return insertionSortContext(len, context); -} - -// merge operation without a buffer -fn mergeInPlace( - comptime T: type, - items: []T, - A_arg: Range, - B_arg: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, -) void { - if (A_arg.length() == 0 or B_arg.length() == 0) return; - - // this just repeatedly binary searches into B and rotates A into position. - // the paper suggests using the 'rotation-based Hwang and Lin algorithm' here, - // but I decided to stick with this because it had better situational performance - // - // (Hwang and Lin is designed for merging subarrays of very different sizes, - // but WikiSort almost always uses subarrays that are roughly the same size) - // - // normally this is incredibly suboptimal, but this function is only called - // when none of the A or B blocks in any subarray contained 2√A unique values, - // which places a hard limit on the number of times this will ACTUALLY need - // to binary search and rotate. - // - // according to my analysis the worst case is √A rotations performed on √A items - // once the constant factors are removed, which ends up being O(n) - // - // again, this is NOT a general-purpose solution – it only works well in this case! 
- // kind of like how the O(n^2) insertion sort is used in some places - - var A = A_arg; - var B = B_arg; - - while (true) { - // find the first place in B where the first item in A needs to be inserted - const mid = binaryFirst(T, items, items[A.start], B, context, lessThan); - - // rotate A into place - const amount = mid - A.end; - mem.rotate(T, items[A.start..mid], A.length()); - if (B.end == mid) break; - - // calculate the new A and B ranges - B.start = mid; - A = Range.init(A.start + amount, B.start); - A.start = binaryLast(T, items, items[A.start], A, context, lessThan); - if (A.length() == 0) break; - } -} - -// merge operation using an internal buffer -fn mergeInternal( - comptime T: type, - items: []T, - A: Range, - B: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - buffer: Range, -) void { - // whenever we find a value to add to the final array, swap it with the value that's already in that spot - // when this algorithm is finished, 'buffer' will contain its original contents, but in a different order - var A_count: usize = 0; - var B_count: usize = 0; - var insert: usize = 0; - - if (B.length() > 0 and A.length() > 0) { - while (true) { - if (!lessThan(context, items[B.start + B_count], items[buffer.start + A_count])) { - mem.swap(T, &items[A.start + insert], &items[buffer.start + A_count]); - A_count += 1; - insert += 1; - if (A_count >= A.length()) break; - } else { - mem.swap(T, &items[A.start + insert], &items[B.start + B_count]); - B_count += 1; - insert += 1; - if (B_count >= B.length()) break; - } - } - } - - // swap the remainder of A into the final array - blockSwap(T, items, buffer.start + A_count, A.start + insert, A.length() - A_count); -} - -fn blockSwap(comptime T: type, items: []T, start1: usize, start2: usize, block_size: usize) void { - var index: usize = 0; - while (index < block_size) : (index += 1) { - mem.swap(T, &items[start1 + index], &items[start2 + index]); - } -} - -// combine a linear search with a binary search to reduce the number of comparisons in situations -// where have some idea as to how many unique values there are and where the next value might be -fn findFirstForward( - comptime T: type, - items: []T, - value: T, - range: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - unique: usize, -) usize { - if (range.length() == 0) return range.start; - const skip = math.max(range.length() / unique, @as(usize, 1)); - - var index = range.start + skip; - while (lessThan(context, items[index - 1], value)) : (index += skip) { - if (index >= range.end - skip) { - return binaryFirst(T, items, value, Range.init(index, range.end), context, lessThan); - } - } - - return binaryFirst(T, items, value, Range.init(index - skip, index), context, lessThan); -} - -fn findFirstBackward( - comptime T: type, - items: []T, - value: T, - range: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - unique: usize, -) usize { - if (range.length() == 0) return range.start; - const skip = math.max(range.length() / unique, @as(usize, 1)); - - var index = range.end - skip; - while (index > range.start and !lessThan(context, items[index - 1], value)) : (index -= skip) { - if (index < range.start + skip) { - return binaryFirst(T, items, value, Range.init(range.start, index), context, lessThan); - } - } - - return binaryFirst(T, items, value, Range.init(index, index + skip), context, lessThan); -} - -fn findLastForward( - comptime T: type, - items: []T, - value: T, - range: Range, 
- context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - unique: usize, -) usize { - if (range.length() == 0) return range.start; - const skip = math.max(range.length() / unique, @as(usize, 1)); - - var index = range.start + skip; - while (!lessThan(context, value, items[index - 1])) : (index += skip) { - if (index >= range.end - skip) { - return binaryLast(T, items, value, Range.init(index, range.end), context, lessThan); - } - } - - return binaryLast(T, items, value, Range.init(index - skip, index), context, lessThan); -} - -fn findLastBackward( - comptime T: type, - items: []T, - value: T, - range: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - unique: usize, -) usize { - if (range.length() == 0) return range.start; - const skip = math.max(range.length() / unique, @as(usize, 1)); - - var index = range.end - skip; - while (index > range.start and lessThan(context, value, items[index - 1])) : (index -= skip) { - if (index < range.start + skip) { - return binaryLast(T, items, value, Range.init(range.start, index), context, lessThan); - } - } - - return binaryLast(T, items, value, Range.init(index, index + skip), context, lessThan); + }; + heapContext(0, items.len, Context{ .items = items, .sub_ctx = context }); } -fn binaryFirst( - comptime T: type, - items: []T, - value: T, - range: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, -) usize { - var curr = range.start; - var size = range.length(); - if (range.start >= range.end) return range.end; - while (size > 0) { - const offset = size % 2; - - size /= 2; - const mid_item = items[curr + size]; - if (lessThan(context, mid_item, value)) { - curr += size + offset; - } +/// Unstable in-place sort. O(n*log(n)) best case, worst case and average case. +/// O(1) memory (no allocator required). +/// Sorts in ascending order with respect to the given `lessThan` function. +pub fn heapContext(a: usize, b: usize, context: anytype) void { + // build the heap in linear time. + var i = a + (b - a) / 2; + while (i > a) : (i -= 1) { + siftDown(a, i - 1, b, context); + } - return curr; -} - -fn binaryLast( - comptime T: type, - items: []T, - value: T, - range: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, -) usize { - var curr = range.start; - var size = range.length(); - if (range.start >= range.end) return range.end; - while (size > 0) { - const offset = size % 2; - size /= 2; - const mid_item = items[curr + size]; - if (!lessThan(context, value, mid_item)) { - curr += size + offset; - } + // pop maximal elements from the heap.
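+    // each pass swaps the current maximum, kept at index `a`, past the end of the shrinking heap.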
+ i = b; + while (i > a) : (i -= 1) { + context.swap(a, i - 1); + siftDown(a, a, i - 1, context); + } - return curr; } -fn mergeInto( - comptime T: type, - from: []T, - A: Range, - B: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - into: []T, -) void { - var A_index: usize = A.start; - var B_index: usize = B.start; - const A_last = A.end; - const B_last = B.end; - var insert_index: usize = 0; - +fn siftDown(a: usize, root: usize, n: usize, context: anytype) void { + var node = root; while (true) { - if (!lessThan(context, from[B_index], from[A_index])) { - into[insert_index] = from[A_index]; - A_index += 1; - insert_index += 1; - if (A_index == A_last) { - // copy the remainder of B into the final array - const from_b = from[B_index..B_last]; - @memcpy(into[insert_index..][0..from_b.len], from_b); - break; - } - } else { - into[insert_index] = from[B_index]; - B_index += 1; - insert_index += 1; - if (B_index == B_last) { - // copy the remainder of A into the final array - const from_a = from[A_index..A_last]; - @memcpy(into[insert_index..][0..from_a.len], from_a); - break; - } - } - } -} - -fn mergeExternal( - comptime T: type, - items: []T, - A: Range, - B: Range, - context: anytype, - comptime lessThan: fn (@TypeOf(context), T, T) bool, - cache: []T, -) void { - // A fits into the cache, so use that instead of the internal buffer - var A_index: usize = 0; - var B_index: usize = B.start; - var insert_index: usize = A.start; - const A_last = A.length(); - const B_last = B.end; + // child of `node`, offset so the heap can be rooted at any `a`. + var child = a + 2 * (node - a) + 1; + if (child >= n) break; - if (B.length() > 0 and A.length() > 0) { - while (true) { - if (!lessThan(context, items[B_index], cache[A_index])) { - items[insert_index] = cache[A_index]; - A_index += 1; - insert_index += 1; - if (A_index == A_last) break; - } else { - items[insert_index] = items[B_index]; - B_index += 1; - insert_index += 1; - if (B_index == B_last) break; - } + // choose the greater child. + if (child + 1 < n and context.lessThan(child, child + 1)) { + child += 1; } - } + // stop if the invariant holds at `node`. + if (!context.lessThan(node, child)) break; - // copy the remainder of A into the final array - const cache_a = cache[A_index..A_last]; - @memcpy(items[insert_index..][0..cache_a.len], cache_a); -} + // swap `node` with the greater child, + // move one step down, and continue sifting. + context.swap(node, child); + node = child; } } -fn swap( - comptime T: type, - items: []T, - context: anytype, - comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, - order: *[8]u8, - x: usize, - y: usize, -) void { - if (lessThan(context, items[y], items[x]) or ((order.*)[x] > (order.*)[y] and !lessThan(context, items[x], items[y]))) { - mem.swap(T, &items[x], &items[y]); - mem.swap(u8, &(order.*)[x], &(order.*)[y]); + } } -/// Use to generate a comparator function for a given type. e.g. `sort(u8, slice, {}, comptime asc(u8))`. +/// Use to generate a comparator function for a given type. e.g. `sort(u8, slice, {}, asc(u8))`. pub fn asc(comptime T: type) fn (void, T, T) bool { - const impl = struct { - fn inner(context: void, a: T, b: T) bool { - _ = context; + return struct { + pub fn inner(_: void, a: T, b: T) bool { return a < b; } - }; - - return impl.inner; + }.inner; } -/// Use to generate a comparator function for a given type. e.g. `sort(u8, slice, {}, comptime desc(u8))`. +/// Use to generate a comparator function for a given type. e.g. `sort(u8, slice, {}, desc(u8))`. 
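+/// The returned comparator takes a `void` context, so it can be passed directly to `block`, `pdq`, `insertion`, and `heap`.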
pub fn desc(comptime T: type) fn (void, T, T) bool { - const impl = struct { - fn inner(context: void, a: T, b: T) bool { - _ = context; + return struct { + pub fn inner(_: void, a: T, b: T) bool { return a > b; } - }; - - return impl.inner; + }.inner; } +const asc_u8 = asc(u8); +const asc_i32 = asc(i32); +const desc_u8 = desc(u8); +const desc_i32 = desc(i32); + +const sort_funcs = &[_]fn (comptime type, anytype, anytype, comptime anytype) void{ + block, + pdq, + insertion, + heap, +}; + +const IdAndValue = struct { + id: usize, + value: i32, + + fn lessThan(context: void, a: IdAndValue, b: IdAndValue) bool { + _ = context; + return a.value < b.value; + } +}; + test "stable sort" { - try testStableSort(); - comptime try testStableSort(); -} -fn testStableSort() !void { - var expected = [_]IdAndValue{ + const expected = [_]IdAndValue{ IdAndValue{ .id = 0, .value = 0 }, IdAndValue{ .id = 1, .value = 0 }, IdAndValue{ .id = 2, .value = 0 }, @@ -1249,6 +160,7 @@ fn testStableSort() !void { IdAndValue{ .id = 1, .value = 2 }, IdAndValue{ .id = 2, .value = 2 }, }; + var cases = [_][9]IdAndValue{ [_]IdAndValue{ IdAndValue{ .id = 0, .value = 0 }, @@ -1273,26 +185,15 @@ fn testStableSort() !void { IdAndValue{ .id = 2, .value = 0 }, }, }; + for (&cases) |*case| { - insertionSort(IdAndValue, (case.*)[0..], {}, cmpByValue); + block(IdAndValue, (case.*)[0..], {}, IdAndValue.lessThan); for (case.*, 0..) |item, i| { try testing.expect(item.id == expected[i].id); try testing.expect(item.value == expected[i].value); } } } -const IdAndValue = struct { - id: usize, - value: i32, -}; -fn cmpByValue(context: void, a: IdAndValue, b: IdAndValue) bool { - return asc_i32(context, a.value, b.value); -} - -const asc_u8 = asc(u8); -const asc_i32 = asc(i32); -const desc_u8 = desc(u8); -const desc_i32 = desc(i32); test "sort" { const u8cases = [_][]const []const u8{ @@ -1322,14 +223,6 @@ test "sort" { }, }; - for (u8cases) |case| { - var buf: [8]u8 = undefined; - const slice = buf[0..case[0].len]; - @memcpy(slice, case[0]); - sort(u8, slice, {}, asc_u8); - try testing.expect(mem.eql(u8, slice, case[1])); - } - const i32cases = [_][]const []const i32{ &[_][]const i32{ &[_]i32{}, @@ -1357,12 +250,22 @@ test "sort" { }, }; - for (i32cases) |case| { - var buf: [8]i32 = undefined; - const slice = buf[0..case[0].len]; - @memcpy(slice, case[0]); - sort(i32, slice, {}, asc_i32); - try testing.expect(mem.eql(i32, slice, case[1])); + inline for (sort_funcs) |sortFn| { + for (u8cases) |case| { + var buf: [8]u8 = undefined; + const slice = buf[0..case[0].len]; + @memcpy(slice, case[0]); + sortFn(u8, slice, {}, asc_u8); + try testing.expect(mem.eql(u8, slice, case[1])); + } + + for (i32cases) |case| { + var buf: [8]i32 = undefined; + const slice = buf[0..case[0].len]; + @memcpy(slice, case[0]); + sortFn(i32, slice, {}, asc_i32); + try testing.expect(mem.eql(i32, slice, case[1])); + } } } @@ -1394,53 +297,139 @@ test "sort descending" { }, }; - for (rev_cases) |case| { - var buf: [8]i32 = undefined; - const slice = buf[0..case[0].len]; - @memcpy(slice, case[0]); - sort(i32, slice, {}, desc_i32); - try testing.expect(mem.eql(i32, slice, case[1])); + inline for (sort_funcs) |sortFn| { + for (rev_cases) |case| { + var buf: [8]i32 = undefined; + const slice = buf[0..case[0].len]; + @memcpy(slice, case[0]); + sortFn(i32, slice, {}, desc_i32); + try testing.expect(mem.eql(i32, slice, case[1])); + } } } -test "another sort case" { - var arr = [_]i32{ 5, 3, 1, 2, 4 }; - sort(i32, arr[0..], {}, asc_i32); - - try testing.expect(mem.eql(i32, 
&arr, &[_]i32{ 1, 2, 3, 4, 5 })); -} - test "sort fuzz testing" { var prng = std.rand.DefaultPrng.init(0x12345678); const random = prng.random(); const test_case_count = 10; - var i: usize = 0; - while (i < test_case_count) : (i += 1) { - try fuzzTest(random); + + inline for (sort_funcs) |sortFn| { + var i: usize = 0; + while (i < test_case_count) : (i += 1) { + const array_size = random.intRangeLessThan(usize, 0, 1000); + var array = try testing.allocator.alloc(i32, array_size); + defer testing.allocator.free(array); + // populate with random data + for (array) |*item| { + item.* = random.intRangeLessThan(i32, 0, 100); + } + sortFn(i32, array, {}, asc_i32); + try testing.expect(isSorted(i32, array, {}, asc_i32)); + } } } -var fixed_buffer_mem: [100 * 1024]u8 = undefined; +pub fn binarySearch( + comptime T: type, + key: anytype, + items: []const T, + context: anytype, + comptime compareFn: fn (context: @TypeOf(context), key: @TypeOf(key), mid_item: T) math.Order, +) ?usize { + var left: usize = 0; + var right: usize = items.len; -fn fuzzTest(rng: std.rand.Random) !void { - const array_size = rng.intRangeLessThan(usize, 0, 1000); - var array = try testing.allocator.alloc(IdAndValue, array_size); - defer testing.allocator.free(array); - // populate with random data - for (array, 0..) |*item, index| { - item.id = index; - item.value = rng.intRangeLessThan(i32, 0, 100); + while (left < right) { + // Avoid overflowing in the midpoint calculation + const mid = left + (right - left) / 2; + // Compare the key with the midpoint element + switch (compareFn(context, key, items[mid])) { + .eq => return mid, + .gt => left = mid + 1, + .lt => right = mid, + } } - sort(IdAndValue, array, {}, cmpByValue); - var index: usize = 1; - while (index < array.len) : (index += 1) { - if (array[index].value == array[index - 1].value) { - try testing.expect(array[index].id > array[index - 1].id); - } else { - try testing.expect(array[index].value > array[index - 1].value); + return null; +} + +test "binarySearch" { + const S = struct { + fn order_u32(context: void, lhs: u32, rhs: u32) math.Order { + _ = context; + return math.order(lhs, rhs); } - } + fn order_i32(context: void, lhs: i32, rhs: i32) math.Order { + _ = context; + return math.order(lhs, rhs); + } + }; + try testing.expectEqual( + @as(?usize, null), + binarySearch(u32, @as(u32, 1), &[_]u32{}, {}, S.order_u32), + ); + try testing.expectEqual( + @as(?usize, 0), + binarySearch(u32, @as(u32, 1), &[_]u32{1}, {}, S.order_u32), + ); + try testing.expectEqual( + @as(?usize, null), + binarySearch(u32, @as(u32, 1), &[_]u32{0}, {}, S.order_u32), + ); + try testing.expectEqual( + @as(?usize, null), + binarySearch(u32, @as(u32, 0), &[_]u32{1}, {}, S.order_u32), + ); + try testing.expectEqual( + @as(?usize, 4), + binarySearch(u32, @as(u32, 5), &[_]u32{ 1, 2, 3, 4, 5 }, {}, S.order_u32), + ); + try testing.expectEqual( + @as(?usize, 0), + binarySearch(u32, @as(u32, 2), &[_]u32{ 2, 4, 8, 16, 32, 64 }, {}, S.order_u32), + ); + try testing.expectEqual( + @as(?usize, 1), + binarySearch(i32, @as(i32, -4), &[_]i32{ -7, -4, 0, 9, 10 }, {}, S.order_i32), + ); + try testing.expectEqual( + @as(?usize, 3), + binarySearch(i32, @as(i32, 98), &[_]i32{ -100, -25, 2, 98, 99, 100 }, {}, S.order_i32), + ); + const R = struct { + b: i32, + e: i32, + + fn r(b: i32, e: i32) @This() { + return @This(){ .b = b, .e = e }; + } + + fn order(context: void, key: i32, mid_item: @This()) math.Order { + _ = context; + + if (key < mid_item.b) { + return .lt; + } + + if (key > mid_item.e) { + return 
.gt; + } + + return .eq; + } + }; + try testing.expectEqual( + @as(?usize, null), + binarySearch(R, @as(i32, -45), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), + ); + try testing.expectEqual( + @as(?usize, 2), + binarySearch(R, @as(i32, 10), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), + ); + try testing.expectEqual( + @as(?usize, 1), + binarySearch(R, @as(i32, -20), &[_]R{ R.r(-100, -50), R.r(-40, -20), R.r(-10, 20), R.r(30, 40) }, {}, R.order), + ); } pub fn argMin( diff --git a/lib/std/sort/block.zig b/lib/std/sort/block.zig new file mode 100644 index 000000000000..6c1be9c6c240 --- /dev/null +++ b/lib/std/sort/block.zig @@ -0,0 +1,1066 @@ +const std = @import("../std.zig"); +const sort = std.sort; +const math = std.math; +const mem = std.mem; + +const Range = struct { + start: usize, + end: usize, + + fn init(start: usize, end: usize) Range { + return Range{ + .start = start, + .end = end, + }; + } + + fn length(self: Range) usize { + return self.end - self.start; + } +}; + +const Iterator = struct { + size: usize, + power_of_two: usize, + numerator: usize, + decimal: usize, + denominator: usize, + decimal_step: usize, + numerator_step: usize, + + fn init(size2: usize, min_level: usize) Iterator { + const power_of_two = math.floorPowerOfTwo(usize, size2); + const denominator = power_of_two / min_level; + return Iterator{ + .numerator = 0, + .decimal = 0, + .size = size2, + .power_of_two = power_of_two, + .denominator = denominator, + .decimal_step = size2 / denominator, + .numerator_step = size2 % denominator, + }; + } + + fn begin(self: *Iterator) void { + self.numerator = 0; + self.decimal = 0; + } + + fn nextRange(self: *Iterator) Range { + const start = self.decimal; + + self.decimal += self.decimal_step; + self.numerator += self.numerator_step; + if (self.numerator >= self.denominator) { + self.numerator -= self.denominator; + self.decimal += 1; + } + + return Range{ + .start = start, + .end = self.decimal, + }; + } + + fn finished(self: *Iterator) bool { + return self.decimal >= self.size; + } + + fn nextLevel(self: *Iterator) bool { + self.decimal_step += self.decimal_step; + self.numerator_step += self.numerator_step; + if (self.numerator_step >= self.denominator) { + self.numerator_step -= self.denominator; + self.decimal_step += 1; + } + + return (self.decimal_step < self.size); + } + + fn length(self: *Iterator) usize { + return self.decimal_step; + } +}; + +const Pull = struct { + from: usize, + to: usize, + count: usize, + range: Range, +}; + +/// Stable in-place sort. O(n) best case, O(n*log(n)) worst case and average case. +/// O(1) memory (no allocator required). +/// Sorts in ascending order with respect to the given `lessThan` function. 
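+///
+/// A minimal usage sketch (hypothetical values; `asc` is the comparator
+/// helper from `std.sort`):
+///
+///     var data = [_]i32{ 3, 1, 2 };
+///     block(i32, &data, {}, std.sort.asc(i32));
+///     // data is now .{ 1, 2, 3 }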
+///
+/// NOTE: the algorithm only works when the comparison is less-than or greater-than
+/// (See https://github.com/ziglang/zig/issues/8289)
+pub fn block(
+    comptime T: type,
+    items: []T,
+    context: anytype,
+    comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool,
+) void {
+
+    // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c
+    var cache: [512]T = undefined;
+
+    if (items.len < 4) {
+        if (items.len == 3) {
+            // hard coded insertion sort
+            if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]);
+            if (lessThan(context, items[2], items[1])) {
+                mem.swap(T, &items[1], &items[2]);
+                if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]);
+            }
+        } else if (items.len == 2) {
+            if (lessThan(context, items[1], items[0])) mem.swap(T, &items[0], &items[1]);
+        }
+        return;
+    }
+
+    // sort groups of 4-8 items at a time using an unstable sorting network,
+    // but keep track of the original item orders to force it to be stable
+    // http://pages.ripco.net/~jgamble/nw.html
+    var iterator = Iterator.init(items.len, 4);
+    while (!iterator.finished()) {
+        var order = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7 };
+        const range = iterator.nextRange();
+
+        const sliced_items = items[range.start..];
+        switch (range.length()) {
+            8 => {
+                swap(T, sliced_items, &order, 0, 1, context, lessThan);
+                swap(T, sliced_items, &order, 2, 3, context, lessThan);
+                swap(T, sliced_items, &order, 4, 5, context, lessThan);
+                swap(T, sliced_items, &order, 6, 7, context, lessThan);
+                swap(T, sliced_items, &order, 0, 2, context, lessThan);
+                swap(T, sliced_items, &order, 1, 3, context, lessThan);
+                swap(T, sliced_items, &order, 4, 6, context, lessThan);
+                swap(T, sliced_items, &order, 5, 7, context, lessThan);
+                swap(T, sliced_items, &order, 1, 2, context, lessThan);
+                swap(T, sliced_items, &order, 5, 6, context, lessThan);
+                swap(T, sliced_items, &order, 0, 4, context, lessThan);
+                swap(T, sliced_items, &order, 3, 7, context, lessThan);
+                swap(T, sliced_items, &order, 1, 5, context, lessThan);
+                swap(T, sliced_items, &order, 2, 6, context, lessThan);
+                swap(T, sliced_items, &order, 1, 4, context, lessThan);
+                swap(T, sliced_items, &order, 3, 6, context, lessThan);
+                swap(T, sliced_items, &order, 2, 4, context, lessThan);
+                swap(T, sliced_items, &order, 3, 5, context, lessThan);
+                swap(T, sliced_items, &order, 3, 4, context, lessThan);
+            },
+            7 => {
+                swap(T, sliced_items, &order, 1, 2, context, lessThan);
+                swap(T, sliced_items, &order, 3, 4, context, lessThan);
+                swap(T, sliced_items, &order, 5, 6, context, lessThan);
+                swap(T, sliced_items, &order, 0, 2, context, lessThan);
+                swap(T, sliced_items, &order, 3, 5, context, lessThan);
+                swap(T, sliced_items, &order, 4, 6, context, lessThan);
+                swap(T, sliced_items, &order, 0, 1, context, lessThan);
+                swap(T, sliced_items, &order, 4, 5, context, lessThan);
+                swap(T, sliced_items, &order, 2, 6, context, lessThan);
+                swap(T, sliced_items, &order, 0, 4, context, lessThan);
+                swap(T, sliced_items, &order, 1, 5, context, lessThan);
+                swap(T, sliced_items, &order, 0, 3, context, lessThan);
+                swap(T, sliced_items, &order, 2, 5, context, lessThan);
+                swap(T, sliced_items, &order, 1, 3, context, lessThan);
+                swap(T, sliced_items, &order, 2, 4, context, lessThan);
+                swap(T, sliced_items, &order, 2, 3, context, lessThan);
+            },
+            6 => {
+                swap(T, sliced_items, &order, 1, 2, context, lessThan);
+                swap(T, sliced_items, &order, 4, 5, context, lessThan);
+                swap(T, sliced_items, &order, 0, 2, context, lessThan);
+ swap(T, sliced_items, &order, 3, 5, context, lessThan); + swap(T, sliced_items, &order, 0, 1, context, lessThan); + swap(T, sliced_items, &order, 3, 4, context, lessThan); + swap(T, sliced_items, &order, 2, 5, context, lessThan); + swap(T, sliced_items, &order, 0, 3, context, lessThan); + swap(T, sliced_items, &order, 1, 4, context, lessThan); + swap(T, sliced_items, &order, 2, 4, context, lessThan); + swap(T, sliced_items, &order, 1, 3, context, lessThan); + swap(T, sliced_items, &order, 2, 3, context, lessThan); + }, + 5 => { + swap(T, sliced_items, &order, 0, 1, context, lessThan); + swap(T, sliced_items, &order, 3, 4, context, lessThan); + swap(T, sliced_items, &order, 2, 4, context, lessThan); + swap(T, sliced_items, &order, 2, 3, context, lessThan); + swap(T, sliced_items, &order, 1, 4, context, lessThan); + swap(T, sliced_items, &order, 0, 3, context, lessThan); + swap(T, sliced_items, &order, 0, 2, context, lessThan); + swap(T, sliced_items, &order, 1, 3, context, lessThan); + swap(T, sliced_items, &order, 1, 2, context, lessThan); + }, + 4 => { + swap(T, sliced_items, &order, 0, 1, context, lessThan); + swap(T, sliced_items, &order, 2, 3, context, lessThan); + swap(T, sliced_items, &order, 0, 2, context, lessThan); + swap(T, sliced_items, &order, 1, 3, context, lessThan); + swap(T, sliced_items, &order, 1, 2, context, lessThan); + }, + else => {}, + } + } + if (items.len < 8) return; + + // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc. + while (true) { + // if every A and B block will fit into the cache, use a special branch + // specifically for merging with the cache + // (we use < rather than <= since the block size might be one more than + // iterator.length()) + if (iterator.length() < cache.len) { + // if four subarrays fit into the cache, it's faster to merge both + // pairs of subarrays into the cache, + // then merge the two merged subarrays from the cache back into the original array + if ((iterator.length() + 1) * 4 <= cache.len and iterator.length() * 4 <= items.len) { + iterator.begin(); + while (!iterator.finished()) { + // merge A1 and B1 into the cache + var A1 = iterator.nextRange(); + var B1 = iterator.nextRange(); + var A2 = iterator.nextRange(); + var B2 = iterator.nextRange(); + + if (lessThan(context, items[B1.end - 1], items[A1.start])) { + // the two ranges are in reverse order, so copy them in reverse order into the cache + const a1_items = items[A1.start..A1.end]; + @memcpy(cache[B1.length()..][0..a1_items.len], a1_items); + const b1_items = items[B1.start..B1.end]; + @memcpy(cache[0..b1_items.len], b1_items); + } else if (lessThan(context, items[B1.start], items[A1.end - 1])) { + // these two ranges weren't already in order, so merge them into the cache + mergeInto(T, items, A1, B1, cache[0..], context, lessThan); + } else { + // if A1, B1, A2, and B2 are all in order, skip doing anything else + if (!lessThan(context, items[B2.start], items[A2.end - 1]) and !lessThan(context, items[A2.start], items[B1.end - 1])) continue; + + // copy A1 and B1 into the cache in the same order + const a1_items = items[A1.start..A1.end]; + @memcpy(cache[0..a1_items.len], a1_items); + const b1_items = items[B1.start..B1.end]; + @memcpy(cache[A1.length()..][0..b1_items.len], b1_items); + } + A1 = Range.init(A1.start, B1.end); + + // merge A2 and B2 into the cache + if (lessThan(context, items[B2.end - 1], items[A2.start])) { + // the two ranges are in reverse order, so copy them in reverse order into the cache + const a2_items = 
items[A2.start..A2.end]; + @memcpy(cache[A1.length() + B2.length() ..][0..a2_items.len], a2_items); + const b2_items = items[B2.start..B2.end]; + @memcpy(cache[A1.length()..][0..b2_items.len], b2_items); + } else if (lessThan(context, items[B2.start], items[A2.end - 1])) { + // these two ranges weren't already in order, so merge them into the cache + mergeInto(T, items, A2, B2, cache[A1.length()..], context, lessThan); + } else { + // copy A2 and B2 into the cache in the same order + const a2_items = items[A2.start..A2.end]; + @memcpy(cache[A1.length()..][0..a2_items.len], a2_items); + const b2_items = items[B2.start..B2.end]; + @memcpy(cache[A1.length() + A2.length() ..][0..b2_items.len], b2_items); + } + A2 = Range.init(A2.start, B2.end); + + // merge A1 and A2 from the cache into the items + const A3 = Range.init(0, A1.length()); + const B3 = Range.init(A1.length(), A1.length() + A2.length()); + + if (lessThan(context, cache[B3.end - 1], cache[A3.start])) { + // the two ranges are in reverse order, so copy them in reverse order into the items + const a3_items = cache[A3.start..A3.end]; + @memcpy(items[A1.start + A2.length() ..][0..a3_items.len], a3_items); + const b3_items = cache[B3.start..B3.end]; + @memcpy(items[A1.start..][0..b3_items.len], b3_items); + } else if (lessThan(context, cache[B3.start], cache[A3.end - 1])) { + // these two ranges weren't already in order, so merge them back into the items + mergeInto(T, cache[0..], A3, B3, items[A1.start..], context, lessThan); + } else { + // copy A3 and B3 into the items in the same order + const a3_items = cache[A3.start..A3.end]; + @memcpy(items[A1.start..][0..a3_items.len], a3_items); + const b3_items = cache[B3.start..B3.end]; + @memcpy(items[A1.start + A1.length() ..][0..b3_items.len], b3_items); + } + } + + // we merged two levels at the same time, so we're done with this level already + // (iterator.nextLevel() is called again at the bottom of this outer merge loop) + _ = iterator.nextLevel(); + } else { + iterator.begin(); + while (!iterator.finished()) { + var A = iterator.nextRange(); + var B = iterator.nextRange(); + + if (lessThan(context, items[B.end - 1], items[A.start])) { + // the two ranges are in reverse order, so a simple rotation should fix it + mem.rotate(T, items[A.start..B.end], A.length()); + } else if (lessThan(context, items[B.start], items[A.end - 1])) { + // these two ranges weren't already in order, so we'll need to merge them! + const a_items = items[A.start..A.end]; + @memcpy(cache[0..a_items.len], a_items); + mergeExternal(T, items, A, B, cache[0..], context, lessThan); + } + } + } + } else { + // this is where the in-place merge logic starts! + // 1. pull out two internal buffers each containing √A unique values + // 1a. adjust block_size and buffer_size if we couldn't find enough unique values + // 2. loop over the A and B subarrays within this level of the merge sort + // 3. break A and B into blocks of size 'block_size' + // 4. "tag" each of the A blocks with values from the first internal buffer + // 5. roll the A blocks through the B blocks and drop/rotate them where they belong + // 6. merge each A block with any B values that follow, using the cache or the second internal buffer + // 7. sort the second internal buffer if it exists + // 8. 
redistribute the two internal buffers back into the items + var block_size: usize = math.sqrt(iterator.length()); + var buffer_size = iterator.length() / block_size + 1; + + // as an optimization, we really only need to pull out the internal buffers once for each level of merges + // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level + var A: Range = undefined; + var B: Range = undefined; + var index: usize = 0; + var last: usize = 0; + var count: usize = 0; + var find: usize = 0; + var start: usize = 0; + var pull_index: usize = 0; + var pull = [_]Pull{ + Pull{ + .from = 0, + .to = 0, + .count = 0, + .range = Range.init(0, 0), + }, + Pull{ + .from = 0, + .to = 0, + .count = 0, + .range = Range.init(0, 0), + }, + }; + + var buffer1 = Range.init(0, 0); + var buffer2 = Range.init(0, 0); + + // find two internal buffers of size 'buffer_size' each + find = buffer_size + buffer_size; + var find_separately = false; + + if (block_size <= cache.len) { + // if every A block fits into the cache then we won't need the second internal buffer, + // so we really only need to find 'buffer_size' unique values + find = buffer_size; + } else if (find > iterator.length()) { + // we can't fit both buffers into the same A or B subarray, so find two buffers separately + find = buffer_size; + find_separately = true; + } + + // we need to find either a single contiguous space containing 2√A unique values (which will be split up into two buffers of size √A each), + // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values, + // OR if we couldn't find that many unique values, we need the largest possible buffer we can get + + // in the case where it couldn't find a single buffer of at least √A unique values, + // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace) + iterator.begin(); + while (!iterator.finished()) { + A = iterator.nextRange(); + B = iterator.nextRange(); + + // just store information about where the values will be pulled from and to, + // as well as how many values there are, to create the two internal buffers + + // check A for the number of unique values we need to fill an internal buffer + // these values will be pulled out to the start of A + last = A.start; + count = 1; + while (count < find) : ({ + last = index; + count += 1; + }) { + index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), find - count, context, lessThan); + if (index == A.end) break; + } + index = last; + + if (count >= buffer_size) { + // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer + pull[pull_index] = Pull{ + .range = Range.init(A.start, B.end), + .count = count, + .from = index, + .to = A.start, + }; + pull_index = 1; + + if (count == buffer_size + buffer_size) { + // we were able to find a single contiguous section containing 2√A unique values, + // so this section can be used to contain both of the internal buffers we'll need + buffer1 = Range.init(A.start, A.start + buffer_size); + buffer2 = Range.init(A.start + buffer_size, A.start + count); + break; + } else if (find == buffer_size + buffer_size) { + // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values, + // so we still need to find a second separate buffer of at least √A unique values + buffer1 = Range.init(A.start, A.start + count); + find = buffer_size; + } else if (block_size <= 
cache.len) {
+                        // we found the first and only internal buffer that we need, so we're done!
+                        buffer1 = Range.init(A.start, A.start + count);
+                        break;
+                    } else if (find_separately) {
+                        // found one buffer, but now find the other one
+                        buffer1 = Range.init(A.start, A.start + count);
+                        find_separately = false;
+                    } else {
+                        // we found a second buffer in an 'A' subarray containing √A unique values, so we're done!
+                        buffer2 = Range.init(A.start, A.start + count);
+                        break;
+                    }
+                } else if (pull_index == 0 and count > buffer1.length()) {
+                    // keep track of the largest buffer we were able to find
+                    buffer1 = Range.init(A.start, A.start + count);
+                    pull[pull_index] = Pull{
+                        .range = Range.init(A.start, B.end),
+                        .count = count,
+                        .from = index,
+                        .to = A.start,
+                    };
+                }
+
+                // check B for the number of unique values we need to fill an internal buffer
+                // these values will be pulled out to the end of B
+                last = B.end - 1;
+                count = 1;
+                while (count < find) : ({
+                    last = index - 1;
+                    count += 1;
+                }) {
+                    index = findFirstBackward(T, items, items[last], Range.init(B.start, last), find - count, context, lessThan);
+                    if (index == B.start) break;
+                }
+                index = last;
+
+                if (count >= buffer_size) {
+                    // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer
+                    pull[pull_index] = Pull{
+                        .range = Range.init(A.start, B.end),
+                        .count = count,
+                        .from = index,
+                        .to = B.end,
+                    };
+                    pull_index = 1;
+
+                    if (count == buffer_size + buffer_size) {
+                        // we were able to find a single contiguous section containing 2√A unique values,
+                        // so this section can be used to contain both of the internal buffers we'll need
+                        buffer1 = Range.init(B.end - count, B.end - buffer_size);
+                        buffer2 = Range.init(B.end - buffer_size, B.end);
+                        break;
+                    } else if (find == buffer_size + buffer_size) {
+                        // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values,
+                        // so we still need to find a second separate buffer of at least √A unique values
+                        buffer1 = Range.init(B.end - count, B.end);
+                        find = buffer_size;
+                    } else if (block_size <= cache.len) {
+                        // we found the first and only internal buffer that we need, so we're done!
+                        buffer1 = Range.init(B.end - count, B.end);
+                        break;
+                    } else if (find_separately) {
+                        // found one buffer, but now find the other one
+                        buffer1 = Range.init(B.end - count, B.end);
+                        find_separately = false;
+                    } else {
+                        // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray,
+                        // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2
+                        if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count;
+
+                        // we found a second buffer in a 'B' subarray containing √A unique values, so we're done!
+ buffer2 = Range.init(B.end - count, B.end); + break; + } + } else if (pull_index == 0 and count > buffer1.length()) { + // keep track of the largest buffer we were able to find + buffer1 = Range.init(B.end - count, B.end); + pull[pull_index] = Pull{ + .range = Range.init(A.start, B.end), + .count = count, + .from = index, + .to = B.end, + }; + } + } + + // pull out the two ranges so we can use them as internal buffers + pull_index = 0; + while (pull_index < 2) : (pull_index += 1) { + const length = pull[pull_index].count; + + if (pull[pull_index].to < pull[pull_index].from) { + // we're pulling the values out to the left, which means the start of an A subarray + index = pull[pull_index].from; + count = 1; + while (count < length) : (count += 1) { + index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), length - count, context, lessThan); + const range = Range.init(index + 1, pull[pull_index].from + 1); + mem.rotate(T, items[range.start..range.end], range.length() - count); + pull[pull_index].from = index + count; + } + } else if (pull[pull_index].to > pull[pull_index].from) { + // we're pulling values out to the right, which means the end of a B subarray + index = pull[pull_index].from + 1; + count = 1; + while (count < length) : (count += 1) { + index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), length - count, context, lessThan); + const range = Range.init(pull[pull_index].from, index - 1); + mem.rotate(T, items[range.start..range.end], count); + pull[pull_index].from = index - 1 - count; + } + } + } + + // adjust block_size and buffer_size based on the values we were able to pull out + buffer_size = buffer1.length(); + block_size = iterator.length() / buffer_size + 1; + + // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks, + // so this was originally here to test the math for adjusting block_size above + // assert((iterator.length() + 1)/block_size <= buffer_size); + + // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort! + iterator.begin(); + while (!iterator.finished()) { + A = iterator.nextRange(); + B = iterator.nextRange(); + + // remove any parts of A or B that are being used by the internal buffers + start = A.start; + if (start == pull[0].range.start) { + if (pull[0].from > pull[0].to) { + A.start += pull[0].count; + + // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge + // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4, + // which also only happens when cache.len is small or 0 since it'd otherwise use MergeExternal + if (A.length() == 0) continue; + } else if (pull[0].from < pull[0].to) { + B.end -= pull[0].count; + if (B.length() == 0) continue; + } + } + if (start == pull[1].range.start) { + if (pull[1].from > pull[1].to) { + A.start += pull[1].count; + if (A.length() == 0) continue; + } else if (pull[1].from < pull[1].to) { + B.end -= pull[1].count; + if (B.length() == 0) continue; + } + } + + if (lessThan(context, items[B.end - 1], items[A.start])) { + // the two ranges are in reverse order, so a simple rotation should fix it + mem.rotate(T, items[A.start..B.end], A.length()); + } else if (lessThan(context, items[A.end], items[A.end - 1])) { + // these two ranges weren't already in order, so we'll need to merge them! 
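+                // (the code below carries out steps 3-5 of the outline above: break the
+                // remainder of A into block_size-sized blocks, tag each A block's first
+                // value using buffer1, then roll the A blocks through the B blocks and
+                // merge each one as it drops into place.)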
+ var findA: usize = undefined; + + // break the remainder of A into blocks. firstA is the uneven-sized first A block + var blockA = Range.init(A.start, A.end); + var firstA = Range.init(A.start, A.start + blockA.length() % block_size); + + // swap the first value of each A block with the value in buffer1 + var indexA = buffer1.start; + index = firstA.end; + while (index < blockA.end) : ({ + indexA += 1; + index += block_size; + }) { + mem.swap(T, &items[indexA], &items[index]); + } + + // start rolling the A blocks through the B blocks! + // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well + var lastA = firstA; + var lastB = Range.init(0, 0); + var blockB = Range.init(B.start, B.start + math.min(block_size, B.length())); + blockA.start += firstA.length(); + indexA = buffer1.start; + + // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it + // otherwise, if the second buffer is available, block swap the contents into that + if (lastA.length() <= cache.len) { + const last_a_items = items[lastA.start..lastA.end]; + @memcpy(cache[0..last_a_items.len], last_a_items); + } else if (buffer2.length() > 0) { + blockSwap(T, items, lastA.start, buffer2.start, lastA.length()); + } + + if (blockA.length() > 0) { + while (true) { + // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block, + // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks. + if ((lastB.length() > 0 and !lessThan(context, items[lastB.end - 1], items[indexA])) or blockB.length() == 0) { + // figure out where to split the previous B block, and rotate it at the split + const B_split = binaryFirst(T, items, items[indexA], lastB, context, lessThan); + const B_remaining = lastB.end - B_split; + + // swap the minimum A block to the beginning of the rolling A blocks + var minA = blockA.start; + findA = minA + block_size; + while (findA < blockA.end) : (findA += block_size) { + if (lessThan(context, items[findA], items[minA])) { + minA = findA; + } + } + blockSwap(T, items, blockA.start, minA, block_size); + + // swap the first item of the previous A block back with its original value, which is stored in buffer1 + mem.swap(T, &items[blockA.start], &items[indexA]); + indexA += 1; + + // locally merge the previous A block with the B values that follow it + // if lastA fits into the external cache we'll use that (with MergeExternal), + // or if the second internal buffer exists we'll use that (with MergeInternal), + // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace) + + if (lastA.length() <= cache.len) { + mergeExternal(T, items, lastA, Range.init(lastA.end, B_split), cache[0..], context, lessThan); + } else if (buffer2.length() > 0) { + mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), buffer2, context, lessThan); + } else { + mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), context, lessThan); + } + + if (buffer2.length() > 0 or block_size <= cache.len) { + // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway + if (block_size <= cache.len) { + @memcpy(cache[0..block_size], items[blockA.start..][0..block_size]); + } else { + blockSwap(T, items, blockA.start, buffer2.start, block_size); + } + + // this is equivalent to rotating, but faster + // the area 
normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it + // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs + blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining); + } else { + // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation + mem.rotate(T, items[B_split .. blockA.start + block_size], blockA.start - B_split); + } + + // update the range for the remaining A blocks, and the range remaining from the B block after it was split + lastA = Range.init(blockA.start - B_remaining, blockA.start - B_remaining + block_size); + lastB = Range.init(lastA.end, lastA.end + B_remaining); + + // if there are no more A blocks remaining, this step is finished! + blockA.start += block_size; + if (blockA.length() == 0) break; + } else if (blockB.length() < block_size) { + // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation + // the cache is disabled here since it might contain the contents of the previous A block + mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start); + + lastB = Range.init(blockA.start, blockA.start + blockB.length()); + blockA.start += blockB.length(); + blockA.end += blockB.length(); + blockB.end = blockB.start; + } else { + // roll the leftmost A block to the end by swapping it with the next B block + blockSwap(T, items, blockA.start, blockB.start, block_size); + lastB = Range.init(blockA.start, blockA.start + block_size); + + blockA.start += block_size; + blockA.end += block_size; + blockB.start += block_size; + + if (blockB.end > B.end - block_size) { + blockB.end = B.end; + } else { + blockB.end += block_size; + } + } + } + } + + // merge the last A block with the remaining B values + if (lastA.length() <= cache.len) { + mergeExternal(T, items, lastA, Range.init(lastA.end, B.end), cache[0..], context, lessThan); + } else if (buffer2.length() > 0) { + mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), buffer2, context, lessThan); + } else { + mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), context, lessThan); + } + } + } + + // when we're finished with this merge step we should have the one + // or two internal buffers left over, where the second buffer is all jumbled up + // insertion sort the second buffer, then redistribute the buffers + // back into the items using the opposite process used for creating the buffer + + // while an unstable sort like quicksort could be applied here, in benchmarks + // it was consistently slightly slower than a simple insertion sort, + // even for tens of millions of items. 
this may be because insertion + // sort is quite fast when the data is already somewhat sorted, like it is here + sort.insertion(T, items[buffer2.start..buffer2.end], context, lessThan); + + pull_index = 0; + while (pull_index < 2) : (pull_index += 1) { + var unique = pull[pull_index].count * 2; + if (pull[pull_index].from > pull[pull_index].to) { + // the values were pulled out to the left, so redistribute them back to the right + var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count); + while (buffer.length() > 0) { + index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), unique, context, lessThan); + const amount = index - buffer.end; + mem.rotate(T, items[buffer.start..index], buffer.length()); + buffer.start += (amount + 1); + buffer.end += amount; + unique -= 2; + } + } else if (pull[pull_index].from < pull[pull_index].to) { + // the values were pulled out to the right, so redistribute them back to the left + var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end); + while (buffer.length() > 0) { + index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), unique, context, lessThan); + const amount = buffer.start - index; + mem.rotate(T, items[index..buffer.end], amount); + buffer.start -= amount; + buffer.end -= (amount + 1); + unique -= 2; + } + } + } + } + + // double the size of each A and B subarray that will be merged in the next level + if (!iterator.nextLevel()) break; + } +} +// merge operation without a buffer +fn mergeInPlace( + comptime T: type, + items: []T, + A_arg: Range, + B_arg: Range, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) void { + if (A_arg.length() == 0 or B_arg.length() == 0) return; + + // this just repeatedly binary searches into B and rotates A into position. + // the paper suggests using the 'rotation-based Hwang and Lin algorithm' here, + // but I decided to stick with this because it had better situational performance + // + // (Hwang and Lin is designed for merging subarrays of very different sizes, + // but WikiSort almost always uses subarrays that are roughly the same size) + // + // normally this is incredibly suboptimal, but this function is only called + // when none of the A or B blocks in any subarray contained 2√A unique values, + // which places a hard limit on the number of times this will ACTUALLY need + // to binary search and rotate. + // + // according to my analysis the worst case is √A rotations performed on √A items + // once the constant factors are removed, which ends up being O(n) + // + // again, this is NOT a general-purpose solution – it only works well in this case! 
+    // kind of like how the O(n^2) insertion sort is used in some places
+
+    var A = A_arg;
+    var B = B_arg;
+
+    while (true) {
+        // find the first place in B where the first item in A needs to be inserted
+        const mid = binaryFirst(T, items, items[A.start], B, context, lessThan);
+
+        // rotate A into place
+        const amount = mid - A.end;
+        mem.rotate(T, items[A.start..mid], A.length());
+        if (B.end == mid) break;
+
+        // calculate the new A and B ranges
+        B.start = mid;
+        A = Range.init(A.start + amount, B.start);
+        A.start = binaryLast(T, items, items[A.start], A, context, lessThan);
+        if (A.length() == 0) break;
+    }
+}
+
+// merge operation using an internal buffer
+fn mergeInternal(
+    comptime T: type,
+    items: []T,
+    A: Range,
+    B: Range,
+    buffer: Range,
+    context: anytype,
+    comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool,
+) void {
+    // whenever we find a value to add to the final array, swap it with the value that's already in that spot
+    // when this algorithm is finished, 'buffer' will contain its original contents, but in a different order
+    var A_count: usize = 0;
+    var B_count: usize = 0;
+    var insert: usize = 0;
+
+    if (B.length() > 0 and A.length() > 0) {
+        while (true) {
+            if (!lessThan(context, items[B.start + B_count], items[buffer.start + A_count])) {
+                mem.swap(T, &items[A.start + insert], &items[buffer.start + A_count]);
+                A_count += 1;
+                insert += 1;
+                if (A_count >= A.length()) break;
+            } else {
+                mem.swap(T, &items[A.start + insert], &items[B.start + B_count]);
+                B_count += 1;
+                insert += 1;
+                if (B_count >= B.length()) break;
+            }
+        }
+    }
+
+    // swap the remainder of A into the final array
+    blockSwap(T, items, buffer.start + A_count, A.start + insert, A.length() - A_count);
+}
+
+fn blockSwap(comptime T: type, items: []T, start1: usize, start2: usize, block_size: usize) void {
+    var index: usize = 0;
+    while (index < block_size) : (index += 1) {
+        mem.swap(T, &items[start1 + index], &items[start2 + index]);
+    }
+}
+
+// combine a linear search with a binary search to reduce the number of comparisons in situations
+// where we have some idea as to how many unique values there are and where the next value might be
+fn findFirstForward(
+    comptime T: type,
+    items: []T,
+    value: T,
+    range: Range,
+    unique: usize,
+    context: anytype,
+    comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool,
+) usize {
+    if (range.length() == 0) return range.start;
+    const skip = math.max(range.length() / unique, @as(usize, 1));
+
+    var index = range.start + skip;
+    while (lessThan(context, items[index - 1], value)) : (index += skip) {
+        if (index >= range.end - skip) {
+            return binaryFirst(T, items, value, Range.init(index, range.end), context, lessThan);
+        }
+    }
+
+    return binaryFirst(T, items, value, Range.init(index - skip, index), context, lessThan);
+}
+
+fn findFirstBackward(
+    comptime T: type,
+    items: []T,
+    value: T,
+    range: Range,
+    unique: usize,
+    context: anytype,
+    comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool,
+) usize {
+    if (range.length() == 0) return range.start;
+    const skip = math.max(range.length() / unique, @as(usize, 1));
+
+    var index = range.end - skip;
+    while (index > range.start and !lessThan(context, items[index - 1], value)) : (index -= skip) {
+        if (index < range.start + skip) {
+            return binaryFirst(T, items, value, Range.init(range.start, index), context, lessThan);
+        }
+    }
+
+    return binaryFirst(T, items, value, Range.init(index, index + skip), context, lessThan);
+}
+
+fn findLastForward(
+    comptime T: type,
+    items:
[]T, + value: T, + range: Range, + unique: usize, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) usize { + if (range.length() == 0) return range.start; + const skip = math.max(range.length() / unique, @as(usize, 1)); + + var index = range.start + skip; + while (!lessThan(context, value, items[index - 1])) : (index += skip) { + if (index >= range.end - skip) { + return binaryLast(T, items, value, Range.init(index, range.end), context, lessThan); + } + } + + return binaryLast(T, items, value, Range.init(index - skip, index), context, lessThan); +} + +fn findLastBackward( + comptime T: type, + items: []T, + value: T, + range: Range, + unique: usize, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) usize { + if (range.length() == 0) return range.start; + const skip = math.max(range.length() / unique, @as(usize, 1)); + + var index = range.end - skip; + while (index > range.start and lessThan(context, value, items[index - 1])) : (index -= skip) { + if (index < range.start + skip) { + return binaryLast(T, items, value, Range.init(range.start, index), context, lessThan); + } + } + + return binaryLast(T, items, value, Range.init(index, index + skip), context, lessThan); +} + +fn binaryFirst( + comptime T: type, + items: []T, + value: T, + range: Range, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) usize { + var curr = range.start; + var size = range.length(); + if (range.start >= range.end) return range.end; + while (size > 0) { + const offset = size % 2; + + size /= 2; + const mid_item = items[curr + size]; + if (lessThan(context, mid_item, value)) { + curr += size + offset; + } + } + return curr; +} + +fn binaryLast( + comptime T: type, + items: []T, + value: T, + range: Range, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) usize { + var curr = range.start; + var size = range.length(); + if (range.start >= range.end) return range.end; + while (size > 0) { + const offset = size % 2; + + size /= 2; + const mid_item = items[curr + size]; + if (!lessThan(context, value, mid_item)) { + curr += size + offset; + } + } + return curr; +} + +fn mergeInto( + comptime T: type, + from: []T, + A: Range, + B: Range, + into: []T, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) void { + var A_index: usize = A.start; + var B_index: usize = B.start; + const A_last = A.end; + const B_last = B.end; + var insert_index: usize = 0; + + while (true) { + if (!lessThan(context, from[B_index], from[A_index])) { + into[insert_index] = from[A_index]; + A_index += 1; + insert_index += 1; + if (A_index == A_last) { + // copy the remainder of B into the final array + const from_b = from[B_index..B_last]; + @memcpy(into[insert_index..][0..from_b.len], from_b); + break; + } + } else { + into[insert_index] = from[B_index]; + B_index += 1; + insert_index += 1; + if (B_index == B_last) { + // copy the remainder of A into the final array + const from_a = from[A_index..A_last]; + @memcpy(into[insert_index..][0..from_a.len], from_a); + break; + } + } + } +} + +fn mergeExternal( + comptime T: type, + items: []T, + A: Range, + B: Range, + cache: []T, + context: anytype, + comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool, +) void { + // A fits into the cache, so use that instead of the internal buffer + var A_index: usize = 0; + var B_index: usize = B.start; + var insert_index: usize = A.start; + const A_last = A.length(); + 
const B_last = B.end;
+
+    if (B.length() > 0 and A.length() > 0) {
+        while (true) {
+            if (!lessThan(context, items[B_index], cache[A_index])) {
+                items[insert_index] = cache[A_index];
+                A_index += 1;
+                insert_index += 1;
+                if (A_index == A_last) break;
+            } else {
+                items[insert_index] = items[B_index];
+                B_index += 1;
+                insert_index += 1;
+                if (B_index == B_last) break;
+            }
+        }
+    }
+
+    // copy the remainder of A into the final array
+    const cache_a = cache[A_index..A_last];
+    @memcpy(items[insert_index..][0..cache_a.len], cache_a);
+}
+
+fn swap(
+    comptime T: type,
+    items: []T,
+    order: *[8]u8,
+    x: usize,
+    y: usize,
+    context: anytype,
+    comptime lessThan: fn (@TypeOf(context), lhs: T, rhs: T) bool,
+) void {
+    if (lessThan(context, items[y], items[x]) or ((order.*)[x] > (order.*)[y] and !lessThan(context, items[x], items[y]))) {
+        mem.swap(T, &items[x], &items[y]);
+        mem.swap(u8, &(order.*)[x], &(order.*)[y]);
+    }
+}
diff --git a/lib/std/sort/pdq.zig b/lib/std/sort/pdq.zig
new file mode 100644
index 000000000000..11d330047ae7
--- /dev/null
+++ b/lib/std/sort/pdq.zig
@@ -0,0 +1,335 @@
+const std = @import("../std.zig");
+const sort = std.sort;
+const mem = std.mem;
+const math = std.math;
+const testing = std.testing;
+
+/// Unstable in-place sort. O(n) best case, O(n*log(n)) worst case and average case.
+/// O(log(n)) memory (no allocator required).
+///
+/// Sorts in ascending order with respect to the given `lessThan` function.
+pub fn pdq(
+    comptime T: type,
+    items: []T,
+    context: anytype,
+    comptime lessThanFn: fn (context: @TypeOf(context), lhs: T, rhs: T) bool,
+) void {
+    const Context = struct {
+        items: []T,
+        sub_ctx: @TypeOf(context),
+
+        pub fn lessThan(ctx: @This(), a: usize, b: usize) bool {
+            return lessThanFn(ctx.sub_ctx, ctx.items[a], ctx.items[b]);
+        }
+
+        pub fn swap(ctx: @This(), a: usize, b: usize) void {
+            return mem.swap(T, &ctx.items[a], &ctx.items[b]);
+        }
+    };
+    pdqContext(0, items.len, Context{ .items = items, .sub_ctx = context });
+}
+
+const Hint = enum {
+    increasing,
+    decreasing,
+    unknown,
+};
+
+/// Unstable in-place sort. O(n) best case, O(n*log(n)) worst case and average case.
+/// O(log(n)) memory (no allocator required).
+///
+/// Sorts in ascending order with respect to the given `lessThan` function.
+pub fn pdqContext(a: usize, b: usize, context: anytype) void {
+    // slices of up to this length get sorted using insertion sort.
+    const max_insertion = 24;
+    // number of allowed imbalanced partitions before switching to heap sort.
+    const max_limit = std.math.floorPowerOfTwo(usize, b) + 1;
+
+    // set upper bound on stack memory usage.
+    const Range = struct { a: usize, b: usize, limit: usize };
+    const stack_size = math.log2(math.maxInt(usize) + 1);
+    var stack: [stack_size]Range = undefined;
+    var range = Range{ .a = a, .b = b, .limit = max_limit };
+    var top: usize = 0;
+
+    while (true) {
+        var was_balanced = true;
+        var was_partitioned = true;
+
+        while (true) {
+            const len = range.b - range.a;
+
+            // very short slices get sorted using insertion sort.
+            if (len <= max_insertion) {
+                break sort.insertionContext(range.a, range.b, context);
+            }
+
+            // if too many bad pivot choices were made, simply fall back to heapsort in order to
+            // guarantee O(n*log(n)) worst-case.
+            if (range.limit == 0) {
+                break sort.heapContext(range.a, range.b, context);
+            }
+
+            // if the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
+            // some elements around. Hopefully we'll choose a better pivot this time.
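+            // (`breakPatterns`, defined below, swaps the three elements around the
+            // midpoint with pseudo-random positions to defeat adversarial inputs.)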
+            if (!was_balanced) {
+                breakPatterns(range.a, range.b, context);
+                range.limit -= 1;
+            }
+
+            // choose a pivot and try guessing whether the slice is already sorted.
+            var pivot: usize = 0;
+            var hint = chosePivot(range.a, range.b, &pivot, context);
+
+            if (hint == .decreasing) {
+                // The maximum number of swaps was performed, so items are likely
+                // in reverse order. Reverse it to make sorting faster.
+                reverseRange(range.a, range.b, context);
+                pivot = (range.b - 1) - (pivot - range.a);
+                hint = .increasing;
+            }
+
+            // if the last partitioning was decently balanced and didn't shuffle elements, and if pivot
+            // selection predicts the slice is likely already sorted...
+            if (was_balanced and was_partitioned and hint == .increasing) {
+                // try identifying several out-of-order elements and shifting them to correct
+                // positions. If the slice ends up being completely sorted, we're done.
+                if (partialInsertionSort(range.a, range.b, context)) break;
+            }
+
+            // if the chosen pivot is equal to the predecessor, then it's the smallest element in the
+            // slice. Partition the slice into elements equal to and elements greater than the pivot.
+            // This case is usually hit when the slice contains many duplicate elements.
+            if (range.a > 0 and !context.lessThan(range.a - 1, pivot)) {
+                range.a = partitionEqual(range.a, range.b, pivot, context);
+                continue;
+            }
+
+            // partition the slice.
+            var mid = pivot;
+            was_partitioned = partition(range.a, range.b, &mid, context);
+
+            const left_len = mid - range.a;
+            const right_len = range.b - mid;
+            const balanced_threshold = len / 8;
+            if (left_len < right_len) {
+                was_balanced = left_len >= balanced_threshold;
+                stack[top] = .{ .a = range.a, .b = mid, .limit = range.limit };
+                top += 1;
+                range.a = mid + 1;
+            } else {
+                was_balanced = right_len >= balanced_threshold;
+                stack[top] = .{ .a = mid + 1, .b = range.b, .limit = range.limit };
+                top += 1;
+                range.b = mid;
+            }
+        }
+
+        top = math.sub(usize, top, 1) catch break;
+        range = stack[top];
+    }
+}
+
+/// partitions `items[a..b]` into elements smaller than `items[pivot]`,
+/// followed by elements greater than or equal to `items[pivot]`.
+///
+/// sets the new pivot.
+/// returns `true` if already partitioned.
+fn partition(a: usize, b: usize, pivot: *usize, context: anytype) bool {
+    // move pivot to the first place
+    context.swap(a, pivot.*);
+
+    var i = a + 1;
+    var j = b - 1;
+
+    while (i <= j and context.lessThan(i, a)) i += 1;
+    while (i <= j and !context.lessThan(j, a)) j -= 1;
+
+    // check if items are already partitioned (no item to swap)
+    if (i > j) {
+        // put pivot back to the middle
+        context.swap(j, a);
+        pivot.* = j;
+        return true;
+    }
+
+    context.swap(i, j);
+    i += 1;
+    j -= 1;
+
+    while (true) {
+        while (i <= j and context.lessThan(i, a)) i += 1;
+        while (i <= j and !context.lessThan(j, a)) j -= 1;
+        if (i > j) break;
+
+        context.swap(i, j);
+        i += 1;
+        j -= 1;
+    }
+
+    // TODO: Enable the BlockQuicksort optimization
+
+    context.swap(j, a);
+    pivot.* = j;
+    return false;
+}
+
+/// partitions items into elements equal to `items[pivot]`
+/// followed by elements greater than `items[pivot]`.
+///
+/// it is assumed that `items[a..b]` does not contain elements smaller than `items[pivot]`.
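+///
+/// e.g. (hypothetical contents, pivot value 1): `{ 1, 3, 1, 2, 1 }` is rearranged
+/// into `{ 1, 1, 1, ... }` with the greater elements after, and the returned index
+/// is that of the first element greater than the pivot.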
+fn partitionEqual(a: usize, b: usize, pivot: usize, context: anytype) usize {
+    // move pivot to the first place
+    context.swap(a, pivot);
+
+    var i = a + 1;
+    var j = b - 1;
+
+    while (true) {
+        while (i <= j and !context.lessThan(a, i)) i += 1;
+        while (i <= j and context.lessThan(a, j)) j -= 1;
+        if (i > j) break;
+
+        context.swap(i, j);
+        i += 1;
+        j -= 1;
+    }
+
+    return i;
+}
+
+/// partially sorts a slice by shifting several out-of-order elements around.
+///
+/// returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
+fn partialInsertionSort(a: usize, b: usize, context: anytype) bool {
+    @setCold(true);
+
+    // maximum number of adjacent out-of-order pairs that will get shifted
+    const max_steps = 5;
+    // if the slice is shorter than this, don't shift any elements
+    const shortest_shifting = 50;
+
+    var i = a + 1;
+    for (0..max_steps) |_| {
+        // find the next pair of adjacent out-of-order elements.
+        while (i < b and !context.lessThan(i, i - 1)) i += 1;
+
+        // are we done?
+        if (i == b) return true;
+
+        // don't shift elements on short arrays; that has a performance cost.
+        if (b - a < shortest_shifting) return false;
+
+        // swap the found pair of elements. This puts them in correct order.
+        context.swap(i, i - 1);
+
+        // shift the smaller element to the left.
+        if (i - a >= 2) {
+            var j = i - 1;
+            while (j >= 1) : (j -= 1) {
+                if (!context.lessThan(j, j - 1)) break;
+                context.swap(j, j - 1);
+            }
+        }
+
+        // shift the greater element to the right.
+        if (b - i >= 2) {
+            var j = i + 1;
+            while (j < b) : (j += 1) {
+                if (!context.lessThan(j, j - 1)) break;
+                context.swap(j, j - 1);
+            }
+        }
+    }
+
+    return false;
+}
+
+fn breakPatterns(a: usize, b: usize, context: anytype) void {
+    @setCold(true);
+
+    const len = b - a;
+    if (len < 8) return;
+
+    var rand: u64 = len;
+    const modulus = math.ceilPowerOfTwoAssert(u64, len);
+
+    var i = a + (len / 4) * 2 - 1;
+    while (i <= a + (len / 4) * 2 + 1) : (i += 1) {
+        var other: usize = xorshift64(&rand) & (modulus - 1);
+        if (other >= len) other -= len;
+        context.swap(i, a + other);
+    }
+}
+
+fn xorshift64(a: *u64) u64 {
+    var x = a.*;
+    x ^= x << 13;
+    x ^= x >> 7;
+    x ^= x << 17;
+    a.* = x;
+    return x;
+}
+
+/// chooses a pivot in `items[a..b]`.
+/// returns a `Hint` indicating whether `items[a..b]` appears to be already sorted.
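+///
+/// (the candidates are the quartile points of `items[a..b]`, refined with
+/// Tukey's ninther for slices of at least 50 elements; zero swaps during
+/// median selection hints `.increasing`, the maximum of 12 hints `.decreasing`.)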
+fn chosePivot(a: usize, b: usize, pivot: *usize, context: anytype) Hint { + // minimum length for using the Tukey's ninther method + const shortest_ninther = 50; + // max_swaps is the maximum number of swaps allowed in this function + const max_swaps = 4 * 3; + + var len = b - a; + var i = a + len / 4 * 1; + var j = a + len / 4 * 2; + var k = a + len / 4 * 3; + var swaps: usize = 0; + + if (len >= 8) { + if (len >= shortest_ninther) { + // find medians in the neighborhoods of `i`, `j` and `k` + i = sort3(i - 1, i, i + 1, &swaps, context); + j = sort3(j - 1, j, j + 1, &swaps, context); + k = sort3(k - 1, k, k + 1, &swaps, context); + } + + // find the median among `i`, `j` and `k` + j = sort3(i, j, k, &swaps, context); + } + + pivot.* = j; + return switch (swaps) { + 0 => .increasing, + max_swaps => .decreasing, + else => .unknown, + }; +} + +fn sort3(a: usize, b: usize, c: usize, swaps: *usize, context: anytype) usize { + if (context.lessThan(b, a)) { + swaps.* += 1; + context.swap(b, a); + } + + if (context.lessThan(c, b)) { + swaps.* += 1; + context.swap(c, b); + } + + if (context.lessThan(b, a)) { + swaps.* += 1; + context.swap(b, a); + } + + return b; +} + +fn reverseRange(a: usize, b: usize, context: anytype) void { + var i = a; + var j = b - 1; + while (i < j) { + context.swap(i, j); + i += 1; + j -= 1; + } +} diff --git a/src/Compilation.zig b/src/Compilation.zig index b48580032981..cc2e2a916b0d 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -672,7 +672,7 @@ fn addPackageTableToCacheHash( } } // Sort the slice by package name - std.sort.sort(Package.Table.KV, packages, {}, struct { + mem.sort(Package.Table.KV, packages, {}, struct { fn lessThan(_: void, lhs: Package.Table.KV, rhs: Package.Table.KV) bool { return std.mem.lessThan(u8, lhs.key, rhs.key); } diff --git a/src/Package.zig b/src/Package.zig index f28aac885d57..cde3f38e2886 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -672,7 +672,7 @@ fn computePackageHash( } } - std.sort.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan); + mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan); var hasher = Manifest.Hash.init(.{}); var any_failures = false; diff --git a/src/RangeSet.zig b/src/RangeSet.zig index 7e501f984be9..aa051ff424fb 100644 --- a/src/RangeSet.zig +++ b/src/RangeSet.zig @@ -60,7 +60,7 @@ pub fn spans(self: *RangeSet, first: Value, last: Value, ty: Type) !bool { if (self.ranges.items.len == 0) return false; - std.sort.sort(Range, self.ranges.items, LessThanContext{ + std.mem.sort(Range, self.ranges.items, LessThanContext{ .ty = ty, .module = self.module, }, lessThan); diff --git a/src/Sema.zig b/src/Sema.zig index 6a1c889a6eee..bf49378726b9 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -30945,7 +30945,7 @@ fn resolveStructLayout(sema: *Sema, ty: Type) CompileError!void { ctx.struct_obj.fields.values()[b].ty.abiAlignment(target); } }; - std.sort.sort(u32, optimized_order, AlignSortContext{ + mem.sort(u32, optimized_order, AlignSortContext{ .struct_obj = struct_obj, .sema = sema, }, AlignSortContext.lessThan); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e83524237971..55a9694fd354 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2176,7 +2176,7 @@ fn computeFrameLayout(self: *Self) !FrameLayout { } }; const sort_context = SortContext{ .frame_align = frame_align }; - std.sort.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan); + mem.sort(FrameIndex, stack_frame_order, sort_context, 
SortContext.lessThan); } const call_frame_align = frame_align[@enumToInt(FrameIndex.call_frame)]; diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 6ed0aeeff409..625a5283b98a 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -770,7 +770,7 @@ const mnemonic_to_encodings_map = init: { @setEvalBranchQuota(30_000); const encodings = @import("encodings.zig"); var entries = encodings.table; - std.sort.sort(encodings.Entry, &entries, {}, struct { + std.mem.sort(encodings.Entry, &entries, {}, struct { fn lessThan(_: void, lhs: encodings.Entry, rhs: encodings.Entry) bool { return @enumToInt(lhs[0]) < @enumToInt(rhs[0]); } diff --git a/src/codegen/c/type.zig b/src/codegen/c/type.zig index 892914ea3d1a..8494ae7353ab 100644 --- a/src/codegen/c/type.zig +++ b/src/codegen/c/type.zig @@ -1292,7 +1292,7 @@ pub const CType = extern union { fn sortFields(self: *@This(), fields_len: usize) []Payload.Fields.Field { const Field = Payload.Fields.Field; const slice = self.storage.anon.fields[0..fields_len]; - std.sort.sort(Field, slice, {}, struct { + mem.sort(Field, slice, {}, struct { fn before(_: void, lhs: Field, rhs: Field) bool { return lhs.alignas.@"align" > rhs.alignas.@"align"; } diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 81e8c57bddfb..01f18a73b3e4 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1837,7 +1837,7 @@ fn writeBaseRelocations(self: *Coff) !void { pages.appendAssumeCapacity(page.*); } } - std.sort.sort(u32, pages.items, {}, std.sort.asc(u32)); + mem.sort(u32, pages.items, {}, std.sort.asc(u32)); var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7cc6f78c7def..b218fdbd2dfe 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -209,7 +209,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) // afterwards by address in each group. Normally, dysymtab should // be enough to guarantee the sort, but turns out not every compiler // is kind enough to specify the symbols in the correct order. 
-    sort.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan);
+    mem.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan);
     var prev_sect_id: u8 = 0;
     var section_index_lookup: ?Entry = null;
@@ -462,7 +462,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void {
         sorted_sections[id] = .{ .header = sect, .id = @intCast(u8, id) };
     }
-    std.sort.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress);
+    mem.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress);
     var sect_sym_index: u32 = 0;
     for (sorted_sections) |section| {
@@ -663,7 +663,7 @@ fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void {
     if (self.getSourceRelocs(section)) |relocs| {
         try self.relocations.ensureUnusedCapacity(gpa, relocs.len);
         self.relocations.appendUnalignedSliceAssumeCapacity(relocs);
-        std.sort.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan);
+        mem.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan);
     }
     self.section_relocs_lookup.items[sect_id] = start;
 }
@@ -901,7 +901,7 @@ pub fn parseDataInCode(self: *Object, gpa: Allocator) !void {
     const dice = @ptrCast([*]align(1) const macho.data_in_code_entry, self.contents.ptr + cmd.dataoff)[0..ndice];
     try self.data_in_code.ensureTotalCapacityPrecise(gpa, dice.len);
     self.data_in_code.appendUnalignedSliceAssumeCapacity(dice);
-    std.sort.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan);
+    mem.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan);
 }
 fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_code_entry) bool {
diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig
index 0071657f8b94..8d2a36be9dfb 100644
--- a/src/link/MachO/UnwindInfo.zig
+++ b/src/link/MachO/UnwindInfo.zig
@@ -411,7 +411,7 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void {
     }
     var slice = common_encodings_counts.values();
-    std.sort.sort(CommonEncWithCount, slice, {}, CommonEncWithCount.greaterThan);
+    mem.sort(CommonEncWithCount, slice, {}, CommonEncWithCount.greaterThan);
     var i: u7 = 0;
     while (i < slice.len) : (i += 1) {
diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig
index 1d7a0c94c0c8..5b386a81368b 100644
--- a/src/link/MachO/dyld_info/Rebase.zig
+++ b/src/link/MachO/dyld_info/Rebase.zig
@@ -39,7 +39,7 @@ pub fn finalize(rebase: *Rebase, gpa: Allocator) !void {
     const writer = rebase.buffer.writer(gpa);
-    std.sort.sort(Entry, rebase.entries.items, {}, Entry.lessThan);
+    std.mem.sort(Entry, rebase.entries.items, {}, Entry.lessThan);
     try setTypePointer(writer);
diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig
index 98a693920a6f..14ce1587aa79 100644
--- a/src/link/MachO/dyld_info/bind.zig
+++ b/src/link/MachO/dyld_info/bind.zig
@@ -47,7 +47,7 @@ pub fn Bind(comptime Ctx: type, comptime Target: type) type {
     const writer = self.buffer.writer(gpa);
-    std.sort.sort(Entry, self.entries.items, ctx, Entry.lessThan);
+    std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan);
     var start: usize = 0;
     var seg_id: ?u8 = null;
diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig
index 7e6870ecbc23..b151aee19b18 100644
--- a/src/link/MachO/zld.zig
+++ b/src/link/MachO/zld.zig
@@ -1441,7 +1441,7 @@ pub const Zld = struct {
             }
         }
-        std.sort.sort(Section, sections.items, {}, SortSection.lessThan);
+        mem.sort(Section, sections.items, {}, SortSection.lessThan);
         self.sections.shrinkRetainingCapacity(0);
         for (sections.items) |out| {
@@ -2237,7 +2237,7 @@ pub const Zld = struct {
             }
         }
-        std.sort.sort(u64, addresses.items, {}, asc_u64);
+        mem.sort(u64, addresses.items, {}, asc_u64);
         var offsets = std.ArrayList(u32).init(gpa);
         defer offsets.deinit();
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index cd9c44d656e3..5dfc91d4ce80 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -2143,7 +2143,7 @@ fn sortDataSegments(wasm: *Wasm) !void {
         }
     };
-    std.sort.sort([]const u8, keys, {}, SortContext.sort);
+    mem.sort([]const u8, keys, {}, SortContext.sort);
     for (keys) |key| {
         const segment_index = wasm.data_segments.get(key).?;
         new_mapping.putAssumeCapacity(key, segment_index);
@@ -2187,7 +2187,7 @@ fn setupInitFunctions(wasm: *Wasm) !void {
     }
     // sort the initfunctions based on their priority
-    std.sort.sort(InitFuncLoc, wasm.init_funcs.items, {}, InitFuncLoc.lessThan);
+    mem.sort(InitFuncLoc, wasm.init_funcs.items, {}, InitFuncLoc.lessThan);
 }
 /// Generates an atom containing the global error set' size.
@@ -3687,7 +3687,7 @@ fn writeToFile(
         }
     }.sort;
-    std.sort.sort(*Atom, sorted_atoms.items, wasm, atom_sort_fn);
+    mem.sort(*Atom, sorted_atoms.items, wasm, atom_sort_fn);
     for (sorted_atoms.items) |sorted_atom| {
         try leb.writeULEB128(binary_writer, sorted_atom.size);
@@ -4050,8 +4050,8 @@ fn emitNameSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), arena: std.mem
         data_segment_index += 1;
     }
-    std.sort.sort(Name, funcs.values(), {}, Name.lessThan);
-    std.sort.sort(Name, globals.items, {}, Name.lessThan);
+    mem.sort(Name, funcs.values(), {}, Name.lessThan);
+    mem.sort(Name, globals.items, {}, Name.lessThan);
     const header_offset = try reserveCustomSectionHeader(binary_bytes);
     const writer = binary_bytes.writer();
diff --git a/src/objcopy.zig b/src/objcopy.zig
index 12129aba9c1c..c5d0e8dcb38e 100644
--- a/src/objcopy.zig
+++ b/src/objcopy.zig
@@ -402,7 +402,7 @@ const BinaryElfOutput = struct {
         }
     }
-    std.sort.sort(*BinaryElfSegment, self.segments.items, {}, segmentSortCompare);
+    mem.sort(*BinaryElfSegment, self.segments.items, {}, segmentSortCompare);
     for (self.segments.items, 0..) |firstSegment, i| {
         if (firstSegment.firstSection) |firstSection| {
@@ -427,7 +427,7 @@ const BinaryElfOutput = struct {
         }
     }
-    std.sort.sort(*BinaryElfSection, self.sections.items, {}, sectionSortCompare);
+    mem.sort(*BinaryElfSection, self.sections.items, {}, sectionSortCompare);
     return self;
 }
diff --git a/test/src/Cases.zig b/test/src/Cases.zig
index 0451079a0e41..63dd2fd3da48 100644
--- a/test/src/Cases.zig
+++ b/test/src/Cases.zig
@@ -607,7 +607,7 @@ fn sortTestFilenames(filenames: [][]const u8) void {
             };
         }
     };
-    std.sort.sort([]const u8, filenames, Context{}, Context.lessThan);
+    std.mem.sort([]const u8, filenames, Context{}, Context.lessThan);
 }
 /// Iterates a set of filenames extracting batches that are either incremental
diff --git a/tools/gen_stubs.zig b/tools/gen_stubs.zig
index bc2637e197d8..95787b719a7d 100644
--- a/tools/gen_stubs.zig
+++ b/tools/gen_stubs.zig
@@ -437,7 +437,7 @@ fn parseElf(parse: Parse, comptime is_64: bool, comptime endian: builtin.Endian)
     const dynstr = elf_bytes[dynstr_offset..];
     // Sort the list by address, ascending.
-    std.sort.sort(Sym, @alignCast(8, dyn_syms), {}, S.symbolAddrLessThan);
+    mem.sort(Sym, @alignCast(8, dyn_syms), {}, S.symbolAddrLessThan);
     for (dyn_syms) |sym| {
         const this_section = s(sym.st_shndx);
diff --git a/tools/generate_JSONTestSuite.zig b/tools/generate_JSONTestSuite.zig
index b8550959c7a6..2229cf4012fb 100644
--- a/tools/generate_JSONTestSuite.zig
+++ b/tools/generate_JSONTestSuite.zig
@@ -23,7 +23,7 @@ pub fn main() !void {
     while (try it.next()) |entry| {
         try names.append(try allocator.dupe(u8, entry.name));
     }
-    std.sort.sort([]const u8, names.items, {}, (struct {
+    std.mem.sort([]const u8, names.items, {}, (struct {
         fn lessThan(_: void, a: []const u8, b: []const u8) bool {
             return std.mem.lessThan(u8, a, b);
         }
diff --git a/tools/process_headers.zig b/tools/process_headers.zig
index a6550a2573b3..0321c0e0eb5a 100644
--- a/tools/process_headers.zig
+++ b/tools/process_headers.zig
@@ -460,7 +460,7 @@ pub fn main() !void {
             try contents_list.append(contents);
         }
     }
-    std.sort.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
+    std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
     const best_contents = contents_list.popOrNull().?;
     if (best_contents.hit_count > 1) {
         // worth it to make it generic
diff --git a/tools/update-linux-headers.zig b/tools/update-linux-headers.zig
index 38fbab66458b..0f31e5e893e3 100644
--- a/tools/update-linux-headers.zig
+++ b/tools/update-linux-headers.zig
@@ -260,7 +260,7 @@ pub fn main() !void {
            try contents_list.append(contents);
        }
    }
-    std.sort.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
+    std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
    const best_contents = contents_list.popOrNull().?;
    if (best_contents.hit_count > 1) {
        // worth it to make it generic
diff --git a/tools/update_clang_options.zig b/tools/update_clang_options.zig
index 682ec7e152e8..feefeb0a83a6 100644
--- a/tools/update_clang_options.zig
+++ b/tools/update_clang_options.zig
@@ -646,7 +646,7 @@ pub fn main() anyerror!void {
     }
     // Some options have multiple matches. As an example, "-Wl,foo" matches both
     // "W" and "Wl,". So we sort this list in order of descending priority.
-    std.sort.sort(*json.ObjectMap, all_objects.items, {}, objectLessThan);
+    std.mem.sort(*json.ObjectMap, all_objects.items, {}, objectLessThan);
     var buffered_stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
     const stdout = buffered_stdout.writer();
diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig
index 53bb365f4183..d5c3d48852b2 100644
--- a/tools/update_cpu_features.zig
+++ b/tools/update_cpu_features.zig
@@ -1187,8 +1187,8 @@ fn processOneTarget(job: Job) anyerror!void {
     for (llvm_target.extra_cpus) |extra_cpu| {
         try all_cpus.append(extra_cpu);
     }
-    std.sort.sort(Feature, all_features.items, {}, featureLessThan);
-    std.sort.sort(Cpu, all_cpus.items, {}, cpuLessThan);
+    mem.sort(Feature, all_features.items, {}, featureLessThan);
+    mem.sort(Cpu, all_cpus.items, {}, cpuLessThan);
     const target_sub_path = try fs.path.join(arena, &.{ "lib", "std", "target" });
     var target_dir = try job.zig_src_dir.makeOpenPath(target_sub_path, .{});
@@ -1283,7 +1283,7 @@ fn processOneTarget(job: Job) anyerror!void {
             try dependencies.append(key.*);
         }
     }
-    std.sort.sort([]const u8, dependencies.items, {}, asciiLessThan);
+    mem.sort([]const u8, dependencies.items, {}, asciiLessThan);
     if (dependencies.items.len == 0) {
         try w.writeAll(
@@ -1328,7 +1328,7 @@ fn processOneTarget(job: Job) anyerror!void {
             try cpu_features.append(key.*);
         }
     }
-    std.sort.sort([]const u8, cpu_features.items, {}, asciiLessThan);
+    mem.sort([]const u8, cpu_features.items, {}, asciiLessThan);
     if (cpu.llvm_name) |llvm_name| {
         try w.print(
             \\ pub const {} = CpuModel{{
diff --git a/tools/update_spirv_features.zig b/tools/update_spirv_features.zig
index 8d398f58de5b..44d8b6a44517 100644
--- a/tools/update_spirv_features.zig
+++ b/tools/update_spirv_features.zig
@@ -303,7 +303,7 @@ fn gatherVersions(allocator: Allocator, registry: g.CoreRegistry) ![]const Versi
         }
     }
-    std.sort.sort(Version, versions.items, {}, Version.lessThan);
+    std.mem.sort(Version, versions.items, {}, Version.lessThan);
     return versions.items;
 }
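
Editor's note on `chosePivot` in the pdq code above (not part of the patch): because `sort3` counts every swap it performs, a total of zero swaps after pivot selection means the sampled elements were already ascending, while hitting `max_swaps` (all twelve, only reachable once the ninther path has run all four `sort3` calls) means they were strictly descending, so the caller can treat the slice as likely sorted or likely reversed (and reverse it with `reverseRange`) instead of partitioning blindly. A standalone sketch of that swap-counting idea on a plain slice; `Hint3` and `median3` are illustrative names invented here:

const std = @import("std");

const Hint3 = enum { increasing, decreasing, unknown };

// Sorts items[a], items[b], items[c] in place while counting swaps; after the
// call items[b] holds the median, mirroring `sort3` above returning `b`.
fn median3(items: []u32, a: usize, b: usize, c: usize) Hint3 {
    var swaps: usize = 0;
    if (items[b] < items[a]) {
        std.mem.swap(u32, &items[a], &items[b]);
        swaps += 1;
    }
    if (items[c] < items[b]) {
        std.mem.swap(u32, &items[b], &items[c]);
        swaps += 1;
    }
    if (items[b] < items[a]) {
        std.mem.swap(u32, &items[a], &items[b]);
        swaps += 1;
    }
    // 0 swaps: the triple was already ascending; 3 swaps: strictly descending.
    return switch (swaps) {
        0 => .increasing,
        3 => .decreasing,
        else => .unknown,
    };
}

test "swap count hints at pre-existing order" {
    var asc = [_]u32{ 1, 2, 3 };
    try std.testing.expectEqual(Hint3.increasing, median3(&asc, 0, 1, 2));

    var desc = [_]u32{ 3, 2, 1 };
    try std.testing.expectEqual(Hint3.decreasing, median3(&desc, 0, 1, 2));

    var mixed = [_]u32{ 2, 3, 1 };
    try std.testing.expectEqual(Hint3.unknown, median3(&mixed, 0, 1, 2));
}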
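
On the call sites themselves: every hunk in this patch keeps the same four-argument shape (element type, slice, context, comparator) and only changes the namespace, so the migration is mechanical. A minimal sketch of both context styles as they appear above, assuming the `std.mem.sort` wrapper this patch introduces; `ByDistance` is a hypothetical context type invented for the example:

const std = @import("std");

// Hypothetical context type, used only to illustrate the stateful form.
const ByDistance = struct {
    origin: i32,

    // The context value is forwarded as the comparator's first argument.
    fn lessThan(ctx: ByDistance, lhs: i32, rhs: i32) bool {
        const dl = if (lhs > ctx.origin) lhs - ctx.origin else ctx.origin - lhs;
        const dr = if (rhs > ctx.origin) rhs - ctx.origin else ctx.origin - rhs;
        return dl < dr;
    }
};

test "mem.sort call patterns" {
    // Stateless comparator: a void context (`{}`), as in most hunks above.
    var pages = [_]u32{ 30, 10, 20 };
    std.mem.sort(u32, &pages, {}, std.sort.asc(u32));
    const expected_pages = [_]u32{ 10, 20, 30 };
    try std.testing.expectEqualSlices(u32, &expected_pages, &pages);

    // Stateful comparator, as in the RangeSet.zig and Sema.zig hunks:
    // here, sort by absolute distance from an origin.
    var values = [_]i32{ -7, 2, 5, -1 };
    std.mem.sort(i32, &values, ByDistance{ .origin = 0 }, ByDistance.lessThan);
    const expected_values = [_]i32{ -1, 2, 5, -7 };
    try std.testing.expectEqualSlices(i32, &expected_values, &values);
}

Since `std.mem.sort` is stable, elements that compare equal (for example, two values equidistant from the origin) keep their original relative order; `std.mem.sortUnstable` offers the same signature without that guarantee.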