Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sentinel-terminated pointers #3728

Merged
merged 25 commits into from
Nov 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
1aa978f
implement null terminated pointers
andrewrk Nov 14, 2019
21f344b
add null terminated pointers and arrays to self-hosted
andrewrk Nov 18, 2019
47f06be
string literals are now null terminated
andrewrk Nov 20, 2019
cf2fe25
better error message when null termination does not match
andrewrk Nov 20, 2019
fd6020c
update tests, better error messages, update self-hosted tokenizer
andrewrk Nov 20, 2019
6b623b5
update docs for null terminated stuff
andrewrk Nov 20, 2019
7597735
update the stage1 implementation to the new proposal
andrewrk Nov 23, 2019
f25182f
structs can have fields with type `var`
andrewrk Nov 23, 2019
00878a1
zig fmt: support sentinel-terminated pointer syntax
andrewrk Nov 24, 2019
2dd20aa
langref: update for sentinel-terminated types
andrewrk Nov 24, 2019
4c7b525
all tests passing
andrewrk Nov 24, 2019
44b1dc6
add type coercion: [:x]T to [*:x]T
andrewrk Nov 24, 2019
f7574f4
add test for struct with var field
andrewrk Nov 24, 2019
09ec720
fix comptime `@ptrCast` of pointers to arrays
andrewrk Nov 24, 2019
4018034
add test cases for arbitrary pointer sentinels
andrewrk Nov 24, 2019
c96d565
add compile error for incompatible pointer sentinels
andrewrk Nov 24, 2019
217a509
fix compile error regressions
andrewrk Nov 24, 2019
7eb5acd
fix casting `[N:x]T` to `[N]T` memcpying too many bytes
andrewrk Nov 24, 2019
ce96323
update cli test
andrewrk Nov 25, 2019
b9f88c3
fix compile errors for array sentinels mismatching
andrewrk Nov 25, 2019
34b1ebe
Merge remote-tracking branch 'origin/master' into null-terminated-poi…
andrewrk Nov 25, 2019
15d415e
make std.mem.toSlice use null terminated pointers
andrewrk Nov 25, 2019
29e438f
more sentinel-terminated pointers std lib integration
andrewrk Nov 25, 2019
d2cb740
add missing null terminator in windows file path helper function
andrewrk Nov 25, 2019
3217264
fix freebsd regression
andrewrk Nov 25, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions doc/docgen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,6 @@ fn tokenizeAndPrintRaw(docgen_tokenizer: *Tokenizer, out: var, source_token: Tok
.AngleBracketAngleBracketRight,
.AngleBracketAngleBracketRightEqual,
.Tilde,
.BracketStarBracket,
.BracketStarCBracket,
=> try writeEscaped(out, src[token.start..token.end]),

.Invalid, .Invalid_ampersands => return parseError(
Expand Down
148 changes: 89 additions & 59 deletions doc/langref.html.in
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,11 @@ pub fn main() void {
{#header_close#}
{#header_open|String Literals and Character Literals#}
<p>
String literals are UTF-8 encoded byte arrays.
String literals are single-item constant {#link|Pointers#} to null-terminated UTF-8 encoded byte arrays.
The type of string literals encodes both the length and the fact that they are null-terminated,
and thus they can be {#link|coerced|Type Coercion#} to both {#link|Slices#} and
{#link|Null-Terminated Pointers|Sentinel-Terminated Pointers#}.
Dereferencing string literals converts them to {#link|Arrays#}.
</p>
<p>
Character literals have type {#syntax#}comptime_int{#endsyntax#}, the same as
Expand All @@ -558,20 +562,15 @@ const assert = @import("std").debug.assert;
const mem = @import("std").mem;

test "string literals" {
// In Zig a string literal is an array of bytes.
const normal_bytes = "hello";
assert(@typeOf(normal_bytes) == [5]u8);
assert(normal_bytes.len == 5);
assert(normal_bytes[1] == 'e');
const bytes = "hello";
assert(@typeOf(bytes) == *const [5:0]u8);
assert(bytes.len == 5);
assert(bytes[1] == 'e');
assert(bytes[5] == 0);
assert('e' == '\x65');
assert('\u{1f4a9}' == 128169);
assert('💯' == 128175);
assert(mem.eql(u8, "hello", "h\x65llo"));

// A C string literal is a null terminated pointer.
const null_terminated_bytes = c"hello";
assert(@typeOf(null_terminated_bytes) == [*]const u8);
assert(null_terminated_bytes[5] == 0);
}
{#code_end#}
{#see_also|Arrays|Zig Test|Source Encoding#}
Expand Down Expand Up @@ -641,23 +640,6 @@ const hello_world_in_c =
\\}
;
{#code_end#}
<p>
For a multiline C string literal, prepend <code>c</code> to each {#syntax#}\\{#endsyntax#}:
</p>
{#code_begin|syntax#}
const c_string_literal =
c\\#include <stdio.h>
c\\
c\\int main(int argc, char **argv) {
c\\ printf("hello world\n");
c\\ return 0;
c\\}
;
{#code_end#}
<p>
In this example the variable {#syntax#}c_string_literal{#endsyntax#} has type {#syntax#}[*]const u8{#endsyntax#} and
has a terminating null byte.
</p>
{#see_also|@embedFile#}
{#header_close#}
{#header_close#}
Expand Down Expand Up @@ -1638,12 +1620,11 @@ comptime {
assert(message.len == 5);
}

// a string literal is an array literal
const same_message = "hello";
// A string literal is a pointer to an array literal.
const same_message = "hello".*;

comptime {
assert(mem.eql(u8, message, same_message));
assert(@typeOf(message) == @typeOf(same_message));
}

test "iterate over an array" {
Expand Down Expand Up @@ -1799,6 +1780,26 @@ test "multidimensional arrays" {
}
{#code_end#}
{#header_close#}

{#header_open|Sentinel-Terminated Arrays#}
<p>
The syntax {#syntax#}[N:x]T{#endsyntax#} describes an array which has a sentinel element at the
index corresponding to {#syntax#}len{#endsyntax#}.
</p>
{#code_begin|test|null_terminated_array#}
const std = @import("std");
const assert = std.debug.assert;

test "null terminated array" {
const array = [_:0]u8 {1, 2, 3, 4};

assert(@typeOf(array) == [4:0]u8);
assert(array.len == 4);
assert(array[4] == 0);
}
{#code_end#}
{#see_also|Sentinel-Terminated Pointers|Sentinel-Terminated Slices#}
{#header_close#}
{#header_close#}

{#header_open|Vectors#}
Expand Down Expand Up @@ -1899,7 +1900,7 @@ test "pointer array access" {
}
{#code_end#}
<p>
In Zig, we prefer slices over pointers to null-terminated arrays.
In Zig, we generally prefer {#link|Slices#} over {#link|Sentinel-Terminated Pointers#}.
You can turn an array or pointer into a slice using slice syntax.
</p>
<p>
Expand Down Expand Up @@ -2111,6 +2112,29 @@ test "allowzero" {
}
{#code_end#}
{#header_close#}

{#header_open|Sentinel-Terminated Pointers#}
<p>
The syntax {#syntax#}[*:x]T{#endsyntax#} describes a pointer that
has a length determined by a sentinel value. This provides protection
against buffer overflows and overreads.
</p>
{#code_begin|exe_build_err#}
const std = @import("std");

// This is also available as `std.c.printf`.
pub extern "c" fn printf(format: [*:0]const u8, ...) c_int;

pub fn main() anyerror!void {
_ = printf("Hello, world!\n"); // OK

const msg = "Hello, world!\n";
const non_null_terminated_msg: [msg.len]u8 = msg.*;
_ = printf(&non_null_terminated_msg);
}
{#code_end#}
{#see_also|Sentinel-Terminated Slices|Sentinel-Terminated Arrays#}
{#header_close#}
{#header_close#}

{#header_open|Slices#}
Expand Down Expand Up @@ -2194,7 +2218,29 @@ test "slice widening" {
}
{#code_end#}
{#see_also|Pointers|for|Arrays#}

{#header_open|Sentinel-Terminated Slices#}
<p>
The syntax {#syntax#}[:x]T{#endsyntax#} describes a slice which has a runtime-known length
and also guarantees a sentinel value at the element indexed by the length. The type does not
guarantee that there are no sentinel elements before that. Sentinel-terminated slices allow element
access to the {#syntax#}len{#endsyntax#} index.
</p>
{#code_begin|test|null_terminated_slice#}
const std = @import("std");
const assert = std.debug.assert;

test "null terminated slice" {
const slice: [:0]const u8 = "hello";

assert(slice.len == 5);
assert(slice[5] == 0);
}
{#code_end#}
{#see_also|Sentinel-Terminated Pointers|Sentinel-Terminated Arrays#}
{#header_close#}
{#header_close#}

{#header_open|struct#}
{#code_begin|test|structs#}
// Declare a struct.
Expand Down Expand Up @@ -4817,9 +4863,9 @@ const assert = std.debug.assert;
const mem = std.mem;

test "cast *[1][*]const u8 to [*]const ?[*]const u8" {
const window_name = [1][*]const u8{c"window name"};
const window_name = [1][*]const u8{"window name"};
const x: [*]const ?[*]const u8 = &window_name;
assert(mem.eql(u8, std.mem.toSliceConst(u8, x[0].?), "window name"));
assert(mem.eql(u8, std.mem.toSliceConst(u8, @ptrCast([*:0]const u8, x[0].?)), "window name"));
}
{#code_end#}
{#header_close#}
Expand Down Expand Up @@ -4859,7 +4905,7 @@ test "float widening" {
{#code_end#}
{#header_close#}
{#header_open|Type Coercion: Arrays and Pointers#}
{#code_begin|test#}
{#code_begin|test|coerce_arrays_and_ptrs#}
const std = @import("std");
const assert = std.debug.assert;

Expand Down Expand Up @@ -4898,7 +4944,7 @@ test "[N]T to ?[]const T" {

// In this cast, the array length becomes the slice length.
test "*[N]T to []T" {
var buf: [5]u8 = "hello";
var buf: [5]u8 = "hello".*;
const x: []u8 = &buf;
assert(std.mem.eql(u8, x, "hello"));

Expand All @@ -4910,15 +4956,15 @@ test "*[N]T to []T" {
// Single-item pointers to arrays can be coerced to
// unknown length pointers.
test "*[N]T to [*]T" {
var buf: [5]u8 = "hello";
var buf: [5]u8 = "hello".*;
const x: [*]u8 = &buf;
assert(x[4] == 'o');
// x[5] would be an uncaught out of bounds pointer dereference!
}

// Likewise, it works when the destination type is an optional.
test "*[N]T to ?[*]T" {
var buf: [5]u8 = "hello";
var buf: [5]u8 = "hello".*;
const x: ?[*]u8 = &buf;
assert(x.?[4] == 'o');
}
Expand Down Expand Up @@ -5089,7 +5135,7 @@ test "coercion of zero bit types" {
This kind of type resolution chooses a type that all peer types can coerce into. Here are
some examples:
</p>
{#code_begin|test#}
{#code_begin|test|peer_type_resolution#}
const std = @import("std");
const assert = std.debug.assert;
const mem = std.mem;
Expand Down Expand Up @@ -5156,13 +5202,13 @@ fn peerTypeEmptyArrayAndSlice(a: bool, slice: []const u8) []const u8 {
}
test "peer type resolution: [0]u8, []const u8, and anyerror![]u8" {
{
var data = "hi";
var data = "hi".*;
const slice = data[0..];
assert((try peerTypeEmptyArrayAndSliceAndError(true, slice)).len == 0);
assert((try peerTypeEmptyArrayAndSliceAndError(false, slice)).len == 1);
}
comptime {
var data = "hi";
var data = "hi".*;
const slice = data[0..];
assert((try peerTypeEmptyArrayAndSliceAndError(true, slice)).len == 0);
assert((try peerTypeEmptyArrayAndSliceAndError(false, slice)).len == 1);
Expand Down Expand Up @@ -8627,7 +8673,7 @@ pub fn main() void {
<p>At compile-time:</p>
{#code_begin|test_err|index 5 outside array of size 5#}
comptime {
const array = "hello";
const array: [5]u8 = "hello".*;
const garbage = array[5];
}
{#code_end#}
Expand Down Expand Up @@ -9603,22 +9649,6 @@ test "assert in release fast mode" {
</ul>
{#see_also|Primitive Types#}
{#header_close#}
{#header_open|C String Literals#}
{#code_begin|exe#}
{#link_libc#}
extern fn puts([*]const u8) void;

pub fn main() void {
puts(c"this has a null terminator");
puts(
c\\and so
c\\does this
c\\multiline C string literal
);
}
{#code_end#}
{#see_also|String Literals and Character Literals#}
{#header_close#}

{#header_open|Import from C Header File#}
<p>
Expand All @@ -9633,7 +9663,7 @@ const c = @cImport({
@cInclude("stdio.h");
});
pub fn main() void {
_ = c.printf(c"hello\n");
_ = c.printf("hello\n");
}
{#code_end#}
<p>
Expand Down
9 changes: 2 additions & 7 deletions lib/std/buffer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ pub const Buffer = struct {
self.list.deinit();
}

pub fn toSlice(self: Buffer) []u8 {
pub fn toSlice(self: Buffer) [:0]u8 {
return self.list.toSlice()[0..self.len()];
}

pub fn toSliceConst(self: Buffer) []const u8 {
pub fn toSliceConst(self: Buffer) [:0]const u8 {
return self.list.toSliceConst()[0..self.len()];
}

Expand Down Expand Up @@ -131,11 +131,6 @@ pub const Buffer = struct {
try self.resize(m.len);
mem.copy(u8, self.list.toSlice(), m);
}

/// For passing to C functions.
pub fn ptr(self: Buffer) [*]u8 {
return self.list.items.ptr;
}
};

test "simple Buffer" {
Expand Down
8 changes: 8 additions & 0 deletions lib/std/builtin.zig
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ pub const TypeInfo = union(enum) {
alignment: comptime_int,
child: type,
is_allowzero: bool,
/// The type of the sentinel is the element type of the pointer, which is
/// the value of the `child` field in this struct. However, there is no way
/// to refer to that type here, so we use `var`.
sentinel: var,

/// This data structure is used by the Zig language code generation and
/// therefore must be kept in sync with the compiler implementation.
Expand All @@ -160,6 +164,10 @@ pub const TypeInfo = union(enum) {
pub const Array = struct {
len: comptime_int,
child: type,
/// The type of the sentinel is the element type of the array, which is
/// the value of the `child` field in this struct. However, there is no way
/// to refer to that type here, so we use `var`.
sentinel: var,
};

/// This data structure is used by the Zig language code generation and
Expand Down
6 changes: 3 additions & 3 deletions lib/std/c.zig
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub extern "c" fn fclose(stream: *FILE) c_int;
pub extern "c" fn fwrite(ptr: [*]const u8, size_of_type: usize, item_count: usize, stream: *FILE) usize;
pub extern "c" fn fread(ptr: [*]u8, size_of_type: usize, item_count: usize, stream: *FILE) usize;

pub extern "c" fn printf(format: [*]const u8, ...) c_int;
pub extern "c" fn printf(format: [*:0]const u8, ...) c_int;
pub extern "c" fn abort() noreturn;
pub extern "c" fn exit(code: c_int) noreturn;
pub extern "c" fn isatty(fd: fd_t) c_int;
Expand Down Expand Up @@ -102,15 +102,15 @@ pub extern "c" fn execve(path: [*]const u8, argv: [*]const ?[*]const u8, envp: [
pub extern "c" fn dup(fd: fd_t) c_int;
pub extern "c" fn dup2(old_fd: fd_t, new_fd: fd_t) c_int;
pub extern "c" fn readlink(noalias path: [*]const u8, noalias buf: [*]u8, bufsize: usize) isize;
pub extern "c" fn realpath(noalias file_name: [*]const u8, noalias resolved_name: [*]u8) ?[*]u8;
pub extern "c" fn realpath(noalias file_name: [*]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;
pub extern "c" fn sigprocmask(how: c_int, noalias set: *const sigset_t, noalias oset: ?*sigset_t) c_int;
pub extern "c" fn gettimeofday(noalias tv: ?*timeval, noalias tz: ?*timezone) c_int;
pub extern "c" fn sigaction(sig: c_int, noalias act: *const Sigaction, noalias oact: ?*Sigaction) c_int;
pub extern "c" fn nanosleep(rqtp: *const timespec, rmtp: ?*timespec) c_int;
pub extern "c" fn setreuid(ruid: c_uint, euid: c_uint) c_int;
pub extern "c" fn setregid(rgid: c_uint, egid: c_uint) c_int;
pub extern "c" fn rmdir(path: [*]const u8) c_int;
pub extern "c" fn getenv(name: [*]const u8) ?[*]u8;
pub extern "c" fn getenv(name: [*:0]const u8) ?[*:0]u8;
pub extern "c" fn sysctl(name: [*]const c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
pub extern "c" fn sysctlbyname(name: [*]const u8, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
pub extern "c" fn sysctlnametomib(name: [*]const u8, mibp: ?*c_int, sizep: ?*usize) c_int;
Expand Down
3 changes: 2 additions & 1 deletion lib/std/child_process.zig
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ pub const ChildProcess = struct {

const any_ignore = (self.stdin_behavior == StdIo.Ignore or self.stdout_behavior == StdIo.Ignore or self.stderr_behavior == StdIo.Ignore);
const dev_null_fd = if (any_ignore)
os.openC(c"/dev/null", os.O_RDWR, 0) catch |err| switch (err) {
os.openC("/dev/null", os.O_RDWR, 0) catch |err| switch (err) {
error.PathAlreadyExists => unreachable,
error.NoSpaceLeft => unreachable,
error.FileTooBig => unreachable,
Expand Down Expand Up @@ -441,6 +441,7 @@ pub const ChildProcess = struct {

const any_ignore = (self.stdin_behavior == StdIo.Ignore or self.stdout_behavior == StdIo.Ignore or self.stderr_behavior == StdIo.Ignore);

// TODO use CreateFileW here since we are using a string literal for the path
const nul_handle = if (any_ignore)
windows.CreateFile(
"NUL",
Expand Down
Loading