Skip to content

Commit

Permalink
Implement #scan_integer to efficiently parse Integer
Browse files Browse the repository at this point in the history
Fix: #113

This allows parsing an Integer directly from a String without first
having to allocate a substring.

Notes:

The implementation is limited by design; it's meant as a first step, so
only the most straightforward case, base 10 integers, is supported.
  • Loading branch information
byroot committed Nov 14, 2024
1 parent 81a80a1 commit 1d5ebec
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 0 deletions.
45 changes: 45 additions & 0 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_getbyte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_peep _((VALUE self, VALUE len));
static VALUE strscan_scan_integer _((VALUE self));
static VALUE strscan_unscan _((VALUE self));
static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
Expand Down Expand Up @@ -1266,6 +1267,48 @@ strscan_peep(VALUE self, VALUE vlen)
return strscan_peek(self, vlen);
}

/*
 * call-seq:
 *   scan_integer
 *
 * Equivalent to #scan with a [+-]?\d+ pattern, except that the matched text
 * is returned as a base 10 Integer instead of a String.
 *
 * Returns nil, leaving the scan position untouched, when no integer starts
 * at the current position. On success the scan position is advanced past
 * the sign and digits, and the previous position is recorded so that the
 * scan can be undone.
 */
static VALUE
strscan_scan_integer(VALUE self)
{
    struct strscanner *p;
    char *ptr, *buffer;
    long len = 0, remaining_len;
    VALUE buffer_v, integer;

    GET_SCANNER(self, p);
    /* Reset first so a failed scan does not keep an earlier match alive. */
    CLEAR_MATCH_STATUS(p);

    ptr = CURPTR(p);
    remaining_len = S_RESTLEN(p);

    /* Never read at or beyond the end of the scanned string. */
    if (remaining_len <= 0) {
        return Qnil;
    }

    /* Optional single leading sign. */
    if (ptr[len] == '-' || ptr[len] == '+') {
        len++;
    }

    /*
     * At least one digit is required. The unsigned char cast matters:
     * passing a negative char value to isdigit() is undefined behaviour.
     */
    if (len >= remaining_len || !isdigit((unsigned char)ptr[len])) {
        return Qnil;
    }

    MATCHED(p);
    p->prev = p->curr;

    while (len < remaining_len && isdigit((unsigned char)ptr[len])) {
        len++;
    }

    /*
     * rb_cstr2inum() needs a NUL terminated C string, so copy the matched
     * bytes into a temporary buffer and terminate it there.
     */
    buffer = ALLOCV_N(char, buffer_v, len + 1);
    MEMCPY(buffer, ptr, char, len);
    buffer[len] = '\0';
    integer = rb_cstr2inum(buffer, 10);
    /* Release the temporary buffer eagerly instead of waiting for the GC. */
    ALLOCV_END(buffer_v);

    p->curr += len;
    return integer;
}

/*
* :markup: markdown
* :include: strscan/link_refs.txt
Expand Down Expand Up @@ -2204,6 +2247,8 @@ Init_strscan(void)
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
rb_define_method(StringScanner, "peep", strscan_peep, 1);

rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);

rb_define_method(StringScanner, "unscan", strscan_unscan, 0);

rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
Expand Down
36 changes: 36 additions & 0 deletions test/strscan/test_stringscanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,42 @@ def test_named_captures
assert_equal(9, scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/))
assert_equal({"f" => "foo", "r" => "bar", "z" => "baz"}, scan.named_captures)
end

# Exercises #scan_integer: it returns the parsed Integer and advances the
# scan position past it, or returns nil and leaves the position untouched
# when no integer starts at the current position.
def test_scan_integer
  # No digit at the scan position.
  s = create_string_scanner('abc')
  assert_nil(s.scan_integer)
  assert_equal(0, s.pos)

  # Empty input has nothing to parse.
  s = create_string_scanner('')
  assert_nil(s.scan_integer)
  assert_equal(0, s.pos)

  # Unsigned integer followed by other text.
  s = create_string_scanner('123abc')
  assert_equal(123, s.scan_integer)
  assert_equal(3, s.pos)

  # Leading minus sign. The parentheses avoid Ruby's "ambiguous first
  # argument" warning for a negative literal.
  s = create_string_scanner('-123abc')
  assert_equal(-123, s.scan_integer)
  assert_equal(4, s.pos)

  # Leading plus sign.
  s = create_string_scanner('+123')
  assert_equal(123, s.scan_integer)
  assert_equal(4, s.pos)

  # A sign that is not followed by a digit is not an integer.
  s = create_string_scanner('-abc')
  assert_nil(s.scan_integer)
  assert_equal(0, s.pos)

  # A lone sign at the very end of the input is not an integer either.
  s = create_string_scanner('-')
  assert_nil(s.scan_integer)
  assert_equal(0, s.pos)

  # Values far beyond the native integer range are still parsed exactly.
  huge_integer = '1' * 2_000
  s = create_string_scanner(huge_integer)
  assert_equal(huge_integer.to_i, s.scan_integer)
  assert_equal(2_000, s.pos)
end

# A successful #scan_integer can be undone: #unscan puts the scan position
# back where it was before the integer was consumed.
def test_scan_integer_unmatch
  scanner = create_string_scanner('123abc')

  # Consume the leading integer.
  assert_equal(123, scanner.scan_integer)
  assert_equal(3, scanner.pos)

  # Undo it and check that the position is back at the start.
  scanner.unscan
  assert_equal(0, scanner.pos)
end
end

class TestStringScanner < Test::Unit::TestCase
Expand Down

0 comments on commit 1d5ebec

Please sign in to comment.