From 1d5ebec4d7ab8d954c776f69af48ead0048d3487 Mon Sep 17 00:00:00 2001
From: Jean Boussier
Date: Thu, 14 Nov 2024 08:44:42 +0100
Subject: [PATCH] Implement #scan_integer to efficiently parse Integer

Fix: https://github.com/ruby/strscan/issues/113

This allows parsing an Integer directly from a String, without first
allocating a substring.

Notes:

The implementation is limited by design. It is meant as a first step:
only the most straightforward case, base-10 integers, is supported.
---
 ext/strscan/strscan.c              | 59 ++++++++++++++++++++++++++++++
 test/strscan/test_stringscanner.rb | 54 +++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 0448b9c16b..898ff02c4a 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -115,6 +115,7 @@ static VALUE strscan_get_byte _((VALUE self));
 static VALUE strscan_getbyte _((VALUE self));
 static VALUE strscan_peek _((VALUE self, VALUE len));
 static VALUE strscan_peep _((VALUE self, VALUE len));
+static VALUE strscan_scan_integer _((VALUE self));
 static VALUE strscan_unscan _((VALUE self));
 static VALUE strscan_bol_p _((VALUE self));
 static VALUE strscan_eos_p _((VALUE self));
@@ -1266,6 +1267,62 @@ strscan_peep(VALUE self, VALUE vlen)
     return strscan_peek(self, vlen);
 }
 
+/*
+ * call-seq:
+ *   scan_integer
+ *
+ * Scans an optionally signed base-10 integer at the current position.
+ * Equivalent to #scan with a [+-]?\d+ pattern, except that the match is
+ * returned as an Integer (or nil when there is no match) and no
+ * intermediate substring is allocated.
+ *
+ * On success the scan pointer advances past the digits and #unscan can
+ * restore it. On failure the position is left untouched and the match
+ * status is cleared.
+ */
+static VALUE
+strscan_scan_integer(VALUE self)
+{
+    struct strscanner *p;
+    char *ptr, *buffer;
+    long len = 0, rest;
+    VALUE buffer_v, integer;
+
+    GET_SCANNER(self, p);
+    /* A failed scan must not leave a stale "matched" flag behind. */
+    CLEAR_MATCH_STATUS(p);
+
+    ptr = CURPTR(p);
+    rest = S_RESTLEN(p);
+
+    /* Every read of ptr[len] is bounded by the bytes left in the string. */
+    if (len < rest && (ptr[len] == '-' || ptr[len] == '+')) {
+        len++;
+    }
+
+    /* <ctype.h> requires a value representable as unsigned char. */
+    if (len >= rest || !isdigit((unsigned char)ptr[len])) {
+        return Qnil;
+    }
+
+    while (len < rest && isdigit((unsigned char)ptr[len])) {
+        len++;
+    }
+
+    /* rb_cstr2inum() needs a NUL-terminated copy of the matched bytes. */
+    buffer = ALLOCV_N(char, buffer_v, len + 1);
+    MEMCPY(buffer, ptr, char, len);
+    buffer[len] = '\0';
+    integer = rb_cstr2inum(buffer, 10);
+    ALLOCV_END(buffer_v);
+
+    /* Commit scanner state only once the conversion has succeeded. */
+    MATCHED(p);
+    p->prev = p->curr;
+    p->curr += len;
+    return integer;
+}
+
 /*
  * :markup: markdown
  * :include: strscan/link_refs.txt
@@ -2204,6 +2261,8 @@ Init_strscan(void)
     rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
     rb_define_method(StringScanner, "peep", strscan_peep, 1);
 
+    rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);
+
     rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
 
     rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb
index 54fd5027cf..dcf3f65c91 100644
--- a/test/strscan/test_stringscanner.rb
+++ b/test/strscan/test_stringscanner.rb
@@ -890,6 +890,60 @@ def test_named_captures
     assert_equal(9, scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/))
     assert_equal({"f" => "foo", "r" => "bar", "z" => "baz"}, scan.named_captures)
   end
+
+  def test_scan_integer
+    s = create_string_scanner('abc')
+    assert_nil s.scan_integer
+    assert_equal 0, s.pos
+
+    s = create_string_scanner('123abc')
+    assert_equal 123, s.scan_integer
+    assert_equal 3, s.pos
+
+    s = create_string_scanner('-123abc')
+    assert_equal(-123, s.scan_integer)
+    assert_equal 4, s.pos
+
+    s = create_string_scanner('+123')
+    assert_equal 123, s.scan_integer
+    assert_equal 4, s.pos
+
+    s = create_string_scanner('-abc')
+    assert_nil s.scan_integer
+    assert_equal 0, s.pos
+
+    huge_integer = '1' * 2_000
+    s = create_string_scanner(huge_integer)
+    assert_equal huge_integer.to_i, s.scan_integer
+    assert_equal 2_000, s.pos
+  end
+
+  def test_scan_integer_at_end_of_string
+    s = create_string_scanner('')
+    assert_nil s.scan_integer
+    assert_equal 0, s.pos
+
+    s = create_string_scanner('-')
+    assert_nil s.scan_integer
+    assert_equal 0, s.pos
+  end
+
+  def test_scan_integer_failure_clears_match
+    s = create_string_scanner('123abc')
+    assert_equal 123, s.scan_integer
+    assert_nil s.scan_integer
+    assert_equal false, s.matched?
+    assert_equal 3, s.pos
+  end
+
+  def test_scan_integer_unmatch
+    s = create_string_scanner('123abc')
+    assert_equal 123, s.scan_integer
+    assert_equal 3, s.pos
+
+    s.unscan
+    assert_equal 0, s.pos
+  end
 end
 
 class TestStringScanner < Test::Unit::TestCase