diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java
index 7d3e7494fc..2455ef33ce 100644
--- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java
+++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java
@@ -54,6 +54,7 @@
 import org.jruby.runtime.builtin.IRubyObject;
 import org.jruby.util.ByteList;
 import org.jruby.util.StringSupport;
+import org.jruby.util.ConvertBytes;
 
 import java.util.Iterator;
 
@@ -556,6 +557,65 @@ public IRubyObject peep(ThreadContext context, IRubyObject length) {
         return peek(context, length);
     }
 
+    /**
+     * Scans an optionally signed run of ASCII decimal digits at the current
+     * scan position, i.e. the equivalent of scanning {@code [+-]?\d+}.
+     *
+     * <p>On success the scan pointer is advanced past the digits and the match
+     * is recorded; when no integer starts at the scan position the position is
+     * left unchanged and the scanner is left unmatched.
+     *
+     * <p>Raises Encoding::CompatibilityError when the scanned string's encoding
+     * is not ASCII compatible.
+     *
+     * @param context the current thread context
+     * @return the parsed Integer, or nil when nothing matched
+     */
+    @JRubyMethod(name = "scan_integer")
+    public IRubyObject scan_integer(ThreadContext context) {
+        final Ruby runtime = context.runtime;
+        check(context);
+        clearMatched();
+
+        if (!str.getEncoding().isAsciiCompatible()) {
+            throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding());
+        }
+
+        final ByteList bytes = str.getByteList();
+        final int end = bytes.getRealSize();
+        int pos = this.curr;
+
+        // Every read is guarded by "pos < end": the scanner may already sit at
+        // the end of the string, or the sign may be its very last byte.
+        if (pos < end) {
+            final int sign = bytes.get(pos);
+            if (sign == '-' || sign == '+') {
+                pos++;
+            }
+        }
+
+        final int digitsStart = pos;
+        while (pos < end) {
+            final int bite = bytes.get(pos);
+            if (bite < '0' || bite > '9') {
+                break;
+            }
+            pos++;
+        }
+
+        // A bare sign, or no digit at all, is not an integer.
+        if (pos == digitsStart) {
+            return runtime.getNil();
+        }
+
+        prev = this.curr;
+        this.curr = pos;
+        setMatched();
+        adjustRegisters();
+
+        return ConvertBytes.byteListToInum(runtime, bytes, prev, pos, 10, true);
+    }
+
     @JRubyMethod(name = "unscan")
     public IRubyObject unscan(ThreadContext context) {
         check(context);
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 0448b9c16b..f30e0b9118 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -21,6 +21,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
 #endif
 
 #include <stdbool.h>
+#include <ctype.h>
 
 #define STRSCAN_VERSION "3.1.1"
 
@@ -115,6 +116,7 @@ static VALUE strscan_get_byte _((VALUE self));
 static VALUE strscan_getbyte
_((VALUE self));
 static VALUE strscan_peek _((VALUE self, VALUE len));
 static VALUE strscan_peep _((VALUE self, VALUE len));
+static VALUE strscan_scan_integer _((VALUE self));
 static VALUE strscan_unscan _((VALUE self));
 static VALUE strscan_bol_p _((VALUE self));
 static VALUE strscan_eos_p _((VALUE self));
@@ -1266,6 +1268,63 @@ strscan_peep(VALUE self, VALUE vlen)
     return strscan_peek(self, vlen);
 }
 
+/*
+ * call-seq:
+ *   scan_integer
+ *
+ * Equivalent to #scan with a [+-]?\d+ pattern, and returns an Integer or nil.
+ *
+ * The scanned string must be encoded with an ASCII compatible encoding, otherwise
+ * Encoding::CompatibilityError will be raised.
+ */
+static VALUE
+strscan_scan_integer(VALUE self)
+{
+    char *ptr, *buffer;
+    long len = 0, remaining;
+    VALUE buffer_v, integer;
+    struct strscanner *p;
+
+    GET_SCANNER(self, p);
+    CLEAR_MATCH_STATUS(p);
+
+    rb_must_asciicompat(p->str);
+
+    ptr = CURPTR(p);
+    /* Bound every read by the bytes left in the string rather than relying
+     * on a terminating NUL byte. */
+    remaining = RSTRING_LEN(p->str) - p->curr;
+
+    if (len < remaining && (ptr[len] == '-' || ptr[len] == '+')) {
+        len++;
+    }
+
+    /* <ctype.h> functions require an unsigned char value (or EOF); passing a
+     * negative plain char (any byte >= 0x80) is undefined behavior. */
+    if (len >= remaining || !isdigit((unsigned char)ptr[len])) {
+        return Qnil;
+    }
+
+    while (len < remaining && isdigit((unsigned char)ptr[len])) {
+        len++;
+    }
+
+    buffer = ALLOCV_N(char, buffer_v, len + 1);
+
+    /* rb_cstr2inum() needs a NUL-terminated string, so copy the digits out. */
+    MEMCPY(buffer, CURPTR(p), char, len);
+    buffer[len] = '\0';
+    integer = rb_cstr2inum(buffer, 10);
+    /* Release the temporary buffer promptly instead of leaving it to the GC. */
+    ALLOCV_END(buffer_v);
+
+    /* Only record the match once parsing has succeeded. */
+    MATCHED(p);
+    p->prev = p->curr;
+    p->curr += len;
+    return integer;
+}
+
 /*
  * :markup: markdown
  * :include: strscan/link_refs.txt
@@ -2204,6 +2263,8 @@ Init_strscan(void)
     rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
     rb_define_method(StringScanner, "peep", strscan_peep, 1);
 
+    rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);
+
     rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
 
     rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb
index 54fd5027cf..ae05254d49 100644
--- a/test/strscan/test_stringscanner.rb
+++ b/test/strscan/test_stringscanner.rb
@@ -890,6 +890,61 @@ def
test_named_captures
     assert_equal(9, scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/))
     assert_equal({"f" => "foo", "r" => "bar", "z" => "baz"}, scan.named_captures)
   end
+
+  def test_scan_integer
+    omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"
+
+    s = create_string_scanner('abc') # no digits at the scan position
+    assert_nil(s.scan_integer)
+    assert_equal(0, s.pos)
+    refute_predicate(s, :matched?)
+
+    s = create_string_scanner('123abc') # digits stop at the first non-digit
+    assert_equal(123, s.scan_integer)
+    assert_equal(3, s.pos)
+    assert_predicate(s, :matched?)
+
+    s = create_string_scanner('-123abc') # a leading minus sign is consumed
+    assert_equal(-123, s.scan_integer)
+    assert_equal(4, s.pos)
+    assert_predicate(s, :matched?)
+
+    s = create_string_scanner('+123') # a leading plus sign is consumed
+    assert_equal(123, s.scan_integer)
+    assert_equal(4, s.pos)
+    assert_predicate(s, :matched?)
+
+    s = create_string_scanner('-abc') # a sign without digits is not an integer
+    assert_nil(s.scan_integer)
+    assert_equal(0, s.pos)
+    refute_predicate(s, :matched?)
+
+    huge_integer = '1' * 2_000 # 2,000 digits: far larger than a machine word
+    s = create_string_scanner(huge_integer)
+    assert_equal(huge_integer.to_i, s.scan_integer)
+    assert_equal(2_000, s.pos)
+    assert_predicate(s, :matched?)
+  end
+
+  def test_scan_integer_unmatch
+    omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"
+
+    s = create_string_scanner('123abc')
+    assert_equal(123, s.scan_integer)
+    assert_equal(3, s.pos)
+
+    s.unscan # rewinds to the position before the last match
+    assert_equal(0, s.pos)
+  end
+
+  def test_scan_integer_encoding
+    omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"
+
+    s = create_string_scanner('123abc'.encode(Encoding::UTF_32LE)) # not ASCII compatible
+    assert_raise(Encoding::CompatibilityError) do
+      s.scan_integer
+    end
+  end
 end
 
 class TestStringScanner < Test::Unit::TestCase