diff --git a/CHANGELOG.md b/CHANGELOG.md index 43aad941e7fd..865868ee39a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Compatibility: * Implement the `Data` class from Ruby 3.2 (#3039, @moste00, @eregon). * Make `Coverage.start` and `Coverage.result` accept parameters (#3149, @mtortonesi, @andrykonchin). * Implement `rb_check_funcall()` (@eregon). +* Implement `MatchData#{byteoffset,deconstruct,deconstruct_keys}` from Ruby 3.2 (#3039, @rwstauner). Performance: diff --git a/spec/ruby/core/matchdata/begin_spec.rb b/spec/ruby/core/matchdata/begin_spec.rb index 85c454da562d..54b4e0a33fe4 100644 --- a/spec/ruby/core/matchdata/begin_spec.rb +++ b/spec/ruby/core/matchdata/begin_spec.rb @@ -36,6 +36,18 @@ match_data = /(.)(.)(\d+)(\d)/.match("THX1138.") match_data.begin(obj).should == 2 end + + it "raises IndexError if index is out of bounds" do + match_data = /(?foo)(?bar)/.match("foobar") + + -> { + match_data.begin(-1) + }.should raise_error(IndexError, "index -1 out of matches") + + -> { + match_data.begin(3) + }.should raise_error(IndexError, "index 3 out of matches") + end end context "when passed a String argument" do @@ -68,6 +80,14 @@ match_data = /(?<æ>.)(.)(?\d+)(\d)/.match("THX1138.") match_data.begin("æ").should == 1 end + + it "raises IndexError if there is no group with the provided name" do + match_data = /(?foo)(?bar)/.match("foobar") + + -> { + match_data.begin("y") + }.should raise_error(IndexError, "undefined group name reference: y") + end end context "when passed a Symbol argument" do @@ -100,5 +120,13 @@ match_data = /(?<æ>.)(.)(?\d+)(\d)/.match("THX1138.") match_data.begin(:æ).should == 1 end + + it "raises IndexError if there is no group with the provided name" do + match_data = /(?foo)(?bar)/.match("foobar") + + -> { + match_data.begin(:y) + }.should raise_error(IndexError, "undefined group name reference: y") + end end end diff --git a/spec/ruby/core/matchdata/byteoffset_spec.rb b/spec/ruby/core/matchdata/byteoffset_spec.rb index 603609783492..b27267fd0ece 100644 --- a/spec/ruby/core/matchdata/byteoffset_spec.rb +++ b/spec/ruby/core/matchdata/byteoffset_spec.rb @@ -60,7 +60,7 @@ def obj.to_int; 2; end m.byteoffset(obj).should == [3, 6] end - it "raises IndexError if there is no group with provided name" do + it "raises IndexError if there is no group with the provided name" do m = /(?foo)(?bar)/.match("foobar") -> { @@ -72,7 +72,7 @@ def obj.to_int; 2; end }.should raise_error(IndexError, "undefined group name reference: y") end - it "raises IndexError if index is out of matches" do + it "raises IndexError if index is out of bounds" do m = /(?foo)(?bar)/.match("foobar") -> { diff --git a/spec/tags/core/matchdata/byteoffset_tags.txt b/spec/tags/core/matchdata/byteoffset_tags.txt deleted file mode 100644 index e4e300b51ea2..000000000000 --- a/spec/tags/core/matchdata/byteoffset_tags.txt +++ /dev/null @@ -1,11 +0,0 @@ -fails:MatchData#byteoffset returns beginning and ending byte-based offset of whole matched substring for 0 element -fails:MatchData#byteoffset returns beginning and ending byte-based offset of n-th match, all the subsequent elements are capturing groups -fails:MatchData#byteoffset accepts String as a reference to a named capture -fails:MatchData#byteoffset accepts Symbol as a reference to a named capture -fails:MatchData#byteoffset returns [nil, nil] if a capturing group is optional and doesn't match -fails:MatchData#byteoffset returns correct beginning and ending byte-based offset for multi-byte strings -fails:MatchData#byteoffset returns [nil, nil] if a capturing group is optional and doesn't match for multi-byte string -fails:MatchData#byteoffset converts argument into integer if is not String nor Symbol -fails:MatchData#byteoffset raises IndexError if there is no group with provided name -fails:MatchData#byteoffset raises IndexError if index is out of matches -fails:MatchData#byteoffset raises TypeError if can't convert argument into Integer diff --git a/spec/tags/core/matchdata/deconstruct_keys_tags.txt b/spec/tags/core/matchdata/deconstruct_keys_tags.txt deleted file mode 100644 index 06cb01539358..000000000000 --- a/spec/tags/core/matchdata/deconstruct_keys_tags.txt +++ /dev/null @@ -1,9 +0,0 @@ -fails:MatchData#deconstruct_keys returns whole hash for nil as an argument -fails:MatchData#deconstruct_keys returns only specified keys -fails:MatchData#deconstruct_keys requires one argument -fails:MatchData#deconstruct_keys it raises error when argument is neither nil nor array -fails:MatchData#deconstruct_keys returns {} when passed [] -fails:MatchData#deconstruct_keys does not accept non-Symbol keys -fails:MatchData#deconstruct_keys process keys till the first non-existing one -fails:MatchData#deconstruct_keys returns {} when there are no named captured groups at all -fails:MatchData#deconstruct_keys returns {} when passed more keys than named captured groups diff --git a/spec/tags/core/matchdata/deconstruct_tags.txt b/spec/tags/core/matchdata/deconstruct_tags.txt deleted file mode 100644 index 964df9ef8543..000000000000 --- a/spec/tags/core/matchdata/deconstruct_tags.txt +++ /dev/null @@ -1,2 +0,0 @@ -fails:MatchData#deconstruct returns an array of the match captures -fails:MatchData#deconstruct returns instances of String when given a String subclass diff --git a/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java b/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java index c67d0c40649a..e05ad73afdf2 100644 --- a/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java +++ b/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java @@ -627,6 +627,13 @@ Object byteBegin(RubyMatchData matchData, int index, } } + @Specialization(guards = "!inBounds(matchData, index)") + Object byteBeginError(RubyMatchData matchData, int index) { + throw new RaiseException( + getContext(), + coreExceptions().indexError(StringUtils.format("index %d out of matches", index), this)); + } + protected boolean inBounds(RubyMatchData matchData, int index) { return index >= 0 && index < matchData.region.numRegs; } @@ -649,6 +656,13 @@ Object byteEnd(RubyMatchData matchData, int index, } } + @Specialization(guards = "!inBounds(matchData, index)") + Object byteEndError(RubyMatchData matchData, int index) { + throw new RaiseException( + getContext(), + coreExceptions().indexError(StringUtils.format("index %d out of matches", index), this)); + } + protected boolean inBounds(RubyMatchData matchData, int index) { return index >= 0 && index < matchData.region.numRegs; } diff --git a/src/main/ruby/truffleruby/core/match_data.rb b/src/main/ruby/truffleruby/core/match_data.rb index 8dcb21765409..d36c184c019c 100644 --- a/src/main/ruby/truffleruby/core/match_data.rb +++ b/src/main/ruby/truffleruby/core/match_data.rb @@ -40,6 +40,11 @@ class << self undef_method :allocate end + def byteoffset(idx) + backref = backref_from_arg(idx) + [Primitive.match_data_byte_begin(self, backref), Primitive.match_data_byte_end(self, backref)] + end + def offset(idx) [self.begin(idx), self.end(idx)] end @@ -61,6 +66,26 @@ def string def captures to_a[1..-1] end + alias_method :deconstruct, :captures + + def deconstruct_keys(array_of_names) + Truffle::Type.rb_check_type(array_of_names, Array) unless Primitive.nil?(array_of_names) + + hash = named_captures.transform_keys(&:to_sym) + return hash if Primitive.nil?(array_of_names) + + ret = {} + return ret if array_of_names.size > hash.size + + array_of_names.each do |key| + Truffle::Type.rb_check_type(key, Symbol) + value = Primitive.hash_get_or_undefined(hash, key) + break if Primitive.undefined?(value) + ret[key] = value + end + + ret + end def names regexp.names @@ -71,26 +96,12 @@ def named_captures end def begin(index) - backref = if Primitive.is_a?(index, String) || Primitive.is_a?(index, Symbol) - names_to_backref = Hash[Primitive.regexp_names(self.regexp)] - names_to_backref[index.to_sym].last - else - Truffle::Type.coerce_to(index, Integer, :to_int) - end - - + backref = backref_from_arg(index) Primitive.match_data_begin(self, backref) end def end(index) - backref = if Primitive.is_a?(index, String) || Primitive.is_a?(index, Symbol) - names_to_backref = Hash[Primitive.regexp_names(self.regexp)] - names_to_backref[index.to_sym].last - else - Truffle::Type.coerce_to(index, Integer, :to_int) - end - - + backref = backref_from_arg(index) Primitive.match_data_end(self, backref) end @@ -153,6 +164,21 @@ def match_length(n) def to_s self[0] end + + private + + def backref_from_arg(index) + if Primitive.is_a?(index, String) || Primitive.is_a?(index, Symbol) + names_to_backref = Hash[Primitive.regexp_names(self.regexp)] + array = names_to_backref[index.to_sym] + + raise IndexError, "undefined group name reference: #{index}" unless array + + return array.last + end + + Primitive.rb_to_int(index) + end end Truffle::KernelOperations.define_hooked_variable( diff --git a/test/mri/excludes/TestRegexp.rb b/test/mri/excludes/TestRegexp.rb index f6419d17d434..34669538e14d 100644 --- a/test/mri/excludes/TestRegexp.rb +++ b/test/mri/excludes/TestRegexp.rb @@ -39,10 +39,7 @@ exclude :test_match_control_meta_escape, "<0> expected but was" exclude :test_initialize_option, " expected but was" exclude :test_initialize_bool_warning, "expected: /expected true or false as ignorecase/" -exclude :test_match_byteoffset_begin_end, "NoMethodError: undefined method `byteoffset' for #" -exclude :test_match_data_deconstruct, "NoMethodError: undefined method `deconstruct' for #" exclude :test_linear_time_p, "NoMethodError: undefined method `linear_time?' for Regexp:Class" -exclude :test_match_data_deconstruct_keys, "NoMethodError: undefined method `deconstruct_keys' for #" exclude :test_extended_comment_invalid_escape_bug_18294, "assert_separately failed with error message" exclude :test_timeout_nil, "NoMethodError: undefined method `timeout=' for Regexp:Class" exclude :test_timeout_shorter_than_global, "NoMethodError: undefined method `timeout=' for Regexp:Class"