From a25fcbf7af4298c0b88ec9d844656f2f5e34b519 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 23 Jan 2025 21:19:20 +0900 Subject: [PATCH 1/3] Escape reserved characters in scheme name Fix #89 --- lib/uri/common.rb | 44 +++++++++++++++++++++++++++++++++-------- test/uri/test_common.rb | 21 ++++++++++---------- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/lib/uri/common.rb b/lib/uri/common.rb index c3fe0b4..83cd679 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -88,6 +88,38 @@ def make_components_hash(klass, array_hash) end module Schemes + class << self + ReservedChars = ".+-" + EscapedChars = "\uFE52\uFE62\uFE63" + + def escape(name) + unless name and name.ascii_only? + return nil + end + name.upcase.tr(ReservedChars, EscapedChars) + end + + def unescape(name) + name.tr(EscapedChars, ReservedChars).encode(Encoding::US_ASCII).upcase + end + + def find(name) + const_get(name, false) if name and const_defined?(name, false) + end + + def register(name, klass) + unless scheme = escape(name) + raise ArgumentError, "invalid characater as scheme - #{name}" + end + const_set(scheme, klass) + end + + def list + constants.map { |name| + [unescape(name.to_s), const_get(name)] + }.to_h + end + end end private_constant :Schemes @@ -100,7 +132,7 @@ module Schemes # Note that after calling String#upcase on +scheme+, it must be a valid # constant name. def self.register_scheme(scheme, klass) - Schemes.const_set(scheme.to_s.upcase, klass) + Schemes.register(scheme, klass) end # Returns a hash of the defined schemes: @@ -118,9 +150,7 @@ def self.register_scheme(scheme, klass) # # Related: URI.register_scheme. def self.scheme_list - Schemes.constants.map { |name| - [name.to_s.upcase, Schemes.const_get(name)] - }.to_h + Schemes.list end INITIAL_SCHEMES = scheme_list @@ -144,12 +174,10 @@ def self.scheme_list # # => # # def self.for(scheme, *arguments, default: Generic) - const_name = scheme.to_s.upcase + const_name = Schemes.escape(scheme) uri_class = INITIAL_SCHEMES[const_name] - uri_class ||= if /\A[A-Z]\w*\z/.match?(const_name) && Schemes.const_defined?(const_name, false) - Schemes.const_get(const_name, false) - end + uri_class ||= Schemes.find(const_name) uri_class ||= default return uri_class.new(scheme, *arguments) diff --git a/test/uri/test_common.rb b/test/uri/test_common.rb index 6326aec..01f1697 100644 --- a/test/uri/test_common.rb +++ b/test/uri/test_common.rb @@ -113,17 +113,18 @@ def test_register_scheme_lowercase def test_register_scheme_with_symbols # Valid schemes from https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml - some_uri_class = Class.new(URI::Generic) - assert_raise(NameError) { URI.register_scheme 'ms-search', some_uri_class } - assert_raise(NameError) { URI.register_scheme 'microsoft.windows.camera', some_uri_class } - assert_raise(NameError) { URI.register_scheme 'coaps+ws', some_uri_class } + list = [] + %w[ms-search microsoft.windows.camera coaps+ws].each {|name| + list << [name, URI.register_scheme(name, Class.new(URI::Generic))] + } - ms_search_class = Class.new(URI::Generic) - URI.register_scheme 'MS_SEARCH', ms_search_class - begin - assert_equal URI::Generic, URI.parse('ms-search://localhost').class - ensure - URI.const_get(:Schemes).send(:remove_const, :MS_SEARCH) + list.each do |scheme, uri_class| + assert_equal uri_class, URI.parse("#{scheme}://localhost").class + end + ensure + schemes = URI.const_get(:Schemes) + list.each do |scheme, | + schemes.send(:remove_const, schemes.escape(scheme)) end end From 912378789cb1931a6a6f331b2bfa20969c6799f3 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 23 Jan 2025 22:07:04 +0900 Subject: [PATCH 2/3] Use Lo category chars as escaped chars TruffleRuby does not allow Symbol categories as identifiers. --- lib/uri/common.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/uri/common.rb b/lib/uri/common.rb index 83cd679..ed7c2fb 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -90,7 +90,9 @@ def make_components_hash(klass, array_hash) module Schemes class << self ReservedChars = ".+-" - EscapedChars = "\uFE52\uFE62\uFE63" + EscapedChars = "\u01C0\u01C1\u01C2" + # Use Lo category chars as escaped chars for TruffleRuby, which + # does not allow Symbol categories as identifiers. def escape(name) unless name and name.ascii_only? From 4ff80db407e59897812c5df9d4528607dea05bf1 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 27 Feb 2025 13:28:32 +0900 Subject: [PATCH 3/3] Fix a typo Co-authored-by: Olle Jonsson --- lib/uri/common.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/uri/common.rb b/lib/uri/common.rb index ed7c2fb..37f9268 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -111,7 +111,7 @@ def find(name) def register(name, klass) unless scheme = escape(name) - raise ArgumentError, "invalid characater as scheme - #{name}" + raise ArgumentError, "invalid character as scheme - #{name}" end const_set(scheme, klass) end