diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5e9cd8ec13..50b05b092ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -282,12 +282,12 @@ jobs: fail-fast: false matrix: sys: ["enable", "disable"] - runs-on: macos-10.15 + runs-on: macos-12 steps: - uses: actions/checkout@v2 with: submodules: true - - uses: vmactions/freebsd-vm@v0.1.5 + - uses: vmactions/freebsd-vm@v0.2.0 with: usesh: true prepare: pkg install -y ruby devel/ruby-gems pkgconf libxml2 libxslt diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml new file mode 100644 index 00000000000..7278f3f5462 --- /dev/null +++ b/.github/workflows/downstream.yml @@ -0,0 +1,73 @@ +name: downstream +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + downstream: + name: downstream-${{matrix.name}} + strategy: + fail-fast: false + matrix: + include: + - url: https://github.com/flavorjones/loofah + name: loofah + command: "bundle exec rake test" + - url: https://github.com/rails/rails-html-sanitizer + name: rails-html-sanitizer + command: "bundle exec rake test" + - url: https://github.com/rgrove/sanitize + name: sanitize + command: "bundle exec rake test" + - url: https://github.com/ebeigarts/signer + name: signer + command: "bundle exec rake spec" + - url: https://github.com/WinRb/Viewpoint + name: viewpoint + command: "bundle exec rspec spec" + - url: https://github.com/rails/rails + name: xmlmini + command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/" + - url: https://github.com/pythonicrubyist/creek + name: creek + command: "bundle exec rake spec" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile + - run: | + git clone --depth=1 ${{matrix.url}} ${{matrix.name}} + cd ${{matrix.name}} + if grep nokogiri Gemfile ; then + sed -i 's/\(.*nokogiri.*\)/\1, path: ".."/' Gemfile + else + echo "gem 'nokogiri', path: '..'" >> Gemfile + fi + if egrep "add_development_dependency.*\bbundler\b" *gemspec ; then + sed -i 's/.*add_development_dependency.*\bbundler\b.*//' *gemspec + fi + bundle install --local || bundle install + ${{matrix.command}} diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index f8b01d550a7..dff65b4a1eb 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -141,9 +141,17 @@ public class XmlReader extends RubyObject public IRubyObject attribute_nodes(ThreadContext context) { + context.runtime.getWarnings().warn("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); return currentNode().getAttributesNodes(); } + @JRubyMethod + public IRubyObject + attribute_hash(ThreadContext context) + { + return currentNode().getAttributes(context); + } + @JRubyMethod(name = "attributes?") public IRubyObject attributes_p(ThreadContext context) diff --git a/ext/java/nokogiri/internals/ReaderNode.java b/ext/java/nokogiri/internals/ReaderNode.java index f671075db9f..4dc17f62e4d 100644 --- a/ext/java/nokogiri/internals/ReaderNode.java +++ b/ext/java/nokogiri/internals/ReaderNode.java @@ -112,9 +112,10 @@ public abstract class ReaderNode getAttributes(ThreadContext context) { final Ruby runtime = context.runtime; - if (attributeList == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (attributeList == null) { return hash; } for (int i = 0; i < attributeList.length; i++) { + if (isNamespace(attributeList.names.get(i))) { continue; } IRubyObject k = stringOrBlank(runtime, attributeList.names.get(i)); IRubyObject v = stringOrBlank(runtime, attributeList.values.get(i)); hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) @@ -150,8 +151,8 @@ public abstract class ReaderNode getNamespaces(ThreadContext context) { final Ruby runtime = context.runtime; - if (namespaces == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (namespaces == null) { return hash; } for (Map.Entry entry : namespaces.entrySet()) { IRubyObject k = stringOrBlank(runtime, entry.getKey()); IRubyObject v = stringOrBlank(runtime, entry.getValue()); diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 333155eea7e..46593b9cae9 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -974,6 +974,7 @@ def compile have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 have_func("rb_gc_location") # introduced in Ruby 2.7 +have_func("rb_category_warning") # introduced in Ruby 3.0 have_func("vasprintf") diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 76394f32e25..51ddbf8d1eb 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -202,6 +202,12 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) +#if HAVE_RB_CATEGORY_WARNING +# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define NOKO_WARN_DEPRECATION(message) rb_warning(message) +#endif + void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data, xmlStructuredErrorFunc handler); diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 2507f577f12..c80d2d4247c 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1806,7 +1806,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass) } if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) { // TODO: deprecate allowing Node - rb_warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); + NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); } Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node); diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index 4f87e18f144..022f8ff5310 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader) return (0); } +// TODO: merge this function into the `namespaces` method implementation static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) { @@ -150,7 +151,11 @@ namespaces(VALUE self) /* :call-seq: attribute_nodes() → Array - Get the attributes of the current node as an Array of Attr + Get the attributes of the current node as an Array of XML:Attr + + ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri. + + See related: #attribute_hash, #attributes */ static VALUE rb_xml_reader_attribute_nodes(VALUE rb_reader) @@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) VALUE attr_nodes; int j; + // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598 + // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c + NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); if (! has_attributes(c_reader)) { @@ -181,6 +190,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) return attr_nodes; } +/* + :call-seq: attribute_hash() → Hash + + Get the attributes of the current node as a Hash of names and values. + + See related: #attributes and #namespaces + */ +static VALUE +rb_xml_reader_attribute_hash(VALUE rb_reader) +{ + VALUE rb_attributes = rb_hash_new(); + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + xmlAttrPtr c_property; + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + if (!has_attributes(c_reader)) { + return rb_attributes; + } + + c_node = xmlTextReaderExpand(c_reader); + c_property = c_node->properties; + while (c_property != NULL) { + VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name); + VALUE rb_value = Qnil; + xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property); + + if (c_value) { + rb_value = NOKOGIRI_STR_NEW2(c_value); + xmlFree(c_value); + } + + rb_hash_aset(rb_attributes, rb_name, rb_value); + + c_property = c_property->next; + } + + return rb_attributes; +} + /* * call-seq: * attribute_at(index) @@ -696,6 +746,7 @@ noko_init_xml_reader() rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1); rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0); rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0); + rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0); rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0); rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0); rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0); diff --git a/lib/nokogiri/xml/reader.rb b/lib/nokogiri/xml/reader.rb index df13216afed..5f637cf9535 100644 --- a/lib/nokogiri/xml/reader.rb +++ b/lib/nokogiri/xml/reader.rb @@ -83,16 +83,14 @@ def initialize(source, url = nil, encoding = nil) # :nodoc: end private :initialize - # Get the attributes of the current node as a Hash + # Get the attributes and namespaces of the current node as a Hash. # - # [Returns] (Hash) Attribute names and values + # This is the union of Reader#attribute_hash and Reader#namespaces + # + # [Returns] + # (Hash) Attribute names and values, and namespace prefixes and hrefs. def attributes - attrs_hash = attribute_nodes.each_with_object({}) do |node, hash| - hash[node.name] = node.to_s - end - ns = namespaces - attrs_hash.merge!(ns) if ns - attrs_hash + attribute_hash.merge(namespaces) end ### diff --git a/test/xml/test_reader.rb b/test/xml/test_reader.rb index b3019691e6a..ab119656c7a 100644 --- a/test/xml/test_reader.rb +++ b/test/xml/test_reader.rb @@ -228,23 +228,61 @@ def test_attributes? reader.map(&:attributes?)) end - def test_attributes - reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) - - snuggles! - - eoxml + def test_reader_attributes + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML assert_empty(reader.attributes) assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/", "xmlns" => "http://mothership.connection.com/", }, - {}, { "awesome" => "true" }, {}, { "awesome" => "true" }, {}, + {}, + { "awesome" => "true" }, + {}, + { "awesome" => "true" }, + {}, { "xmlns:tenderlove" => "http://tenderlovemaking.com/", "xmlns" => "http://mothership.connection.com/", },], reader.map(&:attributes)) end + def test_reader_attributes_hash + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML + assert_empty(reader.attribute_hash) + assert_equal([{}, + {}, + { "awesome" => "true" }, + {}, + { "awesome" => "true" }, + {}, + {},], + reader.map(&:attribute_hash)) + end + + def test_reader_namespaces + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML + assert_empty(reader.namespaces) + assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/", + "xmlns" => "http://mothership.connection.com/", }, + {}, + {}, + {}, + {}, + {}, + { "xmlns:tenderlove" => "http://tenderlovemaking.com/", + "xmlns" => "http://mothership.connection.com/", },], + reader.map(&:namespaces)) + end + def test_attribute_roundtrip reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) " - 10000.times { |j| xml << "" } - xml << "" - xml = xml.join("\n") + skip_unless_libxml2("valgrind tests should only run with libxml2") - Nokogiri::XML::Reader.from_memory(xml).each(&:attributes) + refute_valgrind_errors do + xml = [] + xml << "" + 10000.times { |j| xml << "" } + xml << "" + xml = xml.join("\n") + + Nokogiri::XML::Reader.from_memory(xml).each(&:attributes) + end end def test_correct_outer_xml_inclusion