diff --git a/CHANGELOG.md b/CHANGELOG.md index 9493df10..7960026c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## Unreleased + +* Fix a bug which resulted in validation errors on 'Start Button' elements [#237](https://github.com/alphagov/govspeak/pull/237) + ## 6.8.0 * Drop support for Ruby 2.6 which reaches End of Life (EOL) on 31/03/2022 diff --git a/lib/govspeak.rb b/lib/govspeak.rb index be7bf9e0..134ab2a2 100644 --- a/lib/govspeak.rb +++ b/lib/govspeak.rb @@ -54,6 +54,7 @@ def initialize(source, options = {}) @images = options.delete(:images) || [] @allowed_elements = options.delete(:allowed_elements) || [] + @allowed_image_hosts = options.delete(:allowed_image_hosts) || [] @attachments = Array.wrap(options.delete(:attachments)) @links = Array.wrap(options.delete(:links)) @contacts = Array.wrap(options.delete(:contacts)) @@ -69,7 +70,8 @@ def initialize(source, options = {}) def to_html @to_html ||= begin html = if @options[:sanitize] - HtmlSanitizer.new(kramdown_doc.to_html).sanitize(allowed_elements: @allowed_elements) + HtmlSanitizer.new(kramdown_doc.to_html, allowed_image_hosts: @allowed_image_hosts) + .sanitize(allowed_elements: @allowed_elements) else kramdown_doc.to_html end diff --git a/lib/govspeak/html_sanitizer.rb b/lib/govspeak/html_sanitizer.rb index edaadd52..9458f45e 100644 --- a/lib/govspeak/html_sanitizer.rb +++ b/lib/govspeak/html_sanitizer.rb @@ -46,7 +46,17 @@ def sanitize(allowed_elements: []) transformers << ImageSourceWhitelister.new(@allowed_image_hosts) end - Sanitize.clean(@dirty_html, Sanitize::Config.merge(sanitize_config(allowed_elements: allowed_elements), transformers: transformers)) + # It would be cleaner to move this `transformers` key into the `sanitize_config` method rather + # than having to use Sanitize::Config.merge() twice in succession. However, `sanitize_config` + # is a public method and it looks like other projects depend on it behaving the way it + # currently does – i.e. to return Sanitize config without any transformers. + # e.g. https://github.com/alphagov/hmrc-manuals-api/blob/4a83f78d0bb839520155623fd9b63b3b12a3b13a/app/validators/no_dangerous_html_in_text_fields_validator.rb#L44 + config_with_transformers = Sanitize::Config.merge( + sanitize_config(allowed_elements: allowed_elements), + transformers: transformers, + ) + + Sanitize.clean(@dirty_html, config_with_transformers) end def sanitize_config(allowed_elements: []) diff --git a/lib/govspeak/html_validator.rb b/lib/govspeak/html_validator.rb index 7fdada95..2e73fe43 100644 --- a/lib/govspeak/html_validator.rb +++ b/lib/govspeak/html_validator.rb @@ -1,9 +1,9 @@ class Govspeak::HtmlValidator attr_reader :govspeak_string - def initialize(govspeak_string, sanitization_options = {}) + def initialize(govspeak_string, options = {}) @govspeak_string = govspeak_string.dup.force_encoding(Encoding::UTF_8) - @sanitization_options = sanitization_options + @allowed_image_hosts = options[:allowed_image_hosts] end def invalid? @@ -11,17 +11,23 @@ def invalid? end def valid? - dirty_html = govspeak_to_html - clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize + dirty_html = govspeak_to_html(sanitize: false) + clean_html = govspeak_to_html(sanitize: true) normalise_html(dirty_html) == normalise_html(clean_html) end +private + # Make whitespace in html tags consistent def normalise_html(html) Nokogiri::HTML5.fragment(html).to_s end - def govspeak_to_html - Govspeak::Document.new(govspeak_string, sanitize: false).to_html + def govspeak_to_html(sanitize:) + Govspeak::Document.new( + govspeak_string, + sanitize: sanitize, + allowed_image_hosts: @allowed_image_hosts, + ).to_html end end