From 36a8e704ed97141be350d87a25ed3f36ae6a099b Mon Sep 17 00:00:00 2001
From: pezholio
Date: Mon, 21 Oct 2024 13:23:54 +0100
Subject: [PATCH] Add an `EmbedExtractor`

This allows us to extract embed codes, their uuids and their types when
given a block of Govspeak.
---
Review notes (text between "---" and the diffstat is not part of the commit
message):
- content_references now de-duplicates the regex matches before building
  EmbeddedContent objects; the previous trailing `.uniq` compared the objects
  by identity and so never removed repeated embed codes.
- The test fixture uses a squiggly heredoc instead of `"""`, and a test for
  repeated embed codes is added.
- The commit id above and the blob ids on the "index" lines belong to the
  original patch; regenerate them with `git format-patch` after applying.

 lib/govspeak.rb                  |  2 +
 lib/govspeak/embed_extractor.rb  | 24 +++++++++++
 lib/govspeak/embedded_content.rb | 17 ++++++++
 test/embed_extractor_test.rb     | 74 ++++++++++++++++++++++++++++++++
 4 files changed, 117 insertions(+)
 create mode 100644 lib/govspeak/embed_extractor.rb
 create mode 100644 lib/govspeak/embedded_content.rb
 create mode 100644 test/embed_extractor_test.rb

diff --git a/lib/govspeak.rb b/lib/govspeak.rb
index 16890ed..e2ff764 100644
--- a/lib/govspeak.rb
+++ b/lib/govspeak.rb
@@ -14,6 +14,8 @@
 require "govspeak/html_validator"
 require "govspeak/html_sanitizer"
 require "govspeak/blockquote_extra_quote_remover"
+require "govspeak/embed_extractor"
+require "govspeak/embedded_content"
 require "govspeak/post_processor"
 require "govspeak/link_extractor"
 require "govspeak/template_renderer"
diff --git a/lib/govspeak/embed_extractor.rb b/lib/govspeak/embed_extractor.rb
new file mode 100644
index 0000000..8dfe00d
--- /dev/null
+++ b/lib/govspeak/embed_extractor.rb
@@ -0,0 +1,24 @@
+module Govspeak
+  # Extracts `{{embed:document_type:uuid}}` codes from a block of Govspeak.
+  class EmbedExtractor
+    # document - the Govspeak text to scan (anything responding to #scan).
+    def initialize(document)
+      @document = document
+    end
+
+    # Returns one EmbeddedContent per distinct embed code, in order of first
+    # appearance. Matches are de-duplicated before the objects are built:
+    # EmbeddedContent does not define #eql?/#hash, so Array#uniq on the built
+    # objects compares by identity and never removes anything.
+    def content_references
+      @content_references ||= @document.scan(EmbeddedContent::EMBED_REGEX).uniq.map { |match|
+        EmbeddedContent.new(document_type: match[1], content_id: match[2], embed_code: match[0])
+      }
+    end
+
+    # Returns the content_id (UUID) of every reference, in the same order.
+    def content_ids
+      @content_ids ||= content_references.map(&:content_id)
+    end
+  end
+end
diff --git a/lib/govspeak/embedded_content.rb b/lib/govspeak/embedded_content.rb
new file mode 100644
index 0000000..e5de143
--- /dev/null
+++ b/lib/govspeak/embedded_content.rb
@@ -0,0 +1,17 @@
+module Govspeak
+  # A single embed code found in Govspeak, with its document type and UUID.
+  class EmbeddedContent
+    SUPPORTED_DOCUMENT_TYPES = %w[contact content_block_email_address].freeze
+    UUID_REGEX = /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/
+    # Captures, in order: the whole embed code, the document type, the UUID.
+    EMBED_REGEX = /({{embed:(#{SUPPORTED_DOCUMENT_TYPES.join('|')}):#{UUID_REGEX}}})/
+
+    attr_reader :document_type, :content_id, :embed_code
+
+    def initialize(document_type:, content_id:, embed_code:)
+      @document_type = document_type
+      @content_id = content_id
+      @embed_code = embed_code
+    end
+  end
+end
diff --git a/test/embed_extractor_test.rb b/test/embed_extractor_test.rb
new file mode 100644
index 0000000..0afe41e
--- /dev/null
+++ b/test/embed_extractor_test.rb
@@ -0,0 +1,74 @@
+require "test_helper"
+
+class EmbedExtractorTest < Minitest::Test
+  extend Minitest::Spec::DSL
+
+  describe "EmbedExtractor" do
+    subject { Govspeak::EmbedExtractor.new(document) }
+
+    describe "when there is no embedded content" do
+      let(:document) { "foo" }
+
+      describe "#content_references" do
+        it "returns an empty array" do
+          assert_equal [], subject.content_references
+        end
+      end
+
+      describe "#content_ids" do
+        it "returns an empty array" do
+          assert_equal [], subject.content_ids
+        end
+      end
+    end
+
+    describe "when there is embedded content" do
+      let(:contact_uuid) { SecureRandom.uuid }
+      let(:content_block_email_address_uuid) { SecureRandom.uuid }
+
+      let(:document) do
+        <<~GOVSPEAK
+          {{embed:contact:#{contact_uuid}}}
+          {{embed:content_block_email_address:#{content_block_email_address_uuid}}}
+        GOVSPEAK
+      end
+
+      describe "#content_references" do
+        it "returns all references" do
+          result = subject.content_references
+
+          assert_equal 2, result.count
+
+          assert_equal "contact", result[0].document_type
+          assert_equal contact_uuid, result[0].content_id
+          assert_equal "{{embed:contact:#{contact_uuid}}}", result[0].embed_code
+
+          assert_equal "content_block_email_address", result[1].document_type
+          assert_equal content_block_email_address_uuid, result[1].content_id
+          assert_equal "{{embed:content_block_email_address:#{content_block_email_address_uuid}}}", result[1].embed_code
+        end
+      end
+
+      describe "#content_ids" do
+        it "returns all uuids as an array" do
+          assert_equal [contact_uuid, content_block_email_address_uuid], subject.content_ids
+        end
+      end
+    end
+
+    describe "when the same embed code appears more than once" do
+      let(:contact_uuid) { SecureRandom.uuid }
+
+      let(:document) do
+        <<~GOVSPEAK
+          {{embed:contact:#{contact_uuid}}}
+          {{embed:contact:#{contact_uuid}}}
+        GOVSPEAK
+      end
+
+      it "returns a single reference for it" do
+        assert_equal [contact_uuid], subject.content_ids
+      end
+    end
+  end
+end