Skip to content

Commit

Permalink
Add an EmbedExtractor
Browse files Browse the repository at this point in the history
This allows us to extract embed codes, their uuids and their types when
given a block of Govspeak.
  • Loading branch information
pezholio committed Oct 21, 2024
1 parent 6a36feb commit 36a8e70
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 0 deletions.
2 changes: 2 additions & 0 deletions lib/govspeak.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
require "govspeak/html_validator"
require "govspeak/html_sanitizer"
require "govspeak/blockquote_extra_quote_remover"
require "govspeak/embed_extractor"
require "govspeak/embedded_content"
require "govspeak/post_processor"
require "govspeak/link_extractor"
require "govspeak/template_renderer"
Expand Down
17 changes: 17 additions & 0 deletions lib/govspeak/embed_extractor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module Govspeak
  # Extracts embed codes from a block of Govspeak text, using
  # EmbeddedContent::EMBED_REGEX to recognise them.
  class EmbedExtractor
    # @param document [String] the text to scan for embed codes
    def initialize(document)
      @document = document
    end

    # Returns one EmbeddedContent per distinct embed code found in the
    # document, in order of first appearance. The result is memoized.
    #
    # Duplicates are removed from the raw regex captures (arrays of strings,
    # which compare by value) *before* the objects are built. Calling `uniq`
    # on the built objects would only work if EmbeddedContent implemented
    # `eql?`/`hash`; deduplicating the captures does not rely on that.
    #
    # @return [Array<EmbeddedContent>]
    def content_references
      @content_references ||= begin
        # Each capture is [whole embed code, document type, content id],
        # following the group order in EMBED_REGEX.
        captures = @document.scan(EmbeddedContent::EMBED_REGEX).uniq

        captures.map do |embed_code, document_type, content_id|
          EmbeddedContent.new(
            document_type: document_type,
            content_id: content_id,
            embed_code: embed_code,
          )
        end
      end
    end

    # Returns the content id of every reference from #content_references,
    # in the same order. The result is memoized.
    #
    # @return [Array<String>]
    def content_ids
      @content_ids ||= content_references.map(&:content_id)
    end
  end
end
15 changes: 15 additions & 0 deletions lib/govspeak/embedded_content.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module Govspeak
  # Value object describing a single embed code found in Govspeak text:
  # the document type, the content id (a UUID) and the full embed code string.
  class EmbeddedContent
    # Document types that may appear in an embed code.
    SUPPORTED_DOCUMENT_TYPES = %w[contact content_block_email_address].freeze
    # Matches (and captures) a lowercase hexadecimal UUID.
    UUID_REGEX = /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/
    # Matches `{{embed:<document_type>:<uuid>}}`.
    # Capture groups, in order: whole embed code, document type, UUID.
    EMBED_REGEX = /({{embed:(#{SUPPORTED_DOCUMENT_TYPES.join('|')}):#{UUID_REGEX}}})/

    attr_reader :document_type, :content_id, :embed_code

    # @param document_type [String] the type named in the embed code
    # @param content_id [String] the UUID named in the embed code
    # @param embed_code [String] the full embed code text
    def initialize(document_type:, content_id:, embed_code:)
      @document_type = document_type
      @content_id = content_id
      @embed_code = embed_code
    end

    # Two instances are equal when they are of the same class and all three
    # attributes are equal. Without this, `Array#uniq` and Hash lookups would
    # fall back to object identity and treat identical references as distinct.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      other.class == self.class && other.state == state
    end
    alias_method :eql?, :==

    # Hash consistent with #eql?, so equal instances land in the same bucket.
    #
    # @return [Integer]
    def hash
      [self.class, *state].hash
    end

  protected

    # The attributes that define this object's identity.
    #
    # @return [Array<String>]
    def state
      [document_type, content_id, embed_code]
    end
  end
end
59 changes: 59 additions & 0 deletions test/embed_extractor_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
require "test_helper"

# Exercises Govspeak::EmbedExtractor against documents with and without embed codes.
class EmbedExtractorTest < Minitest::Test
  extend Minitest::Spec::DSL

  describe "EmbedExtractor" do
    # Each nested context supplies its own `document`.
    subject { Govspeak::EmbedExtractor.new(document) }

    describe "when there is no embedded content" do
      let(:document) { "foo" }

      describe "#content_references" do
        it "returns an empty array" do
          assert_equal [], subject.content_references
        end
      end

      describe "#content_ids" do
        it "returns an empty array" do
          assert_equal [], subject.content_ids
        end
      end
    end

    describe "when there is embedded content" do
      let(:contact_uuid) { SecureRandom.uuid }
      let(:content_block_email_address_uuid) { SecureRandom.uuid }

      let(:contact_embed_code) { "{{embed:contact:#{contact_uuid}}}" }
      let(:email_address_embed_code) do
        "{{embed:content_block_email_address:#{content_block_email_address_uuid}}}"
      end

      # One embed code per line, with a leading and a trailing newline.
      let(:document) do
        ["", contact_embed_code, email_address_embed_code, ""].join("\n")
      end

      describe "#content_references" do
        it "returns all references" do
          references = subject.content_references

          assert_equal 2, references.size

          contact_reference, email_address_reference = references

          assert_equal "contact", contact_reference.document_type
          assert_equal contact_uuid, contact_reference.content_id
          assert_equal contact_embed_code, contact_reference.embed_code

          assert_equal "content_block_email_address", email_address_reference.document_type
          assert_equal content_block_email_address_uuid, email_address_reference.content_id
          assert_equal email_address_embed_code, email_address_reference.embed_code
        end
      end

      describe "#content_ids" do
        it "returns all uuids as an array" do
          expected_ids = [contact_uuid, content_block_email_address_uuid]

          assert_equal expected_ids, subject.content_ids
        end
      end
    end
  end
end

0 comments on commit 36a8e70

Please sign in to comment.