Skip to content

Commit

Permalink
spike out pandoc tests
Browse files Browse the repository at this point in the history
  • Loading branch information
benbalter committed May 22, 2024
1 parent b425c1b commit 8159ff4
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 3 deletions.
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.2.2
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruby 3.1.2
11 changes: 9 additions & 2 deletions lib/word-to-markdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
require_relative 'word-to-markdown/version'
require_relative 'word-to-markdown/document'
require_relative 'word-to-markdown/converter'
require_relative 'word-to-markdown/pandoc-converter'
require_relative 'nokogiri/xml/element'
require_relative 'cliver/dependency_ext'

Expand Down Expand Up @@ -43,9 +44,15 @@ class WordToMarkdown
# @param path [string] Path to the Word document
# @param tmpdir [string] Path to a working directory to use
# @return [WordToMarkdown] WordToMarkdown object with the converted document
def initialize(path, tmpdir = nil)
def initialize(path, tmpdir = nil, use_pandoc = false)
@document = WordToMarkdown::Document.new path, tmpdir
@converter = WordToMarkdown::Converter.new @document

@converter = if use_pandoc
WordToMarkdown::PandocConverter.new @document
else
WordToMarkdown::Converter.new @document
end

converter.convert!
end

Expand Down
1 change: 1 addition & 0 deletions lib/word-to-markdown/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class NotFoundError < StandardError; end
class ConversionError < StandardError; end

attr_reader :path, :tmpdir
attr_writer :markdown, :raw_html

# @param path [string] Path to the Word document
# @param tmpdir [string] Path to a working directory to use
Expand Down
25 changes: 25 additions & 0 deletions lib/word-to-markdown/pandoc-converter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

class WordToMarkdown
  # Converter variant that delegates to pandoc (via the pandoc-ruby gem)
  # and stores the HTML pandoc produces on the wrapped document.
  class PandocConverter
    attr_reader :document

    # @param document [WordToMarkdown::Document] The document to convert
    def initialize(document)
      @document = document
    end

    # Renders the document to HTML with pandoc and assigns the result to
    # the document's raw_html.
    #
    # @return the value produced by pandoc's to_html
    def convert!
      html = pandoc.to_html
      # NOTE: direct Markdown output is not wired up yet; the idea was to
      # try GFM, CommonMark, or + Extensions along these lines:
      #   raw_markdown = pandoc.to_markdown
      #   document.markdown = document.send(:scrub_whitespace, raw_markdown)
      document.raw_html = html
    end

    private

    # Memoized PandocRuby instance for the document's path, created with
    # from: 'docx'. The gem is required here, on use, rather than at
    # file load time.
    def pandoc
      require 'pandoc-ruby'
      return @pandoc if @pandoc

      @pandoc = PandocRuby.new([document.path], from: 'docx')
    end
  end
end
7 changes: 6 additions & 1 deletion test/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ def fixture_path(fixture = '')
end

def validate_fixture(fixture, expected)
assert_equal expected, WordToMarkdown.new(fixture_path(fixture)).to_s
path = fixture_path(fixture)
old = WordToMarkdown.new(path).to_s
assert_equal expected, old, "LibreOffice"

new = WordToMarkdown.new(path, nil, true).to_s
assert_equal expected, new, "Pandoc"
end

def stub_doc(html)
Expand Down
1 change: 1 addition & 0 deletions word-to-markdown.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Gem::Specification.new do |s|
s.add_dependency('cliver', '~> 0.3')
s.add_dependency('descriptive_statistics', '~> 2.5')
s.add_dependency('nokogiri-styles', '~> 0.1')
s.add_dependency('pandoc-ruby', '~> 2.0')
s.add_dependency('premailer', '~> 1.8')
s.add_dependency('reverse_markdown', '>= 1', '< 3')
s.add_dependency('sys-proctable', '~> 1.0')
Expand Down

0 comments on commit 8159ff4

Please sign in to comment.