diff --git a/src/converter/fixer.jl b/src/converter/fixer.jl index 06b299ed9..bb61c727d 100644 --- a/src/converter/fixer.jl +++ b/src/converter/fixer.jl @@ -14,7 +14,7 @@ function find_and_fix_md_links(hs::String)::String # the regexes very readable... # here we're looking for [id]: link; 1=id 2=link - m_link_defs = collect(eachmatch(r"[((?:(?!]).)*?)]:\s((?:(?!\<\/p\>)\S)+)", hs)) + m_link_defs = collect(eachmatch(r"[((?:(?!]).)*?)]:\s+((?:(?!\<\/p\>)\S)+)", hs)) def_names = [def.captures[1] for def in m_link_defs] def_links = [def.captures[2] for def in m_link_defs] diff --git a/src/converter/md.jl b/src/converter/md.jl index 066c7819a..3e07b3b5d 100644 --- a/src/converter/md.jl +++ b/src/converter/md.jl @@ -39,9 +39,11 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}(); #> 2. Open-Close blocks (OCBlocks) #>> a. find them blocks, tokens = find_all_ocblocks(tokens, MD_OCB_ALL) - #>> b. now that blocks have been found, line-returns can be dropped + #>> b. merge CODE_BLOCK_IND which are separated by emptyness + merge_indented_code_blocks!(blocks, mds) + #>> c. now that blocks have been found, line-returns can be dropped filter!(τ -> τ.name ∉ L_RETURNS, tokens) - #>> c. filter out "fake headers" (opening ### that are not at the start of a line) + #>> d. filter out "fake headers" (opening ### that are not at the start of a line) filter!(β -> validate_header_block(β), blocks) #> 3. 
LaTeX commands diff --git a/src/converter/md_blocks.jl b/src/converter/md_blocks.jl index ebd7a7c2b..2e8aeeb9d 100644 --- a/src/converter/md_blocks.jl +++ b/src/converter/md_blocks.jl @@ -11,10 +11,10 @@ function convert_block(β::AbstractBlock, lxcontext::LxContext)::AbstractString # Return relevant interpolated string based on case βn = β.name βn ∈ MD_HEADER && return convert_header(β) - βn == :CODE_INLINE && return md2html(β.ss; stripp=true, code=true) + βn == :CODE_INLINE && return html_code_inline(content(β) |> Markdown.htmlesc) βn == :CODE_BLOCK_LANG && return convert_code_block(β.ss) βn == :CODE_BLOCK_IND && return convert_indented_code_block(β.ss) - βn == :CODE_BLOCK && return md2html(β.ss; code=true) + βn == :CODE_BLOCK && return md2html(β.ss) βn == :ESCAPE && return chop(β.ss, head=3, tail=3) # Math block --> needs to call further processing to resolve possible latex @@ -185,5 +185,5 @@ function convert_indented_code_block(ss::SubString)::String # 1. decrease indentation of all lines (either frontal \n\t or \n⎵⎵⎵⎵) code = replace(ss, r"\n(?:\t| {4})" => "\n") # 2. return; lang is a LOCAL_PAGE_VARS that is julia by default and can be set - return html_code(code, "{{fill lang}}") + return html_code(strip(code), "{{fill lang}}") end diff --git a/src/converter/md_utils.jl b/src/converter/md_utils.jl index 808eac407..4edabf8d6 100644 --- a/src/converter/md_utils.jl +++ b/src/converter/md_utils.jl @@ -6,13 +6,11 @@ that don't need to be further considered and don't contain anything else than ma The boolean `stripp` indicates whether to remove the inserted `
` and `
` by the base markdown processor, this is relevant for things that are parsed within latex commands etc. """ -function md2html(ss::AbstractString; stripp::Bool=false, code::Bool=false)::AbstractString - +function md2html(ss::AbstractString; stripp::Bool=false)::AbstractString + # if there's nothing, return that... isempty(ss) && return ss - # Use Julia's Markdown parser followed by Julia's MD->HTML conversion partial = ss |> fix_inserts |> Markdown.parse |> Markdown.html - # In some cases, base converter adds...
\n which we might not want stripp || return partial startswith(partial, "<p>") && (partial = chop(partial, head=3))
diff --git a/src/misc_html.jl b/src/misc_html.jl
index 8b53a70ac..a8928a7af 100644
--- a/src/misc_html.jl
+++ b/src/misc_html.jl
@@ -32,7 +32,7 @@ html_img(src::AbstractString, alt::AbstractString="") =
"""
$(SIGNATURES)
-Convenience function to introduce an image.
+Convenience function to introduce a code block.
"""
function html_code(c::AbstractString, lang::AbstractString="")
isempty(c) && return ""
@@ -43,6 +43,13 @@ end
"""
$(SIGNATURES)
+Convenience function to introduce inline code.
+"""
+html_code_inline(c::AbstractString) = "<code>$c</code>"
+
+"""
+$(SIGNATURES)
+
Insertion of a visible red message in HTML to show there was a problem.
"""
html_err(mess::String="") = "
// $mess //
" diff --git a/src/parser/ocblocks.jl b/src/parser/ocblocks.jl index be2131825..a24744836 100644 --- a/src/parser/ocblocks.jl +++ b/src/parser/ocblocks.jl @@ -5,7 +5,8 @@ Find active blocks between an opening token (`otoken`) and a closing token `ctok nested (e.g. braces). Return the list of such blocks. If `deactivate` is `true`, all the tokens within the block will be marked as inactive (for further, separate processing). """ -function find_ocblocks(tokens::Vector{Token}, ocproto::OCProto; inmath=false) +function find_ocblocks(tokens::Vector{Token}, ocproto::OCProto; + inmath=false)::Tuple{Vector{OCBlock}, Vector{Token}} ntokens = length(tokens) active_tokens = ones(Bool, length(tokens)) @@ -127,8 +128,8 @@ function find_indented_blocks(tokens::Vector{Token}, st::String)::Vector{Token} # blocks. for i in 1:length(lr_idx)-1 # capture start and finish of the line (from line return to line return) - start = from(tokens[lr_idx[i]]) # first :LINE_RETURN - finish = from(tokens[lr_idx[i+1]]) # next :LINE_RETURN + start = from(tokens[lr_idx[i]]) # first :LINE_RETURN + finish = from(tokens[lr_idx[i+1]]) # next :LINE_RETURN line = subs(st, start, finish) indent = "" if startswith(line, "\n ") @@ -155,3 +156,55 @@ function find_indented_blocks(tokens::Vector{Token}, st::String)::Vector{Token} end return tokens end + + +""" +$SIGNATURES + +When two indented code blocks follow each other and there's nothing in between (empty line(s)), +merge them into a super block. 
+"""
+function merge_indented_code_blocks!(blocks::Vector{OCBlock}, mds::String)::Nothing
+    # positions in `blocks` of the CODE_BLOCK_IND blocks
+    idx = [i for i in eachindex(blocks) if blocks[i].name == :CODE_BLOCK_IND]
+    isempty(idx) && return
+    # inter_space[i] is true when non-whitespace text lies between idx[i] and idx[i+1]
+    inter_space = [(subs(mds, to(blocks[idx[i]]), from(blocks[idx[i+1]])) |> strip |> length) > 0
+                   for i in 1:length(idx)-1]
+
+    curseq     = Int[] # positions in `idx` of the run of blocks currently being merged
+    del_blocks = Int[] # positions in `blocks` of the blocks to remove afterwards
+
+    # if there's no inter_space, add to the list, if there is, close and merge
+    for i in eachindex(inter_space)
+        if inter_space[i] && !isempty(curseq)
+            # close and merge all in curseq and empty curseq
+            form_super_block!(blocks, idx, curseq, del_blocks)
+        elseif !inter_space[i]
+            push!(curseq, i)
+        end
+    end
+    !isempty(curseq) && form_super_block!(blocks, idx, curseq, del_blocks)
+    # remove the merged-away blocks (del_blocks holds increasing positions in `blocks`)
+    deleteat!(blocks, del_blocks)
+    return
+end
+
+
+"""
+$SIGNATURES
+
+Helper function to [`merge_indented_code_blocks!`](@ref): fuse one run of adjacent blocks.
+"""
+function form_super_block!(blocks::Vector{OCBlock}, idx::Vector{Int},
+                           curseq::Vector{Int}, del_blocks::Vector{Int})::Nothing
+    push!(curseq, curseq[end]+1) # gap i joins idx[i] and idx[i+1]: include the last block
+    first_block = blocks[idx[curseq[1]]]
+    last_block  = blocks[idx[curseq[end]]]
+    # replace the first block with the super block
+    blocks[idx[curseq[1]]] = OCBlock(:CODE_BLOCK_IND, (otok(first_block) => ctok(last_block)))
+    # queue all but the first for deletion; map through idx as del_blocks indexes `blocks`
+    append!(del_blocks, idx[curseq[2:end]])
+    empty!(curseq)
+    return
+end
diff --git a/test/converter/markdown3.jl b/test/converter/markdown3.jl index 95573867d..a4d119489 100644 --- a/test/converter/markdown3.jl +++ b/test/converter/markdown3.jl @@ -55,7 +55,7 @@ end tokens, = steps[:tokenization] @test tokens[7].name == :CHAR_LINEBREAK h = st |> seval - @test isapproxstr(st |> seval, """ + @test isapproxstr(st |> seval, raw"""Hello \ blah \ end
and B \ c
end
and
A \ b
@@ -285,4 +285,51 @@ end
end
""") + + st = raw""" + A + + function foo() + + return 2 + + end + + function bar() + return 3 + end + + B + + function baz() + return 5 + + end + + C + """ * J.EOS + isapproxstr(st |> seval, raw""" +A
function foo()
+
+ return 2
+
+ end
+
+ function bar()
+ return 3
+ end
+ B function baz()
+ return 5
+
+ end
+ C
+ """)
+end
+
+
+@testset "More ``" begin
+ st = raw"""
+ A ``blah``.
+ """ * J.EOS
+ isapproxstr(st |> seval, """A blah
.