From ed90a4de3cb954a66888e69a8f9ae600285c986b Mon Sep 17 00:00:00 2001
From: Thibaut Lienart <ltib@me.com>
Date: Mon, 9 Sep 2019 15:19:45 +0200
Subject: [PATCH] closes #207 through introduction of LR_INDENT tokens

---
 src/converter/md.jl         |  5 +++-
 src/converter/md_blocks.jl  | 20 +++++++++++--
 src/converter/md_utils.jl   |  3 +-
 src/jd_vars.jl              |  3 +-
 src/parser/md_tokens.jl     | 23 ++++++++++-----
 src/parser/ocblocks.jl      | 44 +++++++++++++++++++++++++++
 test/converter/eval.jl      | 16 +++++-----
 test/converter/lx_simple.jl | 11 +++++--
 test/converter/markdown2.jl |  2 +-
 test/converter/markdown3.jl | 59 +++++++++++++++++++++++++++++++++++++
 test/global/postprocess.jl  |  2 +-
 test/manager/utils.jl       |  6 ++--
 test/test_utils.jl          |  4 +--
 13 files changed, 167 insertions(+), 31 deletions(-)
diff --git a/src/converter/md.jl b/src/converter/md.jl
index 3f3f7c5d0..066c7819a 100644
--- a/src/converter/md.jl
+++ b/src/converter/md.jl
@@ -33,11 +33,14 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     #> 1. Tokenize
     tokens = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
 
+    #> 1'. Find indented blocks
+    tokens = find_indented_blocks(tokens, mds)
+
     #> 2. Open-Close blocks (OCBlocks)
     #>> a. find them
     blocks, tokens = find_all_ocblocks(tokens, MD_OCB_ALL)
     #>> b. now that blocks have been found, line-returns can be dropped
-    filter!(τ -> τ.name != :LINE_RETURN, tokens)
+    filter!(τ -> τ.name ∉ L_RETURNS, tokens)
     #>> c. filter out "fake headers" (opening ### that are not at the start of a line)
     filter!(β -> validate_header_block(β), blocks)
 
diff --git a/src/converter/md_blocks.jl b/src/converter/md_blocks.jl
index 69bfc18cd..ebd7a7c2b 100644
--- a/src/converter/md_blocks.jl
+++ b/src/converter/md_blocks.jl
@@ -13,6 +13,7 @@ function convert_block(β::AbstractBlock, lxcontext::LxContext)::AbstractString
     βn ∈  MD_HEADER        && return convert_header(β)
     βn == :CODE_INLINE     && return md2html(β.ss; stripp=true, code=true)
     βn == :CODE_BLOCK_LANG && return convert_code_block(β.ss)
+    βn == :CODE_BLOCK_IND  && return convert_indented_code_block(β.ss)
     βn == :CODE_BLOCK      && return md2html(β.ss; code=true)
     βn == :ESCAPE          && return chop(β.ss, head=3, tail=3)
 
@@ -126,11 +127,11 @@ function convert_code_block(ss::SubString)::String
     code  = m.captures[3]
 
     if isnothing(rpath)
-        return "<pre><code class=\"language-$lang\">$code</code></pre>"
+        return html_code(code, lang)
     end
     if lang!="julia"
-        @warn "Eval of non-julia code blocks is not supported at the moment"
-        return "<pre><code class=\"language-$lang\">$code</code></pre>"
+        @warn "Eval of non-julia code blocks is not yet supported."
+        return html_code(code, lang)
     end
     # path currently has an indicative `:` we don't care about
     rpath = rpath[2:end]
@@ -173,3 +174,16 @@ function convert_code_block(ss::SubString)::String
     # step 3, insertion of code stripping of "hide" lines.
     return resolve_lx_input_hlcode(rpath, "julia")
 end
+
+
+"""
+$(SIGNATURES)
+
+Convert an indented code block: remove one level of indentation and hand over to `html_code`.
+"""
+function convert_indented_code_block(ss::SubString)::String
+    # one level of indentation is a tab or four spaces directly following a line return
+    dedented = replace(ss, r"\n(?:\t| {4})" => "\n")
+    # language is left as a `{{fill lang}}` insertion; `lang` is a local page var (default julia)
+    return html_code(dedented, "{{fill lang}}")
+end
diff --git a/src/converter/md_utils.jl b/src/converter/md_utils.jl
index 500968485..808eac407 100644
--- a/src/converter/md_utils.jl
+++ b/src/converter/md_utils.jl
@@ -7,9 +7,10 @@ The boolean `stripp` indicates whether to remove the inserted `<p>` and `</p>` b
 processor, this is relevant for things that are parsed within latex commands etc.
 """
 function md2html(ss::AbstractString; stripp::Bool=false, code::Bool=false)::AbstractString
+
     isempty(ss) && return ss
 
-    # Use the base Markdown -> Html converter
+    # Use Julia's Markdown parser followed by Julia's MD->HTML conversion
     partial = ss |> fix_inserts |> Markdown.parse |> Markdown.html
 
     # In some cases, base converter adds <p>...</p>\n which we might not want
diff --git a/src/jd_vars.jl b/src/jd_vars.jl
index ab32c2240..16fd1bb94 100644
--- a/src/jd_vars.jl
+++ b/src/jd_vars.jl
@@ -52,6 +52,7 @@ is processed.
     LOCAL_PAGE_VARS["jd_ctime"] = Pair(Date(1), (Date,))   # time of creation
     LOCAL_PAGE_VARS["jd_mtime"] = Pair(Date(1), (Date,))   # time of last modification
     LOCAL_PAGE_VARS["jd_rpath"] = Pair("",      (String,)) # local path to file src/[...]/blah.md
+    LOCAL_PAGE_VARS["lang"]     = Pair("julia", (String,)) # default lang for indented code
     return nothing
 end
 
@@ -62,7 +63,7 @@ PAGE_HEADERS
 Keep track of seen headers. The key amounts to the ordering (~ordered dict), the value contains
 the title, the refstring version of the title, the occurence number and the level (1, ..., 6).
 """
-const PAGE_HEADERS = Dict{Int, Tuple{AbstractString,AbstractString,Int,Int}}()
+const PAGE_HEADERS = Dict{Int,Tuple{AbstractString,AbstractString,Int,Int}}()
 
 
 """
diff --git a/src/parser/md_tokens.jl b/src/parser/md_tokens.jl
index 64934b26c..c7b4dfbd9 100644
--- a/src/parser/md_tokens.jl
+++ b/src/parser/md_tokens.jl
@@ -105,6 +105,14 @@ The first group captures the name (`var`), the second the assignment (`value`).
 const MD_DEF_PAT = r"@def\s+(\S+)\s*?=\s*?(\S.*)"
 
 
+"""
+L_RETURNS
+
+Convenience tuple with the names of the line-return tokens: `:LINE_RETURN` for a standard line
+return and `:LR_INDENT` for a line return followed by an indentation (four spaces or a tab).
+"""
+const L_RETURNS = (:LINE_RETURN, :LR_INDENT)
+
 """
 MD_OCB
 
@@ -121,18 +129,19 @@ const MD_OCB = [
     OCProto(:COMMENT,         :COMMENT_OPEN, (:COMMENT_CLOSE,),    false),
     OCProto(:CODE_BLOCK_LANG, :CODE_LANG,    (:CODE_TRIPLE,),      false),
     OCProto(:CODE_BLOCK,      :CODE_TRIPLE,  (:CODE_TRIPLE,),      false),
+    OCProto(:CODE_BLOCK_IND,  :LR_INDENT,    (:LINE_RETURN,),      false),
     OCProto(:CODE_INLINE,     :CODE_DOUBLE,  (:CODE_DOUBLE,),      false),
     OCProto(:CODE_INLINE,     :CODE_SINGLE,  (:CODE_SINGLE,),      false),
     OCProto(:ESCAPE,          :ESCAPE,       (:ESCAPE,),           false),
     # ------------------------------------------------------------------
-    OCProto(:H1,              :H1_OPEN,      (:LINE_RETURN, :EOS), false), # see [^3]
-    OCProto(:H2,              :H2_OPEN,      (:LINE_RETURN, :EOS), false),
-    OCProto(:H3,              :H3_OPEN,      (:LINE_RETURN, :EOS), false),
-    OCProto(:H4,              :H4_OPEN,      (:LINE_RETURN, :EOS), false),
-    OCProto(:H5,              :H5_OPEN,      (:LINE_RETURN, :EOS), false),
-    OCProto(:H6,              :H6_OPEN,      (:LINE_RETURN, :EOS), false),
+    OCProto(:H1,              :H1_OPEN,      (L_RETURNS..., :EOS), false), # see [^3]
+    OCProto(:H2,              :H2_OPEN,      (L_RETURNS..., :EOS), false),
+    OCProto(:H3,              :H3_OPEN,      (L_RETURNS..., :EOS), false),
+    OCProto(:H4,              :H4_OPEN,      (L_RETURNS..., :EOS), false),
+    OCProto(:H5,              :H5_OPEN,      (L_RETURNS..., :EOS), false),
+    OCProto(:H6,              :H6_OPEN,      (L_RETURNS..., :EOS), false),
     # ------------------------------------------------------------------
-    OCProto(:MD_DEF,          :MD_DEF_OPEN,  (:LINE_RETURN, :EOS), false), # see [^4]
+    OCProto(:MD_DEF,          :MD_DEF_OPEN,  (L_RETURNS..., :EOS), false), # see [^4]
     OCProto(:LXB,             :LXB_OPEN,     (:LXB_CLOSE,),        true ),
     OCProto(:DIV,             :DIV_OPEN,     (:DIV_CLOSE,),        true ),
     ]
diff --git a/src/parser/ocblocks.jl b/src/parser/ocblocks.jl
index d295f5ec8..be2131825 100644
--- a/src/parser/ocblocks.jl
+++ b/src/parser/ocblocks.jl
@@ -111,3 +111,47 @@ function merge_blocks(lvb::Vector{<:AbstractBlock}...)
     sort!(blocks, by=(β->from(β)))
     return blocks
 end
+
+
+"""
+$(SIGNATURES)
+
+Replace the `:LINE_RETURN` tokens that start an indented line of code by `:LR_INDENT` tokens. A
+line is indented if it starts with four spaces or a tab; empty lines and lines that look like a
+list item (`*`, `-`, `+`, or digits followed by `.` or `)`) are left alone. The text after the
+last line return is not examined. The vector `tokens` is modified in place and returned.
+"""
+function find_indented_blocks(tokens::Vector{Token}, st::String)::Vector{Token}
+    # indices of the line return tokens
+    lr_idx = [j for j in eachindex(tokens) if tokens[j].name == :LINE_RETURN]
+    # each pair of consecutive line returns delimits one line of `st`
+    # (the text after the last line return is not part of any such pair)
+    for (cur, nxt) in zip(lr_idx, Iterators.drop(lr_idx, 1))
+        start  = from(tokens[cur]) # position of the :LINE_RETURN opening the line
+        finish = from(tokens[nxt]) # position of the next :LINE_RETURN
+        line   = subs(st, start, finish)
+        # only lines starting with four spaces or a tab are candidates
+        if startswith(line, "\n    ")
+            indent = "    "
+        elseif startswith(line, "\n\t")
+            indent = "\t"
+        else
+            continue
+        end
+        # content of the line without the indentation and the closing line return
+        code_line = subs(st, nextind(st, start+length(indent)), prevind(st, finish))
+        scl       = strip(code_line)
+        # nothing on that line, leave it
+        isempty(scl) && continue
+        # a list takes precedence (commonmark); this may clash with some code lines in which
+        # case fenced code blocks should be used. An ordered list marker is made of one to
+        # nine digits followed by `.` or `)`, so `10. item` must be caught like `1. item`.
+        # TODO: document clearly that with fenced code blocks there are far fewer cases for issues
+        looks_like_a_list = occursin(r"^(?:[*+-]|[0-9]{1,9}[.)])", scl)
+        looks_like_a_list && continue
+        # if here, it looks like a code line (and will be considered as such); the new token
+        # spans the line return and the indentation.
+        tokens[cur] = Token(:LR_INDENT, subs(st, start, start+length(indent)))
+    end
+    return tokens
+end
diff --git a/test/converter/eval.jl b/test/converter/eval.jl
index 352adfc4b..68d4c28f5 100644
--- a/test/converter/eval.jl
+++ b/test/converter/eval.jl
@@ -1,4 +1,4 @@
-@testset "Eval code" begin
+@testset "Evalcode" begin
     # see `converter/md_blocks:convert_code_block`
     # see `converter/lx/resolve_lx_input_*`
     # --------------------------------------------
@@ -27,7 +27,7 @@
     @test occursin("then: <pre><code>25</code></pre> done.", h)
 end
 
-@testset "Eval code (errs)" begin
+@testset "Eval (errs)" begin
     # see `converter/md_blocks:convert_code_block`
     # --------------------------------------------
     h = raw"""
@@ -42,7 +42,7 @@ end
     @test occursin("code: <pre><code class=\"language-python\">a = 5\nprint(a**2)\n</code></pre> done.", h)
 end
 
-@testset "Eval (rel-input)" begin
+@testset "Eval (rinput)" begin
     h = raw"""
         Simple code:
         ```julia:/scripts/test2
@@ -92,7 +92,7 @@ end
     @test occursin("then: <pre><code>25</code></pre> done.", h)
 end
 
-@testset "Eval code (module)" begin
+@testset "Eval (module)" begin
     h = raw"""
         Simple code:
         ```julia:scripts/test1
@@ -108,7 +108,7 @@ end
     @test occursin("then: <pre><code>54</code></pre> done.", h)
 end
 
-@testset "Eval code (img)" begin
+@testset "Eval (img)" begin
     h = raw"""
         Simple code:
         ```julia:scripts/test1
@@ -121,7 +121,7 @@ end
     @test occursin("then: <img src=\"/assets/scripts/output/test1.png\" alt=\"\"> done.", h)
 end
 
-@testset "Eval code (exception)" begin
+@testset "Eval (throw)" begin
     h = raw"""
         Simple code:
         ```julia:scripts/test1
@@ -135,7 +135,7 @@ end
     @test occursin("then: <pre><code>There was an error running the code: DomainError", h)
 end
 
-@testset "Eval code (no-julia)" begin
+@testset "Eval (nojl)" begin
     h = raw"""
         Simple code:
         ```python:scripts/test1
@@ -144,7 +144,7 @@ end
         done.
         """ * J.EOS
 
-    @test (@test_logs (:warn, "Eval of non-julia code blocks is not supported at the moment") h |> seval) == "<p>Simple code: <pre><code class=\"language-python\">sqrt(-1)\n</code></pre> done.</p>\n"
+    @test (@test_logs (:warn, "Eval of non-julia code blocks is not yet supported.") h |> seval) == "<p>Simple code: <pre><code class=\"language-python\">sqrt(-1)\n</code></pre> done.</p>\n"
 end
 
 # temporary fix for 186: make error appear and also use `abspath` in internal include
diff --git a/test/converter/lx_simple.jl b/test/converter/lx_simple.jl
index 2e4a6b35e..2d9ec3031 100644
--- a/test/converter/lx_simple.jl
+++ b/test/converter/lx_simple.jl
@@ -50,7 +50,10 @@ end
             """)
 end
 
-@testset "table: source with header" begin
+@testset "table" begin
+    #
+    # has header in source
+    #
     testcsv = "h1,h2,h3\nstring1, 1.567, 0\n,,\n l i n e ,.158,99999999"
     write(joinpath(J.PATHS[:assets], "testcsv.csv"), testcsv)
     # no header specified
@@ -87,9 +90,11 @@ end
     shouldbe = """<p>A table: <p><span style=\"color:red;\">// header size (2) and number of columns (3) do not match //</span></p>
             Done.</p>"""
     @test isapproxstr(h, shouldbe)
-end
 
-@testset "table: source without header" begin
+    #
+    # does not have header in source
+    #
+
     testcsv = "string1, 1.567, 0\n,,\n l i n e ,.158,99999999"
     write(joinpath(J.PATHS[:assets], "testcsv.csv"), testcsv)
     # no header specified
diff --git a/test/converter/markdown2.jl b/test/converter/markdown2.jl
index 8bcac9712..fc4e9f701 100644
--- a/test/converter/markdown2.jl
+++ b/test/converter/markdown2.jl
@@ -5,7 +5,7 @@ function inter(st::String)
     return steps[:inter_md].inter_md, steps[:inter_html].inter_html
 end
 
-@testset "Code+italic (#163)" begin
+@testset "issue163" begin
     st = raw"""A _B `C` D_ E""" * J.EOS
     imd, ih = inter(st)
     @test imd == "A _B  ##JDINSERT##  D_ E"
diff --git a/test/converter/markdown3.jl b/test/converter/markdown3.jl
index 48a0f3aaf..95573867d 100644
--- a/test/converter/markdown3.jl
+++ b/test/converter/markdown3.jl
@@ -227,3 +227,62 @@ end
                         D
                       </p>""")
 end
+
+
+@testset "IndCode" begin # issue 207: indented lines are converted to code blocks
+    st = raw"""
+        A
+            a = 1+1
+            if a > 1
+                @show a
+            end
+            b = 2
+            @show a+b
+        end
+        """ * J.EOS
+    @test isapproxstr(st |> seval, raw"""
+                        <p>
+                        A
+                        <pre><code class="language-julia">
+                        a = 1+1
+                        if a > 1
+                            @show a
+                        end
+                        b = 2
+                        @show a+b
+                        </code></pre>
+                        end
+                        </p>
+                        """)
+    # inline, fenced and indented code on the same page; indented list items stay a list
+    st = raw"""
+        A `single` and ```python blah``` and
+            a = 1+1
+        then
+        * blah
+            + blih
+            + bloh
+        end
+        """ * J.EOS
+    @test isapproxstr(st |> seval, raw"""
+                        <p>
+                        A <code>single</code> and
+                        <pre><code class="language-python">
+                        blah
+                        </code></pre>
+                        and
+                        <pre><code class="language-julia">
+                        a = 1+1
+                        </code></pre>
+                        then</p>
+                        <ul>
+                          <li><p>blah</p>
+                            <ul>
+                              <li><p>blih</p></li>
+                              <li><p>bloh</p></li>
+                            </ul>
+                          </li>
+                        </ul>
+                        <p>end</p>
+                        """)
+end
diff --git a/test/global/postprocess.jl b/test/global/postprocess.jl
index 7ae644c7b..055a15242 100644
--- a/test/global/postprocess.jl
+++ b/test/global/postprocess.jl
@@ -1,4 +1,4 @@
-@testset "Generation and optimisation" begin
+@testset "Gen&Opt" begin
     isdir("basic") && rm("basic", recursive=true, force=true)
     newsite("basic")
 
diff --git a/test/manager/utils.jl b/test/manager/utils.jl
index 479a6aad6..028885d10 100644
--- a/test/manager/utils.jl
+++ b/test/manager/utils.jl
@@ -15,7 +15,7 @@ write(temp_css, "some css")
 
 JuDoc.process_config()
 
-@testset "Prep outdir" begin # ✅ aug 15, 2018
+@testset "Prep outdir" begin
     JuDoc.prepare_output_dir()
     @test isdir(JuDoc.PATHS[:pub])
     @test isdir(JuDoc.PATHS[:css])
@@ -29,7 +29,7 @@ JuDoc.process_config()
     @test !isfile(temp_out)
 end
 
-@testset "Scan dir" begin # ✅ aug 16, 2018
+@testset "Scan dir" begin
     println("🐝 Testing file tracking...:")
     # it also tests add_if_new_file and last
     md_files = Dict{Pair{String, String}, Float64}()
@@ -44,7 +44,7 @@ end
     @test other_files[JuDoc.PATHS[:src_pages]=>"temp.rnd"] == mtime(temp_rnd)
 end
 
-@testset "Config+write" begin # ✅ 4 Sept, 2018
+@testset "Config+write" begin
     JuDoc.process_config()
     @test JuDoc.GLOBAL_PAGE_VARS["author"].first == "Stefan Zweig"
     rm(temp_config)
diff --git a/test/test_utils.jl b/test/test_utils.jl
index c4f710b60..2f4b93277 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -27,8 +27,8 @@ end
 function seval(st)
     J.def_GLOBAL_PAGE_VARS!()
     J.def_GLOBAL_LXDEFS!()
-    m, _ = J.convert_md(st, collect(values(J.GLOBAL_LXDEFS)))
-    h = J.convert_html(m, J.PageVars())
+    m, v = J.convert_md(st, collect(values(J.GLOBAL_LXDEFS)))
+    h = J.convert_html(m, v)
     return h
 end