From 4eb309a26344fdf3031fff38ecbe24d080b68060 Mon Sep 17 00:00:00 2001
From: NTBBloodbath <bloodbathalchemist@protonmail.com>
Date: Tue, 4 Mar 2025 16:10:26 -0400
Subject: [PATCH] feat: add support for Table of Contents

---
 src/converter/html.rs |  54 +++++++++++++++++---
 src/converter/meta.rs |  17 ++++++-
 src/shared/mod.rs     |  13 +++--
 src/tera_functions.rs | 112 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 181 insertions(+), 15 deletions(-)

diff --git a/src/converter/html.rs b/src/converter/html.rs
index 6e8e707..6e93276 100644
--- a/src/converter/html.rs
+++ b/src/converter/html.rs
@@ -21,6 +21,14 @@ struct CarryOverTag {
     parameters: Vec<String>,
 }
 
+/// A heading collected for the Table of Contents while converting a document to HTML
+#[derive(Clone, Debug)]
+pub struct TocEntry {
+    level: u16, // raw heading level; not capped at 6 like the emitted `<h6>` tag
+    title: String, // heading title as written between the heading tags
+    id: String, // same value as the heading's `id` attribute
+}
+
 /// Converts paragraph segment tokens to a String
 fn paragraph_tokens_to_string(tokens: &[ParagraphSegmentToken]) -> String {
     let mut s = String::new();
@@ -275,6 +283,7 @@ trait NorgToHtml {
         strong_carry: Vec<CarryOverTag>,
         weak_carry: Vec<CarryOverTag>,
         root_url: &str,
+        toc: &mut Vec<TocEntry>
     ) -> String;
 }
 
@@ -285,6 +294,7 @@ impl NorgToHtml for NorgAST {
         strong_carry: Vec<CarryOverTag>,
         mut weak_carry: Vec<CarryOverTag>,
         root_url: &str,
+        toc: &mut Vec<TocEntry>,
     ) -> String {
         match self {
             NorgAST::Paragraph(s) => {
@@ -319,13 +329,14 @@ impl NorgToHtml for NorgAST {
 
                 // Regex to remove possible links from heading title ids
                 let re = Regex::new(r"-?<.*>").unwrap();
+                let heading_id = re.replace(&heading_title.replace(" ", "-"), "").to_string();
 
                 match level {
                     1..=6 => {
                         section.push(format!(
                             "<h{} id=\"{}\"",
                             level,
-                            re.replace(&heading_title.replace(" ", "-"), "")
+                            heading_id,
                         ));
                         if !weak_carry.is_empty() {
                             for weak_carryover in weak_carry.clone() {
@@ -341,7 +352,7 @@ impl NorgToHtml for NorgAST {
                     _ => {
                         section.push(format!(
                             "<h6 id=\"{}\"",
-                            re.replace(&heading_title.replace(" ", "-"), "")
+                            heading_id,
                         ));
                         if !weak_carry.is_empty() {
                             for weak_carryover in weak_carry.clone() {
@@ -354,7 +365,14 @@ impl NorgToHtml for NorgAST {
                         section.push(format!(">{}</h6>", heading_title));
                     }
                 }
-                section.push(to_html(content, &strong_carry, &weak_carry, root_url));
+                let entry = TocEntry {
+                    level: *level,
+                    title: heading_title.clone(),
+                    id: heading_id.clone(),
+                };
+                toc.push(entry);
+
+                section.push(to_html(content, &strong_carry, &weak_carry, root_url, toc));
 
                 section.join(" ")
             }
@@ -396,7 +414,7 @@ impl NorgToHtml for NorgAST {
                         list.push("</li>".to_string());
                         if !content.is_empty() {
                             list.push(get_list_tag(modifier_type.clone(), true));
-                            list.push(to_html(content, &strong_carry, &weak_carry, root_url));
+                            list.push(to_html(content, &strong_carry, &weak_carry, root_url, toc));
                             list.push(get_list_tag(modifier_type.clone(), false));
                         }
                         if *level == 1 {
@@ -417,7 +435,7 @@ impl NorgToHtml for NorgAST {
                         }
                         quote.push(format!(">{}", mod_text));
                         if !content.is_empty() {
-                            quote.push(to_html(content, &strong_carry, &weak_carry, root_url));
+                            quote.push(to_html(content, &strong_carry, &weak_carry, root_url, toc));
                         }
                         quote.push("</blockquote>".to_string());
                         quote.join(" ")
@@ -501,6 +519,7 @@ impl NorgToHtml for NorgAST {
                         &strong_carry,
                         &weak_carry,
                         root_url,
+                        toc,
                     )
                 }
                 CarryoverTag::Macro => {
@@ -573,17 +592,36 @@ fn to_html(
     strong_carry: &[CarryOverTag],
     weak_carry: &[CarryOverTag],
     root_url: &str,
+    toc: &mut Vec<TocEntry>,
 ) -> String {
     let mut res = String::new();
     for node in ast {
-        res.push_str(&node.to_html(strong_carry.to_vec(), weak_carry.to_vec(), root_url));
+        res.push_str(&node.to_html(strong_carry.to_vec(), weak_carry.to_vec(), root_url, toc));
     }
 
     res
 }
 
-pub fn convert(document: String, root_url: &str) -> String {
+/// Converts the collected ToC entries into a TOML array of tables.
+///
+/// Each entry becomes a table with the keys `level` (integer), `title` and
+/// `id` (strings); the order of `toc` is preserved.
+pub fn toc_to_toml(toc: &[TocEntry]) -> toml::Value {
+    let rows = toc.iter().map(|heading| {
+        let mut row = toml::value::Table::new();
+        row.insert("level".into(), toml::Value::Integer(i64::from(heading.level)));
+        row.insert("title".into(), toml::Value::String(heading.title.clone()));
+        row.insert("id".into(), toml::Value::String(heading.id.clone()));
+        toml::Value::Table(row)
+    });
+    toml::Value::Array(rows.collect())
+}
+
+pub fn convert(document: String, root_url: &str) -> (String, Vec<TocEntry>) {
     let ast = parse_tree(&document).unwrap();
+    let mut toc = Vec::<TocEntry>::new();
     // We do not have any carryover tag when starting to convert the document
-    to_html(&ast, &[], &[], root_url)
+    let html = to_html(&ast, &[], &[], root_url, &mut toc);
+    // Hand the heading entries collected during the conversion back with the HTML
+    (html, toc)
 }
diff --git a/src/converter/meta.rs b/src/converter/meta.rs
index 4f63c79..1a48cf8 100644
--- a/src/converter/meta.rs
+++ b/src/converter/meta.rs
@@ -87,12 +87,25 @@ fn extract_meta(input: &str) -> String {
     result.join("\n")
 }
 
+/// Adds the Table of Contents to the document metadata: `toc` is stored under
+/// the `toc` key when `metadata` is a TOML table, and dropped otherwise (the
+/// metadata is then returned untouched).
+pub fn merge_toc_into_metadata(mut metadata: toml::Value, toc: toml::Value) -> toml::Value {
+    if let Some(fields) = metadata.as_table_mut() {
+        fields.insert(String::from("toc"), toc);
+    }
+    metadata
+}
+
 /// Extracts and converts Norg metadata to TOML format
-pub fn convert(document: &str) -> Result<toml::Value, Error> {
+pub fn convert(document: &str, toc: Option<toml::Value>) -> Result<toml::Value, Error> {
     let extracted_meta = extract_meta(document);
     let meta = parse_metadata(&extracted_meta).expect("Failed to parse metadata");
 
-    let toml_value = norg_meta_to_toml(&meta).expect("Failed to convert metadata to TOML");
+    let mut toml_value = norg_meta_to_toml(&meta).expect("Failed to convert metadata to TOML");
+    if let Some(toc) = toc {
+        toml_value = merge_toc_into_metadata(toml_value, toc); // stored under the `toc` key
+    }
 
     Ok(toml_value)
 }
diff --git a/src/shared/mod.rs b/src/shared/mod.rs
index d3799a8..2beb738 100644
--- a/src/shared/mod.rs
+++ b/src/shared/mod.rs
@@ -96,10 +96,13 @@ pub async fn convert_document(
 
         // Convert html content
         let norg_document = tokio::fs::read_to_string(file_path).await?;
-        let norg_html = converter::html::convert(norg_document.clone(), root_url);
+        let (norg_html, toc) = converter::html::convert(norg_document.clone(), root_url);
 
         // Convert metadata
-        let norg_meta = converter::meta::convert(&norg_document)?;
+        let norg_meta = converter::meta::convert(
+            &norg_document,
+            Some(converter::html::toc_to_toml(&toc))
+        )?;
         let meta_toml = toml::to_string_pretty(&norg_meta)?;
 
         // Check if the current document is a draft post and also whether we should finish the conversion
@@ -212,12 +215,12 @@ pub async fn init_tera(templates_dir: &str, theme_templates_dir: &Path) -> Resul
             };
         tera.extend(&tera_theme)?;
     }
-    tera.build_inheritance_chains().map_err(|e| {
-        eyre!("Failed to build templates inheritance: {}", e)
-    })?;
+    tera.build_inheritance_chains()
+        .map_err(|e| eyre!("Failed to build templates inheritance: {}", e))?;
 
     // Register functions
     tera.register_function("now", crate::tera_functions::NowFunction);
+    tera.register_function("generate_toc", crate::tera_functions::GenerateToc);
 
     Ok(tera)
 }
diff --git a/src/tera_functions.rs b/src/tera_functions.rs
index 1081894..4693a73 100644
--- a/src/tera_functions.rs
+++ b/src/tera_functions.rs
@@ -19,3 +19,115 @@ impl Function for NowFunction {
         Ok(Value::String(now.format(format).to_string()))
     }
 }
+
+#[derive(Debug, Clone)]
+struct TocNode {
+    level: u8, // heading level; an entry nests under the closest earlier lower level
+    title: String, // text placed inside the generated link
+    id: String, // anchor target, rendered as `href="#id"`
+    children: Vec<usize>, // indices into `TocTree::nodes`
+}
+
+#[derive(Debug)]
+struct TocTree {
+    nodes: Vec<TocNode>, // flat storage of every entry, in input order
+    root_indices: Vec<usize>, // indices (into `nodes`) of the entries without a parent
+}
+
+/// Builds a [`TocTree`] from the flat `toc` array (tables with `level`, `title`, `id`).
+///
+/// Entries are expected in document order; each one is attached to the closest
+/// preceding entry with a smaller level, or becomes a root when there is none.
+/// A missing `title` or `id` falls back to an empty string.
+///
+/// # Errors
+///
+/// Returns a message, instead of panicking, when `value` is not an array or
+/// when an entry has a missing, non-integer or out-of-range (0..=255) `level`.
+fn parse_toc(value: &Value) -> std::result::Result<TocTree, String> {
+    let entries = value.as_array().ok_or("TOC must be an array")?;
+    let mut tree = TocTree {
+        nodes: Vec::with_capacity(entries.len()),
+        root_indices: Vec::new(),
+    };
+    // Indices of the currently open ancestors; their levels strictly increase
+    let mut stack: Vec<usize> = Vec::new();
+
+    for entry in entries {
+        let level = entry
+            .get("level")
+            .and_then(|v| v.as_i64())
+            // Checked conversion: `as u8` would silently wrap levels above 255
+            .and_then(|raw| u8::try_from(raw).ok())
+            .ok_or("Missing or invalid level")?;
+        let text_of = |key: &str| {
+            let text = entry.get(key).and_then(|v| v.as_str());
+            text.unwrap_or_default().to_string()
+        };
+
+        // Close every entry that cannot be an ancestor of the new one; the
+        // entry left on top of the stack (if any) is its parent
+        while matches!(stack.last(), Some(&idx) if tree.nodes[idx].level >= level) {
+            stack.pop();
+        }
+
+        let node_idx = tree.nodes.len();
+        match stack.last() {
+            Some(&parent_idx) => tree.nodes[parent_idx].children.push(node_idx),
+            None => tree.root_indices.push(node_idx),
+        }
+        tree.nodes.push(TocNode {
+            level,
+            title: text_of("title"),
+            id: text_of("id"),
+            children: Vec::new(),
+        });
+        stack.push(node_idx);
+    }
+
+    Ok(tree)
+}
+
+/// Renders `tree` as nested lists of links.
+///
+/// Every entry becomes `<li><a href="#id">title</a>...</li>` and its children go
+/// into a nested `<list_type>` element. Titles and ids are inserted verbatim,
+/// without escaping. An empty tree still yields an empty list element.
+fn generate_nested_html(tree: &TocTree, list_type: &str) -> String {
+    // Appends one list holding `indices`, recursing into each entry's children
+    fn write_list(out: &mut String, tree: &TocTree, indices: &[usize], list_type: &str) {
+        out.push_str(&format!("<{}>", list_type));
+        for &idx in indices {
+            let entry = &tree.nodes[idx];
+            out.push_str(&format!("<li><a href=\"#{}\">{}</a>", entry.id, entry.title));
+            // Leaf entries get no nested list at all
+            if !entry.children.is_empty() {
+                write_list(out, tree, &entry.children, list_type);
+            }
+            out.push_str("</li>");
+        }
+        out.push_str(&format!("</{}>", list_type));
+    }
+
+    let mut html = String::new();
+    write_list(&mut html, tree, &tree.root_indices, list_type);
+    html
+}
+
+/// Tera function `generate_toc(toc=..., list_type="ol")` rendering a nested ToC list.
+pub struct GenerateToc;
+impl Function for GenerateToc {
+    /// Fails with a template error, instead of panicking, on a missing or malformed `toc`.
+    fn call(&self, args: &HashMap<String, Value>) -> Result<Value, Error> {
+        let toc = args
+            .get("toc")
+            .ok_or_else(|| Error::msg("Missing 'toc' argument"))?;
+        let list_type = args.get("list_type").and_then(Value::as_str).unwrap_or("ol");
+        let tree = parse_toc(toc).map_err(|err| Error::msg(err.to_string()))?;
+        Ok(Value::String(generate_nested_html(&tree, list_type)))
+    }
+
+    fn is_safe(&self) -> bool {
+        true // the returned markup is meant to be emitted without escaping
+    }
+}