Skip to content

Commit

Permalink
feat(lua): implemented a parser combinator
Browse files Browse the repository at this point in the history
* Added PoC parser for bib files
* Added initial utils for timing in Lua
  • Loading branch information
lervag committed Dec 9, 2024
1 parent 4013b01 commit 3401dc8
Show file tree
Hide file tree
Showing 11 changed files with 1,126 additions and 25 deletions.
12 changes: 8 additions & 4 deletions autoload/vimtex/test.vim
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@

function! vimtex#test#finished() abort " {{{1
for l:error in v:errors
let l:match = matchlist(l:error, '\(.*\) line \(\d\+\): \(.*\)')
let l:match = matchlist(l:error, '\v(.{-})( line (\d+))?: (.*)')
let l:file = fnamemodify(l:match[1], ':.')
let l:lnum = l:match[2]
let l:msg = l:match[3]
let l:lnum = l:match[3]
let l:msg = l:match[4]

if l:msg =~# 'Expected .*but got'
call s:print_expected_but_got(l:file, l:lnum, l:msg)
Expand Down Expand Up @@ -94,7 +94,11 @@ endfunction
" }}}1

function! s:print_expected_but_got(file, lnum, msg) abort " {{{1
echo printf("%s:%d\n", a:file, a:lnum)
if !empty(a:lnum)
echo printf("%s:%d\n", a:file, a:lnum)
else
echo printf("%s:\n", a:file)
endif

let l:intro = matchstr(a:msg, '.\{-}\ze\s*\(: \)\?Expected ')
if !empty(l:intro)
Expand Down
197 changes: 182 additions & 15 deletions lua/vimtex/parser/bib.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,150 @@
-- Email: [email protected]
--

local pc = require "vimtex.parser.combinators"
local g = require "vimtex.parser.general"

---@class BibReference
---@field type string
---@field key string
---@field source_lnum integer
---@field source_file string
---@field unparsed_content string?

---------------------------------
-- Parser generator elements here
---------------------------------

---Name of the source currently being parsed. It is assigned by
---`M.pc_parse_string` ("__string__") and `M.pc_parse_file` (the file name)
---before the parser runs, and the `entry` map callback copies it into the
---`source_file` field of each parsed entry.
---@type string
local FILE

---Parser for identifiers (entry keys, tag names, @string names): a `g.alnum`
---match followed by any number of characters whose byte value lies in one of
---the ranges `-./0-9:` (45-58), `A-Z` (65-90), `_` (95) or `a-z` (97-122).
---NOTE(review): `..` is an operator provided by the combinator library;
---presumably it joins both results into one string - confirm.
local identifier = g.alnum
  .. pc.many_flat(pc.shift:filter(function(char)
    local code = string.byte(char)

    -- "-", ".", "/", the digits and ":"
    if code >= 45 and code <= 58 then
      return true
    end

    -- Upper case letters
    if code >= 65 and code <= 90 then
      return true
    end

    -- Underscore
    if code == 95 then
      return true
    end

    -- Lower case letters
    return code >= 97 and code <= 122
  end, "identifier: did not match"))

---Parser for a single double-quoted value: whatever lies between two `g.dq`
---delimiters, made up of `g.dq_escaped` and `g.not_dq` matches.
local value_quoted_single
do
  local in_double_quotes = pc.between(g.dq, g.dq)
  local quoted_char = pc.choice({ g.dq_escaped, g.not_dq })

  value_quoted_single = in_double_quotes(pc.many_flat(quoted_char))
end

---Parser for a quoted value built from one or more pieces separated by `#`
---(with optional whitespace around it). Each piece is either a double-quoted
---string or a bare identifier; an identifier is wrapped as `##name##` so that
---the top-level parser can later replace it with the matching @string value.
---The pieces are joined with `table.concat`.
local value_quoted
do
  local hash_separator = pc.sequence({
    g.whitespaces_maybe,
    g.char("#"),
    g.whitespaces_maybe,
  })

  local string_reference = identifier:map(function(name)
    return "##" .. name .. "##"
  end)

  local pieces = pc.separated_by1(hash_separator)(pc.choice({
    value_quoted_single,
    string_reference,
  }))

  value_quoted = pieces:map(table.concat)
end

---Parser for a brace group including its delimiters: `g.lb`, then any mix of
---`g.nb` matches and nested brace groups, then `g.rb`. The parser refers to
---itself, hence the forward declaration and the `pc.lazy` indirection.
local value_braced_inc
do
  local nested_group = pc.lazy(function()
    return value_braced_inc
  end)

  value_braced_inc = pc.sequence_flat({
    g.lb,
    pc.many_flat(pc.choice({ g.nb, nested_group })),
    g.rb,
  })
end

---Parser for the content of a braced value: any mix of `g.nb` matches and
---complete nested brace groups.
local value_braced_content = pc.many_flat(pc.choice({
  g.nb,
  value_braced_inc,
}))

---Parser for a braced value: the content between an outer `g.lb` and `g.rb`.
local value_braced = pc.between(g.lb, g.rb)(value_braced_content)

---Parser for the value of a tag. The alternatives are listed in this order:
---a braced value, `g.digits`, and a quoted value (possibly `#`-concatenated).
local value_parser = pc.choice({
  value_braced,
  g.digits,
  value_quoted,
})

---Parser for a `name = value` pair with optional whitespace before the name
---and around the `=`. The result is the two-element list `{ name, value }`.
local tag_pair
do
  local pair_elements = pc.sequence({
    g.whitespaces_maybe,
    identifier,
    g.whitespaces_maybe,
    g.eq,
    g.whitespaces_maybe,
    value_parser,
  })

  tag_pair = pair_elements:map(function(parts)
    -- Slot 2 is the identifier, slot 6 is the parsed value
    local name, value = parts[2], parts[6]
    return { name, value }
  end)
end

---Parser for one `@type{...}` entry.
---
---The body between the braces consists of: the line number, optional
---whitespace, an optional `key,` prefix (empty string when absent), a list of
---tag pairs, and whatever brace-balanced content remains.
---
---The result is a table with the fields `type`, `source_file`, `source_lnum`,
---optionally `key` and `unparsed_content` (both omitted when empty), plus one
---field per tag pair. On a name clash the fixed fields win over tag pairs.
local entry
do
  local entry_type = pc.between(g.at, g.whitespaces_maybe)(g.letters)

  local entry_body = pc.between(g.lb, g.rb)(pc.sequence({
    pc.line_number,
    g.whitespaces_maybe,
    pc.left({ identifier, g.whitespaces_maybe, g.comma }):maybe(""),
    pc.separated_by(g.whitespaces_maybe + g.comma)(tag_pair),
    pc.right({ g.whitespaces_maybe, value_braced_content }),
  }))

  entry = pc.sequence({ entry_type, entry_body }):map(function(results)
    local kind, body = results[1], results[2]
    local lnum, key, pairs_list = body[1], body[3], body[4]

    -- Trim surrounding whitespace from the leftover content
    local leftover = body[5]:gsub("^%s*", "")
    leftover = leftover:gsub("%s*$", "")

    -- Later pairs overwrite earlier pairs with the same name
    local tags = {}
    for _, pair in ipairs(pairs_list) do
      local name, value = pair[1], pair[2]
      tags[name] = value
    end

    local bibref = {
      type = kind,
      source_file = FILE,
      source_lnum = lnum,
    }
    if #key > 0 then
      bibref.key = key
    end
    if #leftover > 0 then
      bibref.unparsed_content = leftover
    end

    return vim.tbl_extend("keep", bibref, tags)
  end)
end

---Parser for a `%` comment: a `%` character followed by every `g.not_nl`
---match after it.
---NOTE(review): `:ignore()` presumably drops the matched text from the
---result list - confirm against the combinator library.
local comment
do
  local percent = g.char("%")
  local rest_of_line = pc.many_flat(g.not_nl)

  comment = pc.sequence({ percent, rest_of_line }):ignore()
end

---Top-level parser: one or more entries or `%` comments, each optionally
---preceded by whitespace.
---
---The map callback post-processes the list of parsed items:
---1. The tag pairs of all `string` entries are collected into a name -> value
---   map.
---2. `string`, `comment` and `preamble` entries are dropped from the result.
---3. In every string field of the remaining entries, each `##name##`
---   placeholder (produced by `value_quoted` for bare identifiers) is replaced
---   with the corresponding @string value.
local parser = pc.many1(pc.right {
  g.whitespaces_maybe,
  pc.choice { entry, comment },
}):map(function(results)
  -- Fields that `entry` sets on every result. They are bookkeeping and must
  -- not be treated as @string definitions; otherwise a field that references
  -- e.g. the name `source_file` would be replaced with the file path.
  local metadata_fields = {
    type = true,
    source_file = true,
    source_lnum = true,
  }

  local string_map = {}
  for _, bibstr in
    ipairs(vim.tbl_filter(function(e)
      return e.type == "string"
    end, results))
  do
    for key, value in pairs(bibstr) do
      if not metadata_fields[key] then
        string_map[key] = value
      end
    end
  end

  ---@type BibReference[]
  local references = vim.tbl_filter(function(e)
    return e.type ~= "string" and e.type ~= "comment" and e.type ~= "preamble"
  end, results)

  for string_name, string_value in pairs(string_map) do
    -- The pattern only depends on the string name, so build it once
    local pattern = "##" .. vim.pesc(string_name) .. "##"

    -- Use a function as replacement: a replacement *string* gives "%" a
    -- special meaning, so an @string value containing "%" would either raise
    -- an error or be mangled.
    local function replacement()
      return string_value
    end

    for _, reference in ipairs(references) do
      -- Only existing fields are reassigned, which is safe during `pairs`
      for name, value in pairs(reference) do
        if type(value) == "string" then
          reference[name] = (value:gsub(pattern, replacement))
        end
      end
    end
  end

  return references
end)

---------------------------------
-- Manual parser elements here
---------------------------------

---Get the index for an end of pattern match or -1
---@param string string
---@param pattern string
Expand Down Expand Up @@ -147,7 +291,7 @@ end
---Parse an item
---@param item table
---@param strings table<string, string>
---@return nil
---@return BibReference?
local function parse_item(item, strings)
local key, body = item.body:match "^([^,%s]+)%s*,%s*(.*)"
if not key then
Expand Down Expand Up @@ -196,34 +340,28 @@ local function parse_string(raw_string)
return "", ""
end

---Read a file and return its content split into lines
---Raises an error if the file cannot be opened.
---@param filename string
---@return string[]
local function readfile(filename)
  local handle = assert(io.open(filename, "r"))
  local content = handle:read("*a")
  handle:close()

  return vim.split(content, "\n")
end

local M = {}

---Parse the specified bibtex file
---The parser adheres to the format description found here:
---http://www.bibtex.org/Format/
---@param filename string
---@return table[]
M.parse = function(filename)
---@return BibReference[]
function M.parse(filename)
if not vim.fn.filereadable(filename) then
return {}
end

local f = assert(io.open(filename, "r"))
local lines = vim.split(f:read "*a", "\n")
f:close()

local items = {}
local strings = {}

local item = {}
local key, value
for lnum, line in ipairs(readfile(filename)) do
for lnum, line in ipairs(lines) do
if vim.tbl_isempty(item) then
item = parse_head(filename, lnum, line)
else
Expand All @@ -250,4 +388,33 @@ M.parse = function(filename)
return result
end

---Run the combinator parser on bib content passed directly as a string
---The parsed entries get "__string__" as their `source_file`.
---@param input_string string
---@return BibReference[] # Empty list if the parser reports an error
function M.pc_parse_string(input_string)
  FILE = "__string__"

  local outcome = parser:run(input_string)
  if not outcome.error then
    return outcome.result
  end

  return {}
end

---Parse bib entries from a specified file with the combinator parser
---The parsed entries get `filename` as their `source_file`. An error is
---raised if the file cannot be opened.
---@param filename string
---@return BibReference[] # Empty list if the parser reports an error
function M.pc_parse_file(filename)
  FILE = filename
  local file = assert(io.open(filename, "r"), "Could not read file")

  -- Read everything and close the handle before parsing, so the file is not
  -- left open if the parser raises an error.
  local content = file:read "*a"
  file:close()

  local parsed = parser:run(content)
  if parsed.error then
    return {}
  else
    return parsed.result
  end
end

return M
Loading

0 comments on commit 3401dc8

Please sign in to comment.