-
Notifications
You must be signed in to change notification settings - Fork 393
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(lua): implemented a parser combinator
* Added PoC parser for bib files
* Added initial utils for timing in Lua
- Loading branch information
Showing
11 changed files
with
1,126 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,150 @@ | |
-- Email: [email protected] | ||
-- | ||
|
||
local pc = require "vimtex.parser.combinators" | ||
local g = require "vimtex.parser.general" | ||
|
||
---@class BibReference | ||
---@field type string | ||
---@field key string | ||
---@field source_lnum integer | ||
---@field source_file string | ||
---@field unparsed_content string? | ||
|
||
--------------------------------- | ||
-- Parser generator elements here | ||
--------------------------------- | ||
|
||
---@type string | ||
local FILE | ||
|
||
---Tell whether a single character is accepted after the first character of
---an identifier: "-", ".", "/", digits, ":", ASCII letters or "_".
---@param char string
---@return boolean
local function is_identifier_tail(char)
  local code = string.byte(char)
  if code == 95 then -- "_"
    return true
  end
  if code >= 45 and code <= 58 then -- "-", ".", "/", "0".."9", ":"
    return true
  end
  if code >= 65 and code <= 90 then -- "A".."Z"
    return true
  end
  return code >= 97 and code <= 122 -- "a".."z"
end

-- An identifier starts with one `g.alnum` match and continues with any run of
-- tail characters. NOTE(review): `..` is presumably the combinator that joins
-- the two parsers' string results -- confirm in vimtex.parser.combinators.
local identifier = g.alnum
  .. pc.many_flat(
    pc.shift:filter(is_identifier_tail, "identifier: did not match")
  )
|
||
-- Content of a double-quoted value: any run of `g.dq_escaped` / `g.not_dq`
-- matches. The surrounding `g.dq` delimiters are handled by `pc.between`.
local quoted_text = pc.many_flat(pc.choice { g.dq_escaped, g.not_dq })
local value_quoted_single = pc.between(g.dq, g.dq)(quoted_text)
|
||
-- Separator between the pieces of a concatenated value: a "#" with optional
-- whitespace on both sides.
local concat_operator = pc.sequence {
  g.whitespaces_maybe,
  g.char "#",
  g.whitespaces_maybe,
}

-- A bare identifier inside a value is emitted as ##name## so that the
-- top-level `parser` can later substitute the matching @string definition.
local string_reference = identifier:map(function(name)
  return "##" .. name .. "##"
end)

-- A value built from quoted strings and/or identifiers separated by "#"; the
-- pieces are joined into one string with `table.concat`.
local value_quoted = pc.separated_by1(concat_operator)(pc.choice {
  value_quoted_single,
  string_reference,
}):map(table.concat)
|
||
-- Forward declaration: a braced group may contain further braced groups, so
-- the parser has to refer to itself.
local value_braced_inc

-- The self-reference is wrapped in `pc.lazy` because `value_braced_inc` is
-- still unassigned while its own definition is being built.
local nested_braces = pc.lazy(function()
  return value_braced_inc
end)

-- A braced group with its delimiters: `g.lb`, any mix of `g.nb` matches and
-- nested groups, `g.rb`.
value_braced_inc = pc.sequence_flat {
  g.lb,
  pc.many_flat(pc.choice { g.nb, nested_braces }),
  g.rb,
}

-- Content of a braced value (without the outermost delimiters)
local value_braced_content = pc.many_flat(pc.choice {
  g.nb,
  value_braced_inc,
})

-- A complete braced value; the outermost delimiters are handled by
-- `pc.between`.
local value_braced = pc.between(g.lb, g.rb)(value_braced_content)

-- A tag value: tried in the order braced, digits, quoted/concatenated
local value_parser = pc.choice({ value_braced, g.digits, value_quoted })
|
||
-- A single `name = value` tag with optional whitespace around each token.
-- The mapped result is the two-element list { name, value }.
local tag_pair = pc.sequence({
  g.whitespaces_maybe,
  identifier, -- [2] tag name
  g.whitespaces_maybe,
  g.eq,
  g.whitespaces_maybe,
  value_parser, -- [6] tag value
}):map(function(parts)
  local name, value = parts[2], parts[6]
  return { name, value }
end)
|
||
---Turn the raw result of the `entry` sequence into a flat reference table.
---
---`results[1]` is the entry type and `results[2]` the parsed body with the
---positions: [1] line number, [3] key (or ""), [4] list of { name, value }
---tag pairs, [5] trailing content that was not parsed as tags.
---@param results table
---@return table
local function build_reference(results)
  local body = results[2]
  local key = body[3]
  -- Strip leading and trailing whitespace from the unparsed remainder
  local trailing = body[5]:gsub("^%s*", ""):gsub("%s*$", "")

  local reference = {
    type = results[1],
    source_file = FILE,
    source_lnum = body[1],
  }
  -- Empty strings are stored as absent fields
  if #key > 0 then
    reference.key = key
  end
  if #trailing > 0 then
    reference.unparsed_content = trailing
  end

  local tags = {}
  for _, pair in ipairs(body[4]) do
    tags[pair[1]] = pair[2]
  end

  -- "keep": the fields set above take precedence over tags of the same name
  return vim.tbl_extend("keep", reference, tags)
end

-- One `@type{ key, name = value, ... }` entry
local entry = pc.sequence({
  pc.between(g.at, g.whitespaces_maybe)(g.letters),
  pc.between(g.lb, g.rb)(pc.sequence {
    pc.line_number,
    g.whitespaces_maybe,
    pc.left({ identifier, g.whitespaces_maybe, g.comma }):maybe "",
    pc.separated_by(g.whitespaces_maybe + g.comma)(tag_pair),
    pc.right { g.whitespaces_maybe, value_braced_content },
  }),
}):map(build_reference)
|
||
-- A comment: a "%" followed by a run of `g.not_nl` matches. The match is
-- passed through `:ignore()`.
local rest_of_line = pc.many_flat(g.not_nl)
local comment = pc.sequence({ g.char "%", rest_of_line }):ignore()
|
||
-- Fields that `entry` always sets itself. On an @string entry they describe
-- where the entry came from; they are not string definitions.
local STRING_METADATA = { type = true, source_file = true, source_lnum = true }

---Entry type of a parse result, lower-cased because BibTeX entry types are
---case-insensitive (`@STRING` and `@string` are the same). A non-string
---`type` value is returned unchanged.
---@param e table
---@return any
local function entry_kind(e)
  local kind = e.type
  if type(kind) == "string" then
    return kind:lower()
  end
  return kind
end

-- Top-level parser: one or more entries or comments, each optionally preceded
-- by whitespace. The mapped result is the list of references with @string
-- definitions substituted into their string fields; @string, @comment and
-- @preamble entries are not part of the returned list.
local parser = pc.many1(pc.right {
  g.whitespaces_maybe,
  pc.choice { entry, comment },
}):map(function(results)
  -- Collect the definitions of all @string entries
  local string_entries = vim.tbl_filter(function(e)
    return entry_kind(e) == "string"
  end, results)

  local string_map = {}
  for _, bibstr in ipairs(string_entries) do
    for key, value in pairs(bibstr) do
      if not STRING_METADATA[key] then
        string_map[key] = value
      end
    end
  end

  ---@type BibReference[]
  local references = vim.tbl_filter(function(e)
    local kind = entry_kind(e)
    return kind ~= "string" and kind ~= "comment" and kind ~= "preamble"
  end, results)

  -- Replace every ##name## marker (emitted by `value_quoted` for bare
  -- identifiers) with the value of the corresponding @string definition.
  for string_name, string_value in pairs(string_map) do
    local pattern = "##" .. vim.pesc(string_name) .. "##"

    -- A function replacement is inserted verbatim. Passing `string_value`
    -- directly would make gsub interpret "%" in it as an escape character,
    -- which raises an error or corrupts values such as "100\% sure".
    local function replacement()
      return string_value
    end

    for _, reference in ipairs(references) do
      -- Only existing fields are reassigned, which is allowed during `pairs`
      for name, value in pairs(reference) do
        if type(value) == "string" then
          reference[name] = (value:gsub(pattern, replacement))
        end
      end
    end
  end

  return references
end)
|
||
--------------------------------- | ||
-- Manual parser elements here | ||
--------------------------------- | ||
|
||
---Get the index for an end of pattern match or -1 | ||
---@param string string | ||
---@param pattern string | ||
|
@@ -147,7 +291,7 @@ end | |
---Parse an item | ||
---@param item table | ||
---@param strings table<string, string> | ||
---@return nil | ||
---@return BibReference? | ||
local function parse_item(item, strings) | ||
local key, body = item.body:match "^([^,%s]+)%s*,%s*(.*)" | ||
if not key then | ||
|
@@ -196,34 +340,28 @@ local function parse_string(raw_string) | |
return "", "" | ||
end | ||
|
||
---Load a file and split its content into lines
---Raises an error when the file cannot be opened.
---@param filename string
---@return string[]
local function readfile(filename)
  local handle = assert(io.open(filename, "r"))
  local content = handle:read "*a"
  handle:close()
  return vim.split(content, "\n")
end
|
||
local M = {} | ||
|
||
---Parse the specified bibtex file | ||
---The parser adheres to the format description found here: | ||
---http://www.bibtex.org/Format/ | ||
---@param filename string | ||
---@return table[] | ||
M.parse = function(filename) | ||
---@return BibReference[] | ||
function M.parse(filename) | ||
if not vim.fn.filereadable(filename) then | ||
return {} | ||
end | ||
|
||
local f = assert(io.open(filename, "r")) | ||
local lines = vim.split(f:read "*a", "\n") | ||
f:close() | ||
|
||
local items = {} | ||
local strings = {} | ||
|
||
local item = {} | ||
local key, value | ||
for lnum, line in ipairs(readfile(filename)) do | ||
for lnum, line in ipairs(lines) do | ||
if vim.tbl_isempty(item) then | ||
item = parse_head(filename, lnum, line) | ||
else | ||
|
@@ -250,4 +388,33 @@ M.parse = function(filename) | |
return result | ||
end | ||
|
||
---Run the combinator-based parser on a string of bib content
---The parsed references report "__string__" as their source file. An empty
---list is returned when the parser reports an error.
---@param input_string string
---@return BibReference[]
function M.pc_parse_string(input_string)
  FILE = "__string__"
  local outcome = parser:run(input_string)
  if not outcome.error then
    return outcome.result
  end
  return {}
end
|
||
---Parse bib entries from a specified file
---Raises "Could not read file" when the file cannot be opened. An empty list
---is returned when the parser reports an error.
---@param filename string
---@return BibReference[]
function M.pc_parse_file(filename)
  FILE = filename
  local file = assert(io.open(filename, "r"), "Could not read file")

  -- Read everything and close the handle before parsing, so the file is
  -- released even if the parser raises an error.
  local content = file:read "*a"
  file:close()

  local parsed = parser:run(content)
  if parsed.error then
    return {}
  end
  return parsed.result
end
|
||
return M |
Oops, something went wrong.