I am writing a Lua filter for pandoc that adds a glossary function to HTML output of a markdown file. The goal is to add mouseover text to each occurrence of an acronym or key definition in the document.
I would like to match acronyms when they are surrounded by punctuation (for example in a list), but not when they are surrounded by letters (e.g. so that CO isn't highlighted inside a word such as cobalt).
My MWE fails on this count because strings in the Pandoc AST include adjacent punctuation (e.g. `Str "CO/DBP/SBP,"` or `Str "CO,", Space, Str "SBP,"`).
-- # MWE
-- Glossary entries, acronym -> expansion (parsed from a glossary file in the
-- real filter; hard-coded here for brevity)
local glossary = {
  CO = "Cardiac Output",
  DBP = "Diastolic Blood Pressure",
  SBP = "Systolic Blood Pressure",
}
-- Swap a Str whose text is exactly a glossary term for a span that carries
-- the term's definition as mouseover text. Returns nothing for any other
-- Str (including terms with punctuation attached, e.g. "CO,").
function Str(elem)
  local definition = glossary[elem.text]
  if definition then
    return pandoc.Span(elem.text, {title = definition, class = "glossary"})
  end
end
I have had a play with `string.sub` and `string.find` but wasn't able to get anything workable, chiefly because I wasn't sure how to go about returning both the new Span and the Str (minus its new Span). Any help would be appreciated!
My test markdown contains:
# Acronyms: SBP, DBP & CO
Spaced acronyms: CO and SBP and DBP.
In a comma-separated list: CO, SBP, DBP; with backslashes; CO/DBP/SBP, and in bullet points:
* CO
* SBP
* DBP
You can just return a table with multiple elements. My idea was to look for the first separator and then replace the glossary entries with spans:
-- Glossary entries, acronym -> expansion (parsed from a glossary file in the
-- real filter; hard-coded here for brevity)
local glossary = {
  CO = "Cardiac Output",
  DBP = "Diastolic Blood Pressure",
  SBP = "Systolic Blood Pressure",
}
--- Build a membership table from a sequence.
-- @param list sequence of values
-- @return table mapping every value in `list` to `true`
local function Set(list)
  local members = {}
  for _, item in ipairs(list) do
    members[item] = true
  end
  return members
end
--- Find the first separator character in a string.
-- The recognised separators are the comma, the slash and the space.
-- @param text string to scan
-- @return the first separator character found, or nothing if there is none
local findSeparator = function(text)
  -- A character class picks out the left-most comma, slash or space.
  local first = string.match(text, "[,/ ]")
  if first then
    return first
  end
end
--- Split a string into glossary spans and the plain text around them.
-- The text is cut into alternating runs of non-punctuation and punctuation
-- characters. A non-punctuation run that is exactly a glossary key becomes a
-- span whose title is the key's definition; everything else (other words and
-- all punctuation) is kept verbatim, so the original text is fully preserved.
-- A key only matches a whole run, so "CO" is not matched inside "cobalt".
-- @param text string content of a Str element
-- @return list of Span/Str inlines, or nothing if no glossary term was found
local separatedList = function(text)
  local inlines = {}
  local plain = {}   -- pieces of ordinary text not yet emitted as a Str
  local found = false

  -- Emit the pending ordinary text (if any) as a single Str.
  local function flush()
    if #plain > 0 then
      inlines[#inlines + 1] = pandoc.Str(table.concat(plain))
      plain = {}
    end
  end

  -- Each iteration yields one non-punctuation run followed by one
  -- punctuation run; either may be empty (e.g. at the end of the string).
  for word, punct in string.gmatch(text, "(%P*)(%p*)") do
    local definition = glossary[word]
    if definition then
      flush()
      inlines[#inlines + 1] = pandoc.Span(word, {title = definition, class = "glossary"})
      found = true
    elseif word ~= "" then
      plain[#plain + 1] = word
    end
    if punct ~= "" then
      plain[#plain + 1] = punct
    end
  end

  -- Returning nothing tells the caller that no term was found.
  if not found then return end
  flush()
  return inlines
end
-- Filter table applied to the document: a Str that is exactly a glossary
-- term becomes a span with the definition as its title; any other Str is
-- handed to separatedList.
local glossarize = {
  Str = function(el)
    local definition = glossary[el.text]
    if not definition then
      return separatedList(el.text)
    end
    return pandoc.Span(el.text, {title = definition, class = "glossary"})
  end
}
-- Apply the glossarize filter to the whole document body. The blocks are
-- wrapped in a single Div so that one walk_block call covers all of them;
-- the walked Div's content becomes the body of the returned document and
-- the metadata is passed through as-is.
function Pandoc(doc)
  local walked = pandoc.walk_block(pandoc.Div(doc.blocks), glossarize)
  return pandoc.Pandoc(walked.content, doc.meta)
end