-- Copyright 2016 Jeffrey Kegler
-- Permission is hereby granted, free of charge, to any person obtaining a
-- copy of this software and associated documentation files (the "Software"),
-- to deal in the Software without restriction, including without limitation
-- the rights to use, copy, modify, merge, publish, distribute, sublicense,
-- and/or sell copies of the Software, and to permit persons to whom the
-- Software is furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included
-- in all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-- THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-- ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-- OTHER DEALINGS IN THE SOFTWARE.
--
-- [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
-- The following is the description of the intended full argument syntax,
-- almost all of which is *not* implemented at present. Arguments
-- have the form `key=value`, where key and value have no
-- internal spaces. Allowed keys include
-- `in` -- where the value is an input file.
-- `out:section` -- where `section` is the name of a section
-- and the value is an output file.
--
-- If an argument does not contain an equal sign (`=`), the key `in`
-- is assumed and the argument is considered to be "abbreviated". All
-- arguments after the first abbreviated argument are treated as abbreviated.
-- If an abbreviated argument contains an equal sign, so that it is
-- `section=file_name`, it is treated as if it was `out:section=file_name`.
--
-- The portion now implemented allows only "abbreviated" arguments,
-- and only one input argument.
require 'inspect' -- delete after development
-- Command-line handling.  Only "abbreviated" arguments are implemented:
-- an argument without `=` is recorded as an input file, and an argument
-- of the form `section=file_name` records `file_name` as the output file
-- for `section`.  Exactly one input file must be given.
local file_ids = {}
local outputs = {}
local inputs = {}
for position = 1, #arg do
    local argument = arg[position]
    if argument:find("=") then
        -- no space around =
        local section_name, output_path = argument:match("^([^=]+)%=(.*)")
        local well_formed = section_name and output_path
            and #section_name >= 1 and #output_path >= 1
        if not well_formed then
            error("Bad option: " .. argument)
        end
        outputs[section_name] = output_path
    else
        inputs[#inputs+1] = argument
    end
end
if #inputs ~= 1 then
    error("There must be exactly one input file")
end
-- Table of sections, keyed by section name; filled in while runs are found.
local sections = {}
-- `error_message` is declared local here so that it does not leak into
-- the global environment.
local input_handle, error_message = io.open(inputs[1])
if not input_handle then error(error_message) end
-- We need random access to the input, so we incur the
-- cost of reading the entire input into memory.
-- Tabs and trailing spaces are rejected because later processing
-- compares leading-space prefixes of lines.
local lines = {}
for line in input_handle:lines() do
    if line:find('\t') then error("line contains tab: " .. line) end
    if line:find(' $') then error("line has trailing space: " .. line) end
    lines[#lines+1] = line
end
-- Everything is in memory now; release the file handle.
input_handle:close()
-- Build the leading part of an error message: the line number followed
-- by the text of that input line and a newline.
local function problem_in_line(line_no)
    local text = lines[line_no]
    return ("Problem in line %d: %s\n"):format(line_no, text)
end
-- Morphemes are the smallest meaningful pieces.  They
-- are instructions (which may be multi-line) and, otherwise,
-- individual lines.
--
-- Returns an iterator which yields, in input order, the result of
-- `parse_instruction` for every instruction found in `lines`.
-- Two forms are recognized:
--   * long form: starts with `--[=[ miranda:` and runs up to and including
--     the first line containing the matching `]=]`;
--   * short form: a single line starting with `-- miranda:`.
-- Raises an error if the prefix of a long instruction is not all spaces,
-- if one of its lines does not start with that prefix, or if it is never
-- closed before the end of the input.
local function instruction_iter()
    return coroutine.wrap(
        function()
            local line_no = 0
            while true do
                line_no = line_no+1
                local line = lines[line_no]
                if not line then return end
                local prefix, equals = line:match('^(%s*)[-][-]%[(=)%[ *miranda:')
                if prefix then
                    if prefix:find('[^ ]') then
                        error(string.format(
                            'Problem in line %d: %s\n Prefix must be all spaces',
                            line_no, line)
                        )
                    end
                    local first_line_no = line_no
                    -- Scan forward to the line holding the closing bracket.
                    while true do
                        local long_line = lines[line_no]
                        if not long_line then
                            -- Ran off the end of the input without
                            -- finding the closing long bracket.
                            error(string.format(
                                'Problem in line %d: %s\n'
                                .. ' Long instruction is never closed\n',
                                first_line_no, lines[first_line_no]
                                )
                            )
                        end
                        -- Note: `not a ~= b` would parse as `(not a) ~= b`,
                        -- so the comparison is written without `not`.
                        if long_line:sub(1, #prefix) ~= prefix then
                            error(string.format(
                                'Problem in line %d: %s\n'
                                .. ' Lines does not have prefix of %d spaces\n'
                                .. ' All lines of a long instruction must have its prefix\n',
                                line_no, long_line, #prefix
                                )
                            )
                        end
                        if long_line:match('%]' .. equals .. '%]') then
                            coroutine.yield(parse_instruction(first_line_no, line_no))
                            -- line_no is now the last line of the instruction;
                            -- the outer loop resumes with the line after it.
                            break
                        end
                        line_no = line_no+1
                    end
                elseif line:find('^(%s*)[-][-] *miranda:') then
                    coroutine.yield(parse_instruction(line_no))
                end
            end
        end
    )
end
-- Normalize a raw section name.  In order: lowercase it, remove one
-- trailing space, remove one leading space, then rewrite every run of
-- characters other than alphanumerics and underscore to a single space.
-- Because the trimming happens before the rewrite, a name that ends in
-- such a run (punctuation, for example) keeps a trailing space.
-- Returns the normalized name only.
function parse_section_name(raw_name)
    local name = string.lower(raw_name)
    name = string.gsub(name, ' $', '', 1)
    name = string.gsub(name, '^ ', '', 1)
    name = string.gsub(name, '[^%w_]+', ' ')
    return name
end
-- Normalize a raw language name.  In order: lowercase it, remove one
-- trailing space, remove one leading space, then rewrite every run of
-- characters that are neither alphanumeric, punctuation nor underscore
-- to a single space.  Returns the normalized name only.
function parse_language_name(raw_name)
    local name = string.lower(raw_name)
    name = string.gsub(name, ' $', '', 1)
    name = string.gsub(name, '^ ', '', 1)
    name = string.gsub(name, '[^%w%p_]+', ' ')
    return name
end
-- Metatable shared by all instruction tables.  An instruction stores its
-- first line number at index 1 and, optionally, its last line number at
-- index 2.
local mt_instruction = {}
mt_instruction.__index = mt_instruction

-- First input line of the instruction.
function mt_instruction.first_line(instruction)
    return instruction[1]
end

-- Last input line of the instruction; falls back to the first line when
-- no last line was recorded.
function mt_instruction.last_line(instruction)
    local recorded_last = instruction[2]
    if recorded_last then return recorded_last end
    return instruction[1]
end
-- Parse one instruction of the input.
--
-- `first_line_no` is the line on which the instruction starts.
-- `last_line_no` is given only for the long (bracketed) form and is the
-- line containing the closing long bracket; when it is nil the
-- instruction is the single line `first_line_no`.
--
-- Returns an instruction table with `mt_instruction` as its metatable:
--   [1]     first_line_no
--   [2]     last_line_no (nil for the short form)
--   prefix  number of leading spaces before the `--`
--   type    'section', 'section+', 'insert', 'end section' or 'language'
--   name    normalized section or language name (absent for 'end section')
--
-- Raises an error if the instruction cannot be parsed or if a 'language'
-- instruction names a language other than 'c' or 'lua'.
function parse_instruction(first_line_no, last_line_no)
    local prefix, body
    if last_line_no then
        -- Join with newlines so that words on adjacent lines stay separate.
        local raw_instruction = table.concat(lines, '\n', first_line_no, last_line_no)
        local equals
        -- `-` is magic in Lua patterns, hence `%-%-`; `.` also matches
        -- newlines, so `(.*)` captures the whole multi-line body.
        prefix, equals, body =
            raw_instruction:match('^( *)%-%-%[(=*)%[ *miranda: *(.*)$')
        if body then
            -- Drop the closing long bracket; gsub's count is discarded.
            body = body:gsub('%]' .. equals .. '%]$', '', 1)
        end
    else
        local raw_instruction = lines[first_line_no]
        prefix, body = raw_instruction:match('^( *)%-%- *miranda: *(.*)$')
    end
    if not body then
        error(string.format(
            'Cannot parse instruction starting at line %d\n'
            .. ' Instruction begins: %s',
            first_line_no, lines[first_line_no]
        ))
    end
    -- Split the body into whitespace-separated words.
    local words = {}
    for word in body:gmatch('[%w%p]+') do
        words[#words+1] = word
    end
    local instruction = { first_line_no, last_line_no, prefix = #prefix }
    setmetatable(instruction, mt_instruction)
    local keyword = words[1]
    if keyword == 'section' or keyword == 'section+' or keyword == 'insert' then
        instruction.type = keyword
        instruction.name = parse_section_name(table.concat(words, ' ', 2))
        return instruction
    end
    if keyword == 'end' and words[2] == 'section' then
        instruction.type = table.concat(words, ' ', 1, 2)
        return instruction
    end
    if keyword == 'language' then
        instruction.type = keyword
        instruction.name = parse_language_name(table.concat(words, ' ', 2))
        local language = instruction.name
        if language ~= 'c' and language ~= 'lua' then
            error(string.format(
                'Problem in "language" instruction starting at line %d\n'
                .. ' Unknown language specified: "%s"\n',
                first_line_no, language))
        end
        return instruction
    end
    error(string.format(
        'Cannot parse instruction starting at line %d\n'
        .. ' Instruction begins: %s',
        first_line_no, lines[first_line_no]
    ))
end
-- Collect every instruction of the input, in the order the iterator
-- produces them.
local instructions = {}
do
    local next_instruction = instruction_iter()
    local found = next_instruction()
    while found ~= nil do
        instructions[#instructions+1] = found
        found = next_instruction()
    end
end
-- Check that the instruction at `instruction_ix` may start a run.
-- Returns `instruction_ix` when it is a 'section' instruction naming a
-- section not seen before (an empty entry is created for it in
-- `sections`) or a 'section+' instruction naming an existing section.
-- Returns nil when there is no instruction at that index.
-- Any other situation raises an error: currently no instructions are
-- allowed outside a run -- they must either start a run or occur
-- within one.
local function next_run_find(instruction_ix)
    local instruction = instructions[instruction_ix]
    if not instruction then return nil end
    local itype = instruction.type
    local complaint
    if itype == 'section' then
        local run_name = instruction.name
        if not sections[run_name] then
            sections[run_name] = {}
            return instruction_ix
        end
        complaint = " 'section' command, but section already exists\n"
            .. " A 'section' command must start a new section\n"
    elseif itype == 'section+' then
        if sections[instruction.name] then return instruction_ix end
        complaint = " 'section+' command, but section does not exist\n"
            .. " A 'section+' command must continue an existing section\n"
    else
        complaint = string.format(" '%s' command is only allowed inside a run\n",
            itype)
    end
    error(problem_in_line(instruction:first_line()) .. complaint)
end
-- A "run" is a series of consecutive lines which end up in a
-- section.  A section consists of 1 or more runs.
--
-- Returns an iterator yielding, for each run, the index (in
-- `instructions`) of the instruction that starts it and the number of
-- its last line.  A run ends at the first of:
--   * a non-empty text line which does not start with the run's prefix
--     (the run ends on the line before it);
--   * the end of the input;
--   * an instruction which does not start with the run's prefix, or a
--     'section' / 'section+' instruction (the run ends on the line
--     before it);
--   * an 'end section' instruction (the run includes that instruction).
-- Each candidate start instruction is checked with `next_run_find`,
-- which raises an error if it cannot start a run.
local function run_iter()
    return coroutine.wrap(
        function()
            local start_instruction_ix = 1
            -- while we can find start instructions
            while true do
                start_instruction_ix = next_run_find(start_instruction_ix)
                if not start_instruction_ix then return nil end
                local run_prefix = instructions[start_instruction_ix].prefix
                local run_indent = string.rep(' ', run_prefix)
                local current_instruction_ix = start_instruction_ix
                -- Walk the instructions of this run.  Every exit from the
                -- loop first yields the run and sets the index at which
                -- the search for the next run resumes.
                while true do
                    local current_instruction = instructions[current_instruction_ix]
                    local next_instruction = instructions[current_instruction_ix+1]
                    local first_text_line = current_instruction:last_line() + 1
                    local last_text_line = next_instruction
                        and next_instruction:first_line()-1 or #lines
                    local end_of_run = nil
                    for text_line_no = first_text_line, last_text_line do
                        local line = lines[text_line_no]
                        if #line > 0 and line:sub(1, run_prefix) ~= run_indent then
                            end_of_run = text_line_no - 1
                            break
                        end
                    end
                    if end_of_run then
                        coroutine.yield(start_instruction_ix, end_of_run)
                        start_instruction_ix = current_instruction_ix + 1
                        break
                    end
                    if not next_instruction then
                        coroutine.yield(start_instruction_ix, #lines)
                        -- Move past the last instruction; otherwise the
                        -- same start instruction would be examined again.
                        start_instruction_ix = current_instruction_ix + 1
                        break
                    end
                    local next_instruction_line_no = next_instruction:first_line()
                    local next_instruction_first_line = lines[next_instruction_line_no]
                    if next_instruction_first_line:sub(1, run_prefix) ~= run_indent then
                        coroutine.yield(start_instruction_ix, next_instruction_line_no-1)
                        start_instruction_ix = current_instruction_ix + 1
                        break
                    end
                    if next_instruction.type == "end section" then
                        coroutine.yield(start_instruction_ix, next_instruction:last_line())
                        -- Skip over the 'end section' instruction itself.
                        start_instruction_ix = current_instruction_ix + 2
                        break
                    end
                    if next_instruction.type == "section"
                        or next_instruction.type == "section+"
                    then
                        coroutine.yield(start_instruction_ix, next_instruction_line_no-1)
                        start_instruction_ix = current_instruction_ix + 1
                        break
                    end
                    current_instruction_ix = current_instruction_ix + 1
                end
            end
        end
    )
end
-- A Run is a two-element table: index 1 holds the index of the run's
-- first instruction, index 2 holds the number of the run's last line.
local Run = {}

-- Turn `o` (or a fresh table when `o` is omitted) into an instance of
-- the receiver and return it.
function Run:new(o)
    local run = setmetatable(o or {}, self)
    self.__index = self
    return run
end

-- Index of the instruction which starts the run.
Run.first_instruction_ix = function(self)
    return self[1]
end

-- Number of the last line of the run.
Run.last_line_no = function(self)
    return self[2]
end
-- Append every run to the section named by its starting instruction.
for start_instruction_ix, end_line in run_iter() do
    local owner_name = instructions[start_instruction_ix].name
    local owner = sections[owner_name]
    table.insert(owner, Run:new{ start_instruction_ix, end_line })
end
-- Returns an iterator over the instructions in a section.
-- Each step yields three values: the instruction, the prefix of the run
-- it belongs to (the prefix of the run's first instruction), and the
-- number of the last line of the text following the instruction.
local function section_instructions(section)
    return coroutine.wrap(
        function()
            for _, run in ipairs(section) do
                local ix = run:first_instruction_ix()
                local current = instructions[ix]
                local run_end = run:last_line_no()
                local run_prefix = current.prefix
                -- A run is a sequence of "subruns": one instruction
                -- followed by zero or more non-instruction ("text") lines.
                while current:last_line() <= run_end do
                    local following = instructions[ix+1]
                    -- Unless a later instruction begins inside the run,
                    -- this subrun extends to the end of the run and is
                    -- the last one.
                    local subrun_end = run_end
                    local is_final = true
                    if current:last_line() ~= run_end and following then
                        local line_before_following = following:first_line() - 1
                        if line_before_following < run_end then
                            subrun_end = line_before_following
                            is_final = false
                        end
                    end
                    coroutine.yield(current, run_prefix, subrun_end)
                    if is_final then break end
                    current = following
                    ix = ix + 1
                end
            end
        end
    )
end
-- Names of the sections currently being written by `section_output`;
-- used to detect a section which inserts itself.
local section_on_stack = {}
-- Figure out per-section values.  At present the only such value is the
-- language: every 'language' instruction of a section must name the
-- same language, which is recorded in the section's `language` field.
for section_name, section in pairs(sections) do
    local last_language_instruction = nil
    for instruction in section_instructions(section) do
        if instruction.type == 'language' then
            local language = instruction.name
            if last_language_instruction
                and last_language_instruction.name ~= language
            then
                local instruction_line_no_1 = last_language_instruction:first_line()
                local instruction_line_no_2 = instruction:first_line()
                error(
                    -- The newline keeps the section name from running
                    -- into the detail lines that follow.
                    string.format('Inconsistent language instructions for section "%s"\n',
                        section_name)
                    .. string.format(' First instruction, line %d: %s\n',
                        instruction_line_no_1, lines[instruction_line_no_1])
                    .. string.format(' Second instruction, line %d: %s\n',
                        instruction_line_no_2, lines[instruction_line_no_2])
                )
            end
            section.language = language
            last_language_instruction = instruction
        end
    end
end
-- Write the section called `name` to the file handle `fh`.
--
-- `language_arg` is the language used when the section has no language
-- of its own; `prefix_arg` (default 0) is the number of spaces put in
-- front of every line written.  'insert' instructions are replaced by
-- the output of the section they name, written recursively.  All other
-- instructions are written as comments: unchanged for 'lua', converted
-- to a `/* ... */` comment for 'c'.  The text lines following each
-- instruction are written with the run's prefix replaced by the
-- current prefix.
--
-- Raises an error if the section does not exist, if it is (directly or
-- indirectly) inserted into itself, or if the language is neither
-- 'lua' nor 'c'.
function section_output(fh, language_arg, name, prefix_arg)
    if section_on_stack[name] then
        error(string.format('Section "%s" used inside itself\n', name)
            .. ' This would cause an infinite recursion and is not allowed\n'
        )
    end
    -- Check for a missing section before reading any of its fields.
    local section = sections[name]
    if not section then
        error(string.format('Attempt to output missing section "%s"', name))
    end
    section_on_stack[name] = true
    local language = section.language or language_arg
    local current_prefix = prefix_arg or 0
    local indent = string.rep(' ', current_prefix)
    for instruction, run_prefix, last_text_line_no in section_instructions(section) do
        -- We first deal with outputting the instruction.
        local instruction_last_line_no = instruction:last_line()
        if instruction.type == 'insert' then
            -- The inserted section is indented by the current prefix plus
            -- the indentation of the 'insert' relative to its run.
            local insertion_prefix = current_prefix + (instruction.prefix - run_prefix)
            section_output(fh, language, instruction.name, insertion_prefix)
        else
            local pieces = {}
            for line_no = instruction:first_line(), instruction_last_line_no do
                pieces[#pieces+1] = indent
                pieces[#pieces+1] = lines[line_no]:sub(run_prefix+1)
                pieces[#pieces+1] = '\n'
            end
            local raw_instruction = table.concat(pieces)
            if language == 'lua' then
                fh:write(raw_instruction)
            elseif language == 'c' then
                local long_bracket_start, long_bracket_end =
                    raw_instruction:match('^ *()[-][-]%[=*%[()')
                if long_bracket_start then
                    -- Strings are immutable, so the result of gsub must be
                    -- kept.  The text ends in a newline, which has to be
                    -- part of the match for the `$` anchor to succeed.
                    raw_instruction = raw_instruction:gsub('%]=*%]\n$', '*/\n', 1)
                    fh:write(raw_instruction:sub(1, long_bracket_start-1))
                    fh:write('/*')
                    fh:write(raw_instruction:sub(long_bracket_end))
                else
                    raw_instruction = raw_instruction:gsub('[-][-]', '/*', 1)
                    raw_instruction = raw_instruction:gsub(' ?\n$', ' */\n', 1)
                    fh:write(raw_instruction)
                end
            else
                error(string.format('Bad language ("%s") for section "%s"', language, name))
            end
        end
        -- Then the text lines which follow the instruction.
        for line_no = instruction_last_line_no+1, last_text_line_no do
            fh:write(indent .. lines[line_no]:sub(run_prefix+1), '\n')
        end
    end
    section_on_stack[name] = nil
end
-- Write every section requested on the command line to its output file.
for section_name, filename in pairs(outputs) do
    print('section_name,filename ', section_name,filename)
    local handle, error_message = io.open(filename, 'w')
    if not handle then error(error_message) end
    section_output(handle, 'lua', section_name)
    -- Close explicitly so that buffered output is flushed and any write
    -- failure is reported rather than silently lost.
    local closed, close_error = handle:close()
    if not closed then error(close_error) end
end
-- vim: expandtab shiftwidth=4: