1188 lines
38 KiB
Elixir
1188 lines
38 KiB
Elixir
defmodule NimbleParsec.Compiler do
|
|
@moduledoc false
|
|
@arity 6
|
|
|
|
@doc """
Returns a parsec entrypoint named `name`.

The result is a `{doc, spec, {name, args, guards, body}}` tuple holding the
documentation string, the quoted typespec and the quoted pieces (arguments,
guards and body) of the entry point function.
"""
def entry_point(name) do
  doc = """
  Parses the given `binary` as #{name}.

  Returns `{:ok, [token], rest, context, position, byte_offset}` or
  `{:error, reason, rest, context, line, byte_offset}` where `position`
  describes the location of the #{name} (start position) as `{line, offset_to_start_of_line}`.

  The column where the error occurred can be inferred from `byte_offset - offset_to_start_of_line`.

  ## Options

    * `:byte_offset` - the byte offset for the whole binary, defaults to 0
    * `:line` - the line and the byte offset into that line, defaults to `{1, byte_offset}`
    * `:context` - the initial context value. It will be converted to a map
  """

  spec =
    quote do
      unquote(name)(binary, keyword) ::
        {:ok, [term], rest, context, line, byte_offset}
        | {:error, reason, rest, context, line, byte_offset}
      when line: {pos_integer, byte_offset},
           byte_offset: non_neg_integer,
           rest: binary,
           reason: String.t(),
           context: map
    end

  args = quote(do: [binary, opts \\ []])
  guards = quote(do: is_binary(binary))

  # Calls the `<name>__0` function and, on success, reverses the accumulator
  # before returning it. Errors are returned untouched.
  dispatch =
    quote generated: true do
      case unquote(:"#{name}__0")(binary, [], [], context, line, byte_offset) do
        {:ok, acc, rest, context, line, offset} ->
          {:ok, :lists.reverse(acc), rest, context, line, offset}

        {:error, _, _, _, _, _} = error ->
          error
      end
    end

  # Reads the options into the variables used by the dispatch above. A `:line`
  # given as anything but a two-element tuple is paired with `byte_offset`.
  body =
    quote do
      context = Map.new(Keyword.get(opts, :context, []))
      byte_offset = Keyword.get(opts, :byte_offset, 0)

      line =
        case Keyword.get(opts, :line, 1) do
          {_, _} = line -> line
          line -> {line, byte_offset}
        end

      unquote(dispatch)
    end

  {doc, spec, {name, args, guards, body}}
end
|
|
|
|
@doc """
Compiles the given combinators into multiple definitions.

Returns `{defs, inline}`. The inline list is only kept when the `:inline`
option is truthy; otherwise an empty list is returned in its place.
Raises `ArgumentError` when the list of combinators is empty.
"""
def compile(name, [], _opts) do
  raise ArgumentError, "cannot compile #{inspect(name)} with an empty parser combinator"
end

def compile(name, combinators, opts) when is_list(combinators) do
  keep_inline = Keyword.get(opts, :inline, false)
  {defs, inline} = compile(name, combinators)
  inline = if keep_inline, do: inline, else: []
  {defs, inline}
end
|
|
|
|
# Builds the initial compilation config, compiles the reversed list of
# combinators starting at step 0 and appends the final "ok" definition.
# Returns `{defs, inline}`.
defp compile(name, combinators) do
  config = %{acc_depth: 0, catch_all: nil, labels: [], name: name, replace: false}
  {first, first_step} = build_next(0, config)
  reversed = Enum.reverse(combinators)

  {defs, inline, last, _step} = compile(reversed, [], [], first, first_step, config)

  all_defs = Enum.reverse(defs, [build_ok(last)])
  all_inline = [{last, @arity} | inline]
  {all_defs, all_inline}
end
|
|
|
|
# Compiles a list of combinators, threading the accumulated definitions,
# the inline entries, the name of the current function and the step counter.
# Returns `{defs, inline, current, step}` once the list is exhausted.
defp compile([], defs, inline, current, step, _config), do: {defs, inline, current, step}

# `{:update, key, fun}` is a pseudo-combinator that only updates the config.
defp compile([{:update, key, fun} | rest], defs, inline, current, step, config) do
  updated_config = Map.update!(config, key, fun)
  compile(rest, defs, inline, current, step, updated_config)
end

defp compile(combinators, defs, inline, current, step, config) do
  {remaining, used, {new_defs, new_inline, next, next_step, catch_all}} =
    case take_bound_combinators(combinators) do
      # A label is expanded into its inner combinators, wrapped by updates
      # that push the label before them and pop it afterwards.
      {[{:label, label_combinators, label} = labelled | tail], [], [], [], [], _metadata} ->
        push_label = {:update, :labels, fn labels -> [label | labels] end}
        pop_label = {:update, :labels, &tl/1}
        expanded = [push_label | label_combinators] ++ [pop_label | tail]
        {expanded, [labelled], {[], [], current, step, :catch_none}}

      # Nothing could be bound: compile the first combinator as unbound.
      {[unbound | tail], [], [], [], [], _metadata} ->
        {tail, [unbound], compile_unbound_combinator(unbound, current, step, config)}

      # At least one combinator was bound: compile them all as a single unit.
      {tail, inputs, guards, outputs, taken, metadata} ->
        {tail, Enum.reverse(taken),
         compile_bound_combinator(inputs, guards, outputs, metadata, current, step, config)}
    end

  catch_all_defs =
    case catch_all do
      :catch_all -> [build_catch_all(:positive, current, used, config)]
      :catch_none -> []
    end

  all_defs = catch_all_defs ++ Enum.reverse(new_defs, defs)
  compile(remaining, all_defs, new_inline ++ inline, next, next_step, config)
end
|
|
|
|
## Unbound combinators
|
|
|
|
# Compiles a `{:parsec, parsec}` combinator into a single definition that
# calls the `<fun>__0` function of the referenced parsec (on `mod` when given
# as `{mod, fun}`, locally otherwise) and continues with `next` on success.
defp compile_unbound_combinator({:parsec, parsec}, current, step, config) do
  {next, next_step} = build_next(step, config)
  head = quote(do: [rest, acc, stack, context, line, offset])

  # Without a catch_all in the config the error is returned as is; otherwise
  # the body of a proxy to the catch_all function is used.
  on_error =
    case config do
      %{catch_all: nil} ->
        quote(do: error)

      %{catch_all: target, acc_depth: depth} ->
        {_, _, _, proxy_body} = build_proxy_to(current, target, depth)
        proxy_body
    end

  invocation =
    case parsec do
      {mod, fun} ->
        quote do
          unquote(mod).unquote(:"#{fun}__0")(rest, acc, [], context, line, offset)
        end

      fun ->
        quote do
          unquote(:"#{fun}__0")(rest, acc, [], context, line, offset)
        end
    end

  body =
    quote generated: true do
      case unquote(invocation) do
        {:ok, acc, rest, context, line, offset} ->
          unquote(next)(rest, acc, stack, context, line, offset)

        {:error, _, _, _, _, _} = error ->
          unquote(on_error)
      end
    end

  definition = {current, head, true, body}
  {[definition], [{current, @arity}], next, next_step, :catch_none}
end
|
|
|
|
# Compiles a lookahead. When every choice inside the lookahead is made only
# of bound combinators, each choice becomes one clause matching on the binary;
# otherwise compilation is delegated to compile_unbound_lookahead/5.
defp compile_unbound_combinator({:lookahead, combinators, kind}, current, step, config) do
  choices = extract_choices_from_lookahead(combinators)

  case Enum.all?(choices, &all_bound_combinators?/1) do
    false ->
      compile_unbound_lookahead(combinators, kind, current, step, config)

    true ->
      {next, next_step} = build_next(step, config)
      full_args = quote(do: [rest, acc, stack, context, line, offset])
      proceed = {next, [], full_args}

      {_, [_bin | catch_all_tail], _, fail} =
        build_catch_all(kind, current, combinators, config)

      # A positive lookahead proceeds on match and fails on the fallback
      # clause; any other kind swaps both bodies and reuses the tail of the
      # catch-all head.
      {on_match, on_fallback, head_tail} =
        case kind do
          :positive -> {proceed, fail, quote(do: [acc, stack, context, line, offset])}
          _ -> {fail, proceed, catch_all_tail}
        end

      clauses =
        Enum.map(choices, fn choice ->
          {[], inputs, guards, _, _, metadata} = take_bound_combinators(choice)
          {bin, _} = compile_bound_bin_pattern(inputs, metadata, quote(do: _))
          clause_head = quote(do: [unquote(bin) = rest]) ++ head_tail
          {current, clause_head, guards_list_to_quoted(guards), on_match}
        end)

      # An empty choice is not followed by the fallback clause.
      defs =
        if Enum.member?(choices, []) do
          clauses
        else
          clauses ++ [{current, full_args, true, on_fallback}]
        end

      {defs, [], next, next_step, :catch_none}
  end
end
|
|
|
|
# Compiles a traverse. The traversal callback is built with the config as
# received here, while the inner combinators are compiled with `replace`
# set to true when the kind is `:constant`.
defp compile_unbound_combinator(
       {:traverse, combinators, kind, traversal},
       current,
       step,
       config
     ) do
  outer_config = config

  traverse_fun = fn next, rest, user_acc, context, line, offset ->
    traverse(traversal, next, rest, user_acc, context, line, offset, outer_config)
  end

  inner_config =
    case kind do
      :constant -> %{config | replace: true}
      _ -> config
    end

  compile_unbound_traverse(combinators, kind, current, step, inner_config, traverse_fun)
end
|
|
|
|
# Compiles a `times` combinator, picking the bound implementation when
# none of the inner combinators need a context and the unbound one otherwise.
defp compile_unbound_combinator({:times, combinators, count}, current, step, config) do
  compile_times =
    if all_no_context_combinators?(combinators) do
      &compile_bound_times/5
    else
      &compile_unbound_times/5
    end

  compile_times.(combinators, count, current, step, config)
end
|
|
|
|
# Compiles a `repeat` combinator. A failure function name is reserved first
# and installed as the catch_all (with acc_depth reset to 0) for the inner
# combinators, then the bound or unbound implementation is selected.
defp compile_unbound_combinator({:repeat, combinators, while, _gen}, current, step, config) do
  {failure, next_step} = build_next(step, config)
  repeat_config = %{config | catch_all: failure, acc_depth: 0}

  compile_repeat =
    if all_no_context_combinators?(combinators) do
      &compile_bound_repeat/6
    else
      &compile_unbound_repeat/6
    end

  compile_repeat.(combinators, while, current, failure, next_step, repeat_config)
end
|
|
|
|
# Compiles an `eventually` combinator by delegating to compile_eventually/4.
defp compile_unbound_combinator({:eventually, combinators}, current, step, config),
  do: compile_eventually(combinators, current, step, config)
|
|
|
|
# Compiles a `choice` combinator. When no label is set yet, the label of the
# choice itself is used. The bound implementation is picked when every choice
# is made only of bound combinators.
defp compile_unbound_combinator({:choice, choices, _} = combinator, current, step, config) do
  labels =
    case config.labels do
      [] -> [label(combinator)]
      existing -> existing
    end

  labelled_config = %{config | labels: labels}

  compile_choice =
    if Enum.all?(choices, &all_bound_combinators?/1) do
      &compile_bound_choice/4
    else
      &compile_unbound_choice/4
    end

  compile_choice.(choices, current, step, labelled_config)
end
|
|
|
|
## Lookahead
|
|
|
|
# Returns the list of choices of a lookahead: the choices themselves when the
# lookahead holds exactly one choice combinator, or the given combinators
# wrapped as a single choice otherwise.
defp extract_choices_from_lookahead(combinators) do
  case combinators do
    [{:choice, choices, _}] -> choices
    other -> [other]
  end
end
|
|
|
|
# Compiles a lookahead whose combinators are not all bound. The entry point
# saves `{rest, acc, context, line, offset}` on the stack and runs the inner
# combinators with an empty accumulator; both the success and the failure
# definitions restore that saved state from the stack.
defp compile_unbound_lookahead(combinators, kind, current, step, config) do
  {_, _, _, catch_all_body} = build_catch_all(kind, current, combinators, config)

  {inner_entry, step} = build_next(step, config)
  entry_head = quote(do: [rest, acc, stack, context, line, offset])

  entry_args =
    quote(do: [rest, [], [{rest, acc, context, line, offset} | stack], context, line, offset])

  entry_def = {current, entry_head, true, {inner_entry, [], entry_args}}

  {failure, step} = build_next(step, config)
  inner_config = %{config | catch_all: failure, acc_depth: 0}

  {defs, inline, success, step} =
    compile(combinators, [entry_def], [], inner_entry, step, inner_config)

  {next, step} = build_next(step, inner_config)
  restore_head = quote(do: [_, _, [{rest, acc, context, line, offset} | stack], _, _, _])
  restore_args = quote(do: [rest, acc, stack, context, line, offset])
  continue_body = {next, [], restore_args}

  # For a positive lookahead the success definition continues and the failure
  # one runs the catch-all body; any other kind swaps the two bodies. The
  # pair is listed in the order it is appended after the compiled definitions.
  tail_defs =
    case kind do
      :positive ->
        [{failure, restore_head, true, catch_all_body}, {success, restore_head, true, continue_body}]

      _ ->
        [{success, restore_head, true, catch_all_body}, {failure, restore_head, true, continue_body}]
    end

  all_inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline]
  {Enum.reverse(defs, tail_defs), all_inline, next, step, :catch_none}
end
|
|
|
|
## Traverse
|
|
|
|
# Compiles a traverse over no combinators: a single definition whose body is
# produced by `fun` with an empty user accumulator.
defp compile_unbound_traverse([], _kind, current, step, config, fun) do
  {next, next_step} = build_next(step, config)
  head = quote(do: [rest, acc, stack, context, line, offset])

  body =
    fun.(next, quote(do: rest), [], quote(do: context), quote(do: line), quote(do: offset))

  definition = {current, head, true, body}
  {[definition], [{current, @arity}], next, next_step, :catch_none}
end

# Compiles a traverse over the given combinators. The entry point pushes the
# current accumulator on the stack (together with line and offset when the
# kind is `:pre`) and runs the inner combinators with an empty accumulator.
defp compile_unbound_traverse(combinators, kind, current, step, config, fun) do
  pre? = kind == :pre

  {inner_entry, step} = build_next(step, config)
  entry_head = quote(do: [rest, acc, stack, context, line, offset])

  pushed =
    if pre? do
      quote(do: {acc, line, offset})
    else
      quote(do: acc)
    end

  entry_args = quote(do: [rest, [], [unquote(pushed) | stack], context, line, offset])
  entry_def = {current, entry_head, true, {inner_entry, [], entry_args}}

  nested_config = %{config | acc_depth: config.acc_depth + 1}

  {defs, inline, last, step} =
    compile(combinators, [entry_def], [], inner_entry, step, nested_config)

  # Now we need to traverse the accumulator with the user code and
  # concatenate with the previous accumulator at the top of the stack.
  {next, step} = build_next(step, nested_config)

  # With `:pre`, the line and offset given to the traversal are the ones
  # saved on the stack; otherwise they are the current ones.
  {popped, traverse_line, traverse_offset} =
    if pre? do
      {quote(do: {acc, stack_line, stack_offset}), quote(do: stack_line),
       quote(do: stack_offset)}
    else
      {quote(do: acc), quote(do: line), quote(do: offset)}
    end

  exit_head = quote(do: [rest, user_acc, [unquote(popped) | stack], context, line, offset])

  exit_body =
    fun.(
      next,
      quote(do: rest),
      quote(do: user_acc),
      quote(do: context),
      traverse_line,
      traverse_offset
    )

  last_def = {last, exit_head, true, exit_body}

  all_inline = [{current, @arity}, {last, @arity} | inline]
  {Enum.reverse(defs, [last_def]), all_inline, next, step, :catch_none}
end
|
|
|
|
# Builds the quoted body that applies a traversal and calls `next`.
#
# When the config has `replace: true`, the traversal is not applied: the user
# accumulator is only marked as used and `next` is called with the outer
# variables.
defp traverse(_traversal, next, _, user_acc, _, _, _, %{replace: true}) do
  quote do
    _ = unquote(user_acc)
    unquote(next)(rest, acc, stack, context, line, offset)
  end
end

defp traverse(traversal, next, rest, user_acc, context, line, offset, _) do
  applied = apply_traverse(traversal, rest, user_acc, context, line, offset)

  case applied do
    # The traversal expanded to a three-element tuple: inline its parts.
    {:{}, _, [new_rest, expanded_acc, new_context]} ->
      quote do
        _ = unquote(user_acc)

        unquote(next)(
          unquote(new_rest),
          unquote(expanded_acc) ++ acc,
          stack,
          unquote(new_context),
          line,
          offset
        )
      end

    # The traversal expanded to an error tuple.
    {:error, reason} ->
      quote do
        {:error, unquote(reason), rest, context, line, offset}
      end

    # Anything else is matched on in the generated code.
    quoted ->
      quote generated: true do
        case unquote(quoted) do
          {rest, user_acc, context} when is_list(user_acc) ->
            unquote(next)(rest, user_acc ++ acc, stack, context, line, offset)

          {:error, reason} ->
            {:error, reason, rest, context, line, offset}
        end
      end
  end
end
|
|
|
|
# Applies the list of traversals in reverse order, starting from the quoted
# `{rest, acc, context}` tuple.
defp apply_traverse(mfargs, rest, acc, context, line, offset) do
  initial = quote(do: {unquote(rest), unquote(acc), unquote(context)})

  mfargs
  |> Enum.reverse()
  |> apply_traverse(initial, line, offset)
end
|
|
|
|
# While the previous result is a quoted three-element tuple, the next
# traversal is applied directly to its elements.
defp apply_traverse([mfargs | tail], {:{}, _, [rest, acc, context]}, line, offset) do
  mfargs
  |> apply_traverse_mfa([rest, acc, context, line, offset], rest)
  |> then(&apply_traverse(tail, &1, line, offset))
end

# No traversals left: the previous result is the final one.
defp apply_traverse([], rest_acc_context, _line, _offset), do: rest_acc_context

# The previous result is not a quoted three-element tuple: the remaining
# traversals are chained in a `with`, each clause matching on
# `{rest, acc, context}`.
defp apply_traverse(tail, rest_acc_context, line, offset) do
  rest_var = quote(do: rest)
  pattern = quote(do: {rest, acc, context})
  args = [rest_var, quote(do: acc), quote(do: context), line, offset]

  entries =
    for mfargs <- tail do
      quote(do: unquote(pattern) <- unquote(apply_traverse_mfa(mfargs, args, quote(do: rest))))
    end

  quote do
    with unquote(pattern) <- unquote(rest_acc_context), unquote_splicing(entries) do
      {rest, acc, context}
    end
  end
end
|
|
|
|
# Applies a single traversal and normalizes its result. A quoted
# three-element tuple and an `{:error, _}` tuple are returned as is. Any other
# two-element tuple emits a deprecation warning and is completed with `rest`.
# Any other result is wrapped in generated code that does the same at runtime.
defp apply_traverse_mfa(mfargs, args, rest) do
  case apply_mfa(mfargs, args) do
    {:{}, _, [_, _, _]} = triple ->
      triple

    {:error, _} = error ->
      error

    {acc, context} ->
      IO.warn(
        "returning a two-element tuple {acc, context} in pre_traverse/post_traverse is deprecated, " <>
          "please return {rest, acc, context} instead"
      )

      quote(do: {unquote(rest), unquote(acc), unquote(context)})

    quoted ->
      # TODO: Deprecate two element tuple return that is not error
      quote generated: true do
        case unquote(quoted) do
          {_, _, _} = res ->
            res

          {:error, reason} ->
            {:error, reason}

          {acc, context} ->
            IO.warn(
              "returning a two-element tuple {acc, context} in pre_traverse/post_traverse is deprecated, " <>
                "please return {rest, acc, context} instead"
            )

            {unquote(rest), acc, context}
        end
      end
  end
end
|
|
|
|
## Eventually
|
|
|
|
# Compiles an `eventually` combinator: the inner combinators are attempted
# and, on failure, the failure definition consumes one byte and tries again.
defp compile_eventually(combinators, current, step, config) do
  # First add the initial accumulator to the stack
  {entrypoint, step} = build_next(step, config)
  enter_head = quote(do: [rest, acc, stack, context, line, offset])
  enter_args = quote(do: [rest, acc, [acc | stack], context, line, offset])
  current_def = {current, enter_head, true, {entrypoint, [], enter_args}}

  # Now define the failure point which will recur
  {failure, step} = build_next(step, config)
  failure_def = build_eventually_next_def(entrypoint, failure)
  deeper_config = %{config | acc_depth: config.acc_depth + 1}
  catch_all_def = build_catch_all(:positive, failure, combinators, deeper_config)

  # And compile down the inner combinators
  inner_config = %{deeper_config | catch_all: failure, acc_depth: 0}

  {defs, inline, success, step} =
    compile(combinators, [], [], entrypoint, step, inner_config)

  # In the exit remove the accumulator from the stack
  {exitpoint, step} = build_next(step, inner_config)
  exit_head = quote(do: [rest, acc, [_ | stack], context, line, offset])
  exit_args = quote(do: [rest, acc, stack, context, line, offset])
  success_def = {success, exit_head, true, {exitpoint, [], exit_args}}

  all_defs = Enum.reverse(defs, [success_def, current_def, failure_def, catch_all_def])
  all_inline = [{success, @arity}, {current, @arity}, {failure, @arity} | inline]
  {all_defs, all_inline, exitpoint, step, :catch_none}
end
|
|
|
|
# Builds the failure definition used by `eventually`: it consumes one byte,
# restores the accumulator found at the top of the stack and calls the
# entry point again with the line and offset advanced for that byte.
defp build_eventually_next_def(entrypoint, failure) do
  head = quote(do: [<<byte, rest::binary>>, _acc, [acc | _] = stack, context, line, offset])

  next_offset = add_offset(quote(do: offset), 1)
  next_line = add_line(quote(do: line), next_offset, quote(do: byte))

  args = quote(do: [rest, acc, stack, context, unquote(next_line), unquote(next_offset)])
  {failure, head, true, {entrypoint, [], args}}
end
|
|
|
|
## Repeat
|
|
|
|
# Compiles a repeat whose combinators need no context. When the `while`
# callback expands to `{:cont, context}`, the combinators are compiled
# directly at `current`; otherwise `current` first evaluates the callback.
# In both cases the last definition proxies back to `current`.
defp compile_bound_repeat(combinators, while, current, failure, step, config) do
  {initial_defs, body_entry, step} =
    case apply_mfa(while, quote(do: [rest, context, line, offset])) do
      {:cont, quote(do: context)} ->
        {[], current, step}

      quoted ->
        {next, next_step} = build_next(step, config)
        vars = quote(do: [rest, acc, stack, context, line, offset])
        check_body = repeat_while(quoted, next, vars, failure, vars)
        {[{current, vars, true, check_body}], next, next_step}
    end

  {defs, inline, success, step} =
    compile(combinators, initial_defs, [], body_entry, step, config)

  loop_def = build_proxy_to(success, current, 0)

  {Enum.reverse(defs, [loop_def]), [{success, @arity} | inline], failure, step, :catch_none}
end
|
|
|
|
# Compiles a repeat whose combinators may need a context. The state before
# each iteration is kept on the stack as `{rest, acc, context, line, offset}`
# so that the failure definition can restore it.
defp compile_unbound_repeat(combinators, while, current, failure, step, config) do
  {recur, step} = build_next(step, config)
  {defs, inline, success, step} = compile(combinators, [], [], recur, step, config)

  # On failure, restore the state saved on the stack and continue with `next`.
  {next, step} = build_next(step, config)
  failure_head = quote(do: [_, _, [{rest, acc, context, line, offset} | stack], _, _, _])
  restored_args = quote(do: [rest, acc, stack, context, line, offset])
  failure_def = {failure, failure_head, true, {next, [], restored_args}}

  while_quoted = apply_mfa(while, quote(do: [rest, context, line, offset]))

  # On success, the state of the iteration that just finished is bound to the
  # `inner_*` variables while the previously saved state is read from the stack.
  success_head =
    quote do
      [
        inner_rest,
        inner_acc,
        [{rest, acc, context, line, offset} | stack],
        inner_context,
        inner_line,
        inner_offset
      ]
    end

  again_args =
    quote do
      [
        inner_rest,
        [],
        [{inner_rest, inner_acc ++ acc, inner_context, inner_line, inner_offset} | stack],
        inner_context,
        inner_line,
        inner_offset
      ]
    end

  stop_args = quote(do: [rest, acc, stack, context, line, offset])

  # We need to do this dance because of unused variables
  success_body =
    case compile_time_repeat_while(while_quoted) do
      :cont ->
        quote do
          _ = {rest, acc, context, line, offset}
          unquote({recur, [], again_args})
        end

      :halt ->
        quote do
          _ = {inner_rest, inner_acc, inner_context, inner_line, inner_offset}
          unquote({next, [], stop_args})
        end

      :none ->
        repeat_while(while_quoted, recur, again_args, next, stop_args)
    end

  success_def = {success, success_head, true, success_body}

  # The entry point saves the current state on the stack before recurring.
  current_head = quote(do: [rest, acc, stack, context, line, offset])

  first_args =
    quote do
      [rest, [], [{rest, acc, context, line, offset} | stack], context, line, offset]
    end

  skip_args = quote(do: [rest, acc, stack, context, line, offset])
  current_body = repeat_while(while_quoted, recur, first_args, next, skip_args)
  current_def = {current, current_head, true, current_body}

  all_defs = [current_def | Enum.reverse(defs, [failure_def, success_def])]
  all_inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline]
  {all_defs, all_inline, next, step, :catch_none}
end
|
|
|
|
# Classifies the quoted result of a `while` callback: `:cont` or `:halt` when
# it is a `{:cont, context}` / `{:halt, context}` tuple whose second element
# is the `context` variable of this module, `:none` otherwise.
defp compile_time_repeat_while(quoted) do
  case quoted do
    {:cont, {:context, _, __MODULE__}} -> :cont
    {:halt, {:context, _, __MODULE__}} -> :halt
    _ -> :none
  end
end
|
|
|
|
# Builds the quoted call for a `while` result. When the outcome is known at
# compile time the matching call is returned directly; otherwise a `case` on
# the quoted expression is generated, matching the context against the
# fourth element of each argument list.
defp repeat_while(quoted, true_name, true_args, false_name, false_args) do
  on_cont = {true_name, [], true_args}
  on_halt = {false_name, [], false_args}

  case compile_time_repeat_while(quoted) do
    :cont ->
      on_cont

    :halt ->
      on_halt

    :none ->
      quote do
        case unquote(quoted) do
          {:cont, unquote(Enum.at(true_args, 3))} -> unquote(on_cont)
          {:halt, unquote(Enum.at(false_args, 3))} -> unquote(on_halt)
        end
      end
  end
end
|
|
|
|
## Repeat up to
|
|
|
|
# Compiles a `times` over combinators that need no context. The remaining
# count is kept at the top of the stack and decremented on each success.
defp compile_bound_times(combinators, count, current, step, config) do
  {failure, step} = build_next(step, config)
  {recur, step} = build_next(step, config)

  # Entry point: push the count on the stack and start iterating.
  plain = quote(do: [rest, acc, stack, context, line, offset])
  start_args = quote(do: [rest, acc, [unquote(count) | stack], context, line, offset])
  current_def = {current, plain, true, {recur, [], start_args}}

  inner_config = %{config | catch_all: failure, acc_depth: 0}

  {defs, inline, success, step} =
    compile(combinators, [current_def], [], recur, step, inner_config)

  {next, step} = build_next(step, inner_config)

  # Success with a count of 1: drop the counter and move on.
  done_head = quote(do: [rest, acc, [1 | stack], context, line, offset])
  done_args = quote(do: [rest, acc, stack, context, line, offset])
  success_def0 = {success, done_head, true, {next, [], done_args}}

  # Success with any other count: decrement and iterate again.
  more_head = quote(do: [rest, acc, [count | stack], context, line, offset])
  more_args = quote(do: [rest, acc, [count - 1 | stack], context, line, offset])
  success_def1 = {success, more_head, true, {recur, [], more_args}}

  # Failure: drop the counter and move on.
  failure_head = quote(do: [rest, acc, [_ | stack], context, line, offset])
  failure_args = quote(do: [rest, acc, stack, context, line, offset])
  failure_def = {failure, failure_head, true, {next, [], failure_args}}

  all_defs = Enum.reverse(defs, [failure_def, success_def0, success_def1])
  all_inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline]
  {all_defs, all_inline, next, step, :catch_none}
end
|
|
|
|
# Compiles a `times` over combinators that may need a context. The stack
# holds `{count, rest, acc, context, line, offset}` so that the failure
# definition can restore the state saved before the current iteration.
defp compile_unbound_times(combinators, count, current, step, config) do
  {failure, step} = build_next(step, config)
  {recur, step} = build_next(step, config)

  # Entry point: save the count and the current state, start with an empty acc.
  plain = quote(do: [rest, acc, stack, context, line, offset])

  start_args =
    quote do
      [
        rest,
        [],
        [{unquote(count), rest, acc, context, line, offset} | stack],
        context,
        line,
        offset
      ]
    end

  current_def = {current, plain, true, {recur, [], start_args}}

  inner_config = %{config | catch_all: failure, acc_depth: 0}

  {defs, inline, success, step} =
    compile(combinators, [current_def], [], recur, step, inner_config)

  {next, step} = build_next(step, inner_config)

  # Success with a count of 1: merge the accumulators and move on.
  done_head = quote(do: [rest, user_acc, [{1, _, acc, _, _, _} | stack], context, line, offset])
  done_args = quote(do: [rest, user_acc ++ acc, stack, context, line, offset])
  success_def0 = {success, done_head, true, {next, [], done_args}}

  # Success with any other count: save the new state with a decremented count.
  more_head =
    quote(do: [rest, user_acc, [{count, _, acc, _, _, _} | stack], context, line, offset])

  more_args =
    quote do
      [
        rest,
        [],
        [{count - 1, rest, user_acc ++ acc, context, line, offset} | stack],
        context,
        line,
        offset
      ]
    end

  success_def1 = {success, more_head, true, {recur, [], more_args}}

  # Failure: restore the state saved on the stack and move on.
  failure_head = quote(do: [_, _, [{_, rest, acc, context, line, offset} | stack], _, _, _])
  failure_args = quote(do: [rest, acc, stack, context, line, offset])
  failure_def = {failure, failure_head, true, {next, [], failure_args}}

  all_defs = Enum.reverse(defs, [failure_def, success_def0, success_def1])
  all_inline = [{current, @arity}, {success, @arity}, {failure, @arity} | inline]
  {all_defs, all_inline, next, step, :catch_none}
end
|
|
|
|
## Choice
|
|
|
|
# Compiles a choice where every alternative is made only of bound
# combinators: each alternative becomes one clause of `current`, and all of
# them continue with the same next function. A catch-all is requested unless
# one of the alternatives is empty.
defp compile_bound_choice(choices, current, step, config) do
  {next_name, next_step} = build_next(step, config)

  clauses =
    Enum.map(choices, fn choice ->
      {[], inputs, guards, outputs, _, metadata} = take_bound_combinators(choice)

      {[clause], [], ^next_name, ^next_step, _} =
        compile_bound_combinator(inputs, guards, outputs, metadata, current, step, config)

      clause
    end)

  catch_all =
    if Enum.member?(choices, []) do
      :catch_none
    else
      :catch_all
    end

  {clauses, [], next_name, next_step, catch_all}
end
|
|
|
|
# Compiles a choice with at least one alternative that is not fully bound.
# The entry point pushes `{rest, context, line, offset}` and the current
# accumulator on the stack, then tries the first alternative with an empty
# accumulator.
defp compile_unbound_choice(choices, current, step, config) do
  {done, step} = build_next(step, config)

  # The choices are processed in reverse order. The last one has no fallback
  # besides the requirement to drop the stack, which allows us to compose
  # with repeat and traverse.
  nested_config = %{config | acc_depth: config.acc_depth + 2}
  reversed = Enum.reverse(choices)

  {first, defs, inline, step} =
    compile_unbound_choice(reversed, [], [], :unused, step, done, nested_config)

  head = quote(do: [rest, acc, stack, context, line, offset])

  args =
    quote(do: [rest, [], [{rest, context, line, offset}, acc | stack], context, line, offset])

  entry_def = {current, head, true, {first, [], args}}

  {[entry_def | Enum.reverse(defs)], [{current, @arity} | inline], done, step, :catch_none}
end
|
|
|
|
# Once every choice is compiled, the most recent failure definition (and its
# inline entry and step) is discarded since it won't be used.
defp compile_unbound_choice([], defs, inline, previous, step, _success, _config),
  do: {previous, tl(defs), tl(inline), step - 1}

# Compiles one alternative. Its success definition merges the accumulator
# with the one saved on the stack and jumps to `done`. Its failure definition
# restores the saved position and jumps to this alternative's entry; it is
# installed as the catch_all for the alternatives compiled afterwards.
defp compile_unbound_choice([choice | choices], defs, inline, _previous, step, done, config) do
  {entry, step} = build_next(step, config)
  {defs, inline, success, step} = compile(choice, defs, inline, entry, step, config)

  success_head = quote(do: [rest, acc, [_, previous_acc | stack], context, line, offset])
  success_args = quote(do: [rest, acc ++ previous_acc, stack, context, line, offset])
  success_def = {success, success_head, true, {done, [], success_args}}

  {failure, step} = build_next(step, config)
  failure_head = quote(do: [_, _, [{rest, context, line, offset} | _] = stack, _, _, _])
  failure_args = quote(do: [rest, [], stack, context, line, offset])
  failure_def = {failure, failure_head, true, {entry, [], failure_args}}

  next_defs = [failure_def, success_def | defs]
  next_inline = [{failure, @arity}, {success, @arity} | inline]
  next_config = %{config | catch_all: failure, acc_depth: 0}
  compile_unbound_choice(choices, next_defs, next_inline, entry, step, done, next_config)
end
|
|
|
|
## No context combinators
|
|
|
|
# If a combinator does not need a context, i.e. it cannot abort
|
|
# in the middle, then we can compile to an optimized version of
|
|
# repeat and times.
|
|
#
|
|
# For example, a lookahead at the beginning doesn't need a context.
|
|
# A choice that is bound doesn't need one either.
|
|
# Returns true when the combinators need no context. A leading lookahead is
# accepted as long as its own combinators are all bound; the remaining
# combinators are checked by all_no_context_combinators_next?/1.
defp all_no_context_combinators?(combinators) do
  case combinators do
    [{:lookahead, look_combinators, _} | rest] ->
      all_bound_combinators?(look_combinators) and all_no_context_combinators_next?(rest)

    _ ->
      all_no_context_combinators_next?(combinators)
  end
end
|
|
|
|
# Checks the combinators after an optional leading lookahead. A leading
# choice is accepted when all_bound_combinators?/1 holds for its contents and
# the rest passes this same check; anything else must be all bound.
defp all_no_context_combinators_next?(combinators) do
  case combinators do
    [{:choice, choice_combinators, _} | rest] ->
      all_bound_combinators?(choice_combinators) and all_no_context_combinators_next?(rest)

    _ ->
      all_bound_combinators?(combinators)
  end
end
|
|
|
|
## Bound combinators
|
|
|
|
# A bound combinator is a combinator where the number of inputs, guards,
|
|
# outputs, line and offset shifts are known at compilation time. We inline
|
|
# those bound combinators into a single bitstring pattern for performance.
|
|
# Currently, error reporting points to the beginning of the bound combinator
# in case of errors, but this can be addressed if desired.
|
|
|
|
# Compiles a run of bound combinators into a single definition that matches
# the binary pattern (and guards) and calls the next function with the new
# line and offset taken from the metadata. When `config.replace` is set the
# accumulator is passed through untouched; otherwise the outputs are
# prepended to it.
defp compile_bound_combinator(inputs, guards, outputs, metadata, current, step, config) do
  %{line: new_line, offset: new_offset} = metadata
  {next, next_step} = build_next(step, config)
  {bin, rest} = compile_bound_bin_pattern(inputs, metadata, quote(do: rest))

  new_acc =
    if config.replace do
      quote(do: acc)
    else
      quote(do: unquote(outputs) ++ acc)
    end

  call_args =
    quote do
      [unquote(rest), unquote(new_acc), stack, context, unquote(new_line), unquote(new_offset)]
    end

  head = quote(do: [unquote(bin), acc, stack, context, comb__line, comb__offset])
  quoted_guards = guards_list_to_quoted(guards)

  definition = {current, head, quoted_guards, {next, [], call_args}}
  {[definition], [], next, next_step, :catch_all}
end
|
|
|
|
# Returns `{pattern, tail}` where `pattern` is the quoted binary pattern made of
# `inputs` followed by a `tail :: binary` segment. The tail is the empty string
# when the metadata says end-of-string was seen, otherwise it is `var`.
defp compile_bound_bin_pattern(inputs, %{eos: eos?}, var) do
  tail = if eos?, do: "", else: var
  segments = inputs ++ [quote(do: unquote(tail) :: binary)]
  {{:<<>>, [], segments}, tail}
end
|
|
|
|
# True when take_bound_combinators/1 consumes every combinator, i.e. nothing
# is left over in the first element of its result.
defp all_bound_combinators?(combinators) do
  case take_bound_combinators(combinators) do
    {[], _, _, _, _, _} -> true
    _ -> false
  end
end
|
|
|
|
# Entry point: starts taking bound combinators with empty inputs, guards,
# outputs and accumulator, and with metadata seeded from line_offset_pair/0.
defp take_bound_combinators(combinators) do
  {initial_line, initial_offset} = line_offset_pair()
  initial = %{eos: false, line: initial_line, offset: initial_offset, counter: 0}
  take_bound_combinators(combinators, [], [], [], [], initial)
end
|
|
|
|
# Takes as many bound combinators as possible from the front of the list.
#
# Returns `{remaining, inputs, guards, outputs, taken, metadata}`, where `taken`
# holds the consumed combinators in reverse order.
#
# An `:eos` stops the walk: consecutive `:eos` entries are dropped and the
# metadata is flagged with `eos: true`.
defp take_bound_combinators([:eos | tail], inputs, guards, outputs, acc, metadata) do
  remaining = Enum.drop_while(tail, &(&1 == :eos))
  {remaining, inputs, guards, outputs, [:eos | acc], %{metadata | eos: true}}
end

defp take_bound_combinators(combinators, inputs, guards, outputs, acc, metadata) do
  # Result used as soon as the head cannot be inlined (or the list is exhausted).
  stop = {combinators, inputs, guards, outputs, acc, metadata}

  case combinators do
    [head | tail] ->
      case bound_combinator(head, metadata) do
        {:ok, new_inputs, new_guards, new_outputs, new_metadata} ->
          take_bound_combinators(
            tail,
            inputs ++ new_inputs,
            guards ++ new_guards,
            merge_output(new_outputs, outputs),
            [head | acc],
            new_metadata
          )

        _ ->
          stop
      end

    _ ->
      stop
  end
end
|
|
|
|
# Concatenates two outputs. Plain lists are joined right away; anything else
# is treated as quoted code and joined through a quoted `++` expression.
defp merge_output(left, right) do
  if is_list(left) and is_list(right) do
    left ++ right
  else
    quote(do: unquote(left) ++ unquote(right))
  end
end
|
|
|
|
# Inlines a literal string into the bound pattern.
#
# The offset AST advances by the byte size of the string. When the string
# contains newlines, the line AST is replaced by a tuple expression that bumps
# the line number by the newline count and uses the offset reached right after
# the last newline.
defp bound_combinator({:string, string}, %{line: line, offset: offset} = metadata) do
  size = byte_size(string)
  segments = String.split(string, "\n")
  newlines = length(segments) - 1

  new_line =
    if newlines == 0 do
      line
    else
      trailing = segments |> List.last() |> byte_size()
      line_offset = add_offset(offset, size - trailing)

      quote do
        {elem(unquote(line), 0) + unquote(newlines), unquote(line_offset)}
      end
    end

  {:ok, [string], [], [string], %{metadata | line: new_line, offset: add_offset(offset, size)}}
end
|
|
|
|
# Inlines a single binary segment (one integer or one codepoint with a modifier).
#
# A fresh variable is allocated for the segment, range guards are compiled for
# it, and the offset grows by 1 for `:integer` or by the runtime byte size of
# the encoded segment otherwise. The line AST is only wrapped with a newline
# check when the ranges allow a newline to match.
defp bound_combinator({:bin_segment, inclusive, exclusive, modifier}, metadata) do
  %{line: line, offset: offset, counter: counter} = metadata

  {var, next_counter} = build_var(counter)
  segment = apply_bin_modifier(var, modifier)
  guards = compile_bin_ranges(var, inclusive, exclusive)

  width =
    case modifier do
      :integer -> 1
      _ -> quote(do: byte_size(<<unquote(segment)>>))
    end

  new_offset = add_offset(offset, width)

  new_line =
    if newline_allowed?(inclusive) and not newline_forbidden?(exclusive) do
      add_line(line, new_offset, var)
    else
      line
    end

  new_metadata = %{metadata | line: new_line, offset: new_offset, counter: next_counter}
  {:ok, [segment], guards, [var], new_metadata}
end
|
|
|
|
# A label is bound only when every combinator it wraps is bound; the labels
# themselves are ignored here.
defp bound_combinator({:label, combinators, _labels}, metadata) do
  {remaining, inputs, guards, outputs, _taken, new_metadata} =
    take_bound_combinators(combinators, [], [], [], [], metadata)

  if remaining == [] do
    {:ok, inputs, guards, outputs, new_metadata}
  else
    :error
  end
end
|
|
|
|
# A traverse is bound when all wrapped combinators are bound AND applying the
# traversal at compile time yields a `{rest, outputs, context}` tuple AST whose
# rest and context are the very same quoted variables that were passed in, and
# whose outputs are not `:error`. Anything else makes the traverse unbound.
defp bound_combinator({:traverse, combinators, kind, mfargs}, pre_metadata) do
  with {[], inputs, guards, outputs, _, post_metadata} <-
         take_bound_combinators(combinators, [], [], [], [], pre_metadata),
       {rest, context} = quote(do: {rest, context}),
       {traverse_line, traverse_offset} = pre_post_traverse(kind, pre_metadata, post_metadata),
       {:{}, _, [^rest, traversed, ^context]} when traversed != :error <-
         apply_traverse(mfargs, rest, outputs, context, traverse_line, traverse_offset) do
    {:ok, inputs, guards, traversed, post_metadata}
  else
    _ -> :error
  end
end
|
|
|
|
# Inlines a fixed-size run of `count` bytes bound to a fresh variable and
# advances the offset AST by `count`. The line AST is left as is.
defp bound_combinator({:bytes, count}, metadata) do
  %{counter: counter, offset: offset} = metadata
  {var, next_counter} = build_var(counter)
  segment = quote(do: unquote(var) :: binary - size(unquote(count)))
  new_metadata = %{metadata | counter: next_counter, offset: add_offset(offset, count)}
  {:ok, [segment], [], [var], new_metadata}
end
|
|
|
|
# Fallback: any other combinator is not bound.
defp bound_combinator(_combinator, _metadata), do: :error
|
|
|
|
## Line and offset handling
|
|
|
|
# For a pre-traversal, returns the line/offset AST from before the traversed
# combinators; otherwise (post-traversal) returns the AST from after them.
# For constant traversals, line/offset are never used.
|
|
# Picks the `{line, offset}` pair from the first metadata for `:pre` and from
# the second metadata for every other kind.
defp pre_post_traverse(kind, pre_metadata, post_metadata) do
  case {kind, pre_metadata, post_metadata} do
    {:pre, %{line: line, offset: offset}, _} -> {line, offset}
    {_, _, %{line: line, offset: offset}} -> {line, offset}
  end
end
|
|
|
|
# Quoted `{comb__line, comb__offset}` pair: the variables bound in the head of
# the clause generated by compile_bound_combinator/7.
defp line_offset_pair, do: quote(do: {comb__line, comb__offset})
|
|
|
|
# Returns an AST adding `extra` to the given offset AST. When the offset is
# already `something + integer` and `extra` is an integer, the two integers are
# folded into one; otherwise a new `+` node is wrapped around the offset.
defp add_offset(offset, extra) do
  case offset do
    {:+, _, [var, current]} when is_integer(current) and is_integer(extra) ->
      {:+, [], [var, current + extra]}

    _ ->
      {:+, [], [offset, extra]}
  end
end
|
|
|
|
# True when the inclusive ranges may match a newline: either there is no
# inclusive constraint at all, or one of the ranges/codepoints covers `?\n`.
defp newline_allowed?(ors) do
  ors == [] or
    Enum.any?(ors, fn
      _.._//_ = range -> ?\n in range
      codepoint -> ?\n === codepoint
    end)
end
|
|
|
|
# True when one of the `{:not, range_or_codepoint}` exclusions covers `?\n`.
# An empty exclusion list forbids nothing.
defp newline_forbidden?(ands) do
  Enum.any?(ands, fn {:not, excluded} ->
    case excluded do
      _.._//_ -> ?\n in excluded
      _ -> ?\n === excluded
    end
  end)
end
|
|
|
|
# Wraps the line AST in a runtime check on `var`: when the matched value is a
# newline, the generated code returns a new `{line_number + 1, offset}` pair,
# otherwise it returns the previous line pair unchanged.
defp add_line(line, offset, var) do
  quote do
    line = unquote(line)

    case unquote(var) do
      ?\n -> {elem(line, 0) + 1, unquote(offset)}
      _ -> line
    end
  end
end
|
|
|
|
## Label
|
|
|
|
# Human-readable description of a sequence of combinators, used in error
# reasons. An empty sequence is described as "nothing".
defp labels(combinators) do
  case combinators do
    [] -> "nothing"
    _ -> Enum.map_join(combinators, ", followed by ", &label/1)
  end
end
|
|
|
|
# A literal string is described by its inspected form.
defp label({:string, binary}), do: "string " <> inspect(binary)

# An explicitly labelled combinator is described by its label.
defp label({:label, _combinator, label}), do: label
|
|
|
|
# Describes a binary segment: a prefix chosen from the modifier (and, for
# `:integer`, from whether every bound is ASCII-printable), followed by the
# inclusive ranges joined with " or" and the exclusions joined with ", and not".
defp label({:bin_segment, inclusive, exclusive, modifier}) do
  {included, printable?} =
    Enum.map_reduce(inclusive, true, fn range, acc -> inspect_bin_range(range, acc) end)

  {excluded, printable?} =
    Enum.map_reduce(exclusive, printable?, fn negated, acc ->
      inspect_bin_range(elem(negated, 1), acc)
    end)

  prefix =
    case modifier do
      :integer -> if printable?, do: "ASCII character", else: "byte"
      :utf8 -> "utf8 codepoint"
      :utf16 -> "utf16 codepoint"
      :utf32 -> "utf32 codepoint"
    end

  prefix <> Enum.join([Enum.join(included, " or") | excluded], ", and not")
end
|
|
|
|
# Remaining combinators: wrappers are described by what they wrap, parsecs by
# the name of the function they point to.
defp label(:eos), do: "end of string"
defp label({:lookahead, combinators, _}), do: labels(combinators)
defp label({:repeat, combinators, _, _}), do: labels(combinators)
defp label({:eventually, combinators}), do: "#{labels(combinators)} eventually"
defp label({:times, combinators, _}), do: labels(combinators)
defp label({:choice, choices, _}), do: choices |> Enum.map(&labels/1) |> Enum.join(" or ")
defp label({:traverse, combinators, _, _}), do: labels(combinators)
defp label({:parsec, {_module, function}}), do: Atom.to_string(function)
defp label({:parsec, name}), do: Atom.to_string(name)
defp label({:bytes, count}), do: inspect(count) <> " bytes"
|
|
|
|
## Bin segments
|
|
|
|
# Compiles the inclusive (`ors`) and exclusive (`ands`) ranges into a list of
# quoted guards over `var`. All inclusive ranges are folded into a single
# left-nested `or` expression placed at the head of the list.
defp compile_bin_ranges(var, ors, ands) do
  and_guards = for range <- ands, do: bin_range_to_guard(var, range)

  case ors do
    [] ->
      and_guards

    _ ->
      or_guard =
        ors
        |> Enum.map(fn range -> bin_range_to_guard(var, range) end)
        |> Enum.reduce(fn guard, acc -> {:or, [], [acc, guard]} end)

      [or_guard | and_guards]
  end
end
|
|
|
|
# Translates one range description into a quoted guard over `var`.
#
# Supported shapes: an integer, a range with step 1 or -1, and the same shapes
# wrapped in `{:not, _}`. A single-element range compiles to an (in)equality;
# descending ranges have their bounds swapped so the comparison stays valid.
defp bin_range_to_guard(var, min..min//step) when abs(step) == 1 do
  quote(do: unquote(var) === unquote(min))
end

defp bin_range_to_guard(var, min..max//1) do
  quote(do: unquote(var) >= unquote(min) and unquote(var) <= unquote(max))
end

defp bin_range_to_guard(var, min..max//-1) do
  quote(do: unquote(var) >= unquote(max) and unquote(var) <= unquote(min))
end

defp bin_range_to_guard(var, min) when is_integer(min) do
  quote(do: unquote(var) === unquote(min))
end

defp bin_range_to_guard(var, {:not, min..min//step}) when abs(step) == 1 do
  quote(do: unquote(var) !== unquote(min))
end

defp bin_range_to_guard(var, {:not, min..max//1}) do
  quote(do: unquote(var) < unquote(min) or unquote(var) > unquote(max))
end

defp bin_range_to_guard(var, {:not, min..max//-1}) do
  quote(do: unquote(var) < unquote(max) or unquote(var) > unquote(min))
end

defp bin_range_to_guard(var, {:not, min}) when is_integer(min) do
  quote(do: unquote(var) !== unquote(min))
end
|
|
|
|
# Reducer used when labelling binary segments. Returns the textual description
# of one range (or single value) together with the running "everything seen so
# far is ASCII-printable" flag.
defp inspect_bin_range(min..max//_, all_printable?) do
  description = " in the range #{inspect_char(min)} to #{inspect_char(max)}"
  {description, all_printable? and printable?(min) and printable?(max)}
end

defp inspect_bin_range(value, all_printable?) do
  {" equal to #{inspect_char(value)}", all_printable? and printable?(value)}
end
|
|
|
|
# Whether the codepoint, taken alone, is an ASCII-printable character.
defp printable?(codepoint) do
  [codepoint] |> List.ascii_printable?()
end
|
|
# Inspected form of the UTF-8 encoding of the codepoint.
defp inspect_char(codepoint) do
  encoded = <<codepoint::utf8>>
  inspect(encoded)
end
|
|
|
|
# Annotates a segment expression with its binary modifier. `:integer` needs no
# annotation; any other modifier yields the AST of `expr :: modifier`.
defp apply_bin_modifier(expr, modifier) do
  case modifier do
    :integer -> expr
    _ -> {:"::", [], [expr, Macro.var(modifier, __MODULE__)]}
  end
end
|
|
|
|
## Helpers
|
|
|
|
# Invokes `mod.fun` with the `extra` arguments placed before the stored `args`.
defp apply_mfa(mfargs, extra) do
  {mod, fun, args} = mfargs
  apply(mod, fun, extra ++ args)
end
|
|
|
|
# Folds a list of quoted guards into one left-nested `and` expression.
# An empty list means "no guard", represented as `true`.
defp guards_list_to_quoted(guards) do
  case guards do
    [] -> true
    _ -> Enum.reduce(guards, fn guard, acc -> {:and, [], [acc, guard]} end)
  end
end
|
|
|
|
# Returns a quoted variable named `x<counter>` scoped to this module, together
# with the next counter value.
defp build_var(counter) do
  var = Macro.var(:"x#{counter}", __MODULE__)
  {var, counter + 1}
end
|
|
|
|
# Returns the function name for the given step (`<name>__<step>`) together with
# the following step number.
defp build_next(step, %{name: name}) do
  next = String.to_atom("#{name}__#{step}")
  {next, step + 1}
end
|
|
|
|
# Definition of the final step: a clause named `current` that ignores the stack
# and returns the `{:ok, acc, rest, context, line, offset}` tuple.
defp build_ok(current) do
  {
    current,
    quote(do: [rest, acc, _stack, context, line, offset]),
    true,
    quote(do: {:ok, acc, rest, context, line, offset})
  }
end
|
|
|
|
# Builds the clause used when nothing else matched.
#
# Without a configured catch-all, the clause returns an error tuple whose
# reason is prefixed with "expected " for `:positive` kinds and with
# "did not expect " otherwise.
defp build_catch_all(kind, name, combinators, %{catch_all: nil, labels: labels}) do
  prefix = if kind == :positive, do: "expected ", else: "did not expect "
  message = prefix <> error_reason(combinators, labels)

  head = quote(do: [rest, _acc, _stack, context, line, offset])
  result = quote(do: {:error, unquote(message), rest, context, line, offset})
  {name, head, true, result}
end

# With a configured catch-all, the clause simply proxies to it, unwinding the
# accumulator stack by `acc_depth` levels.
defp build_catch_all(_kind, name, _combinators, %{catch_all: next, acc_depth: depth}) do
  build_proxy_to(name, next, depth)
end
|
|
|
|
# Builds a list pattern with `n - 1` underscore placeholders followed by a final
# `acc | stack` cons cell.
defp build_acc_depth(1, acc, stack) do
  [{:|, [], [acc, stack]}]
end

defp build_acc_depth(n, acc, stack) do
  placeholder = quote(do: _)
  [placeholder | build_acc_depth(n - 1, acc, stack)]
end
|
|
|
|
# Builds a clause named `name` that forwards to `next`.
#
# With depth 0 every argument is forwarded as is.
defp build_proxy_to(name, next, 0) do
  forwarded = quote(do: [rest, acc, stack, context, line, offset])
  {name, forwarded, true, {next, [], forwarded}}
end

# With a positive depth, the incoming accumulator is discarded and the generated
# body first destructures `stack` with the pattern from build_acc_depth/3 to
# recover the accumulator and stack that are passed on to `next`.
defp build_proxy_to(name, next, n) do
  head = quote(do: [rest, _acc, stack, context, line, offset])
  {acc, stack} = quote(do: {acc, stack})
  unwind = build_acc_depth(n, acc, stack)

  body =
    quote do
      unquote(unwind) = stack
      unquote(next)(rest, acc, stack, context, line, offset)
    end

  {name, head, true, body}
end
|
|
|
|
# Computes the textual reason for an error. Without labels the combinators are
# described directly; with a single label that label is the reason; with more
# than one, the first is reported "while processing" the others.
defp error_reason(combinators, label_stack) do
  case label_stack do
    [] -> labels(combinators)
    [only] -> only
    [head | tail] -> "#{head} while processing #{Enum.join(tail, " inside ")}"
  end
end
|
|
end
|